sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from typing_extensions import Literal

    from sqlglot._typing import E
    from sqlglot.dialects.dialect import Dialect, DialectType

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
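
# Illustrative usage (not part of the original module): the helpers above
# normalize dialect-specific argument conventions into canonical expression
# nodes. A minimal sketch, using only public `exp` builders:
#
#     from sqlglot import exp
#     from sqlglot.parser import parse_var_map
#
#     node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     assert isinstance(node, exp.VarMap)  # keys/values split into two Arrays
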
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
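
    # Illustrative example (not part of the original module): tokens listed in
    # NO_PAREN_FUNCTIONS parse as function expressions even without trailing
    # parentheses. Assuming the default dialect:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert sqlglot.parse_one("SELECT CURRENT_DATE").find(exp.CurrentDate)
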
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
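
    # Note: ID_VAR_TOKENS is what lets soft keywords double as identifiers,
    # while TABLE_ALIAS_TOKENS subtracts the join-related ones so that in
    # `SELECT * FROM t LEFT JOIN u ON t.id = u.id` the LEFT token starts a
    # join instead of being consumed as an alias for `t`.
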
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }
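
    # Illustrative example (not part of the original module): LAMBDAS drives the
    # parsing of higher-order function arguments. A sketch, assuming DuckDB's
    # list functions:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT LIST_TRANSFORM(l, x -> x + 1)", read="duckdb")
    #     assert ast.find(exp.Lambda)  # `x -> x + 1` became an exp.Lambda node
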
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
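
    # Illustrative example (not part of the original module): _parse_statement
    # (defined below) dispatches on the first token through STATEMENT_PARSERS:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("DROP TABLE IF EXISTS t"), exp.Drop)
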
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
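
    # Illustrative example (not part of the original module): RANGE_PARSERS
    # handles predicates that continue an already-parsed operand, e.g.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10")
    #     assert ast.find(exp.Between)
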
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
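
    # Illustrative example (not part of the original module): PROPERTY_PARSERS is
    # keyed by a property's leading keyword(s). A sketch, assuming the MySQL dialect:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     create = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     assert create.find(exp.EngineProperty)
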
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
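
    # Illustrative example (not part of the original module): FUNCTION_PARSERS
    # overrides the generic call parser for functions with irregular syntax,
    # such as CAST's `AS <type>` clause:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("SELECT CAST(x AS INT)").selects[0], exp.Cast)
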
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}
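
    # Illustrative example (not part of the original module): trailing clauses
    # are collected through QUERY_MODIFIER_PARSERS into the args of the query
    # expression:
    #
    #     import sqlglot
    #
    #     ast = sqlglot.parse_one("SELECT * FROM t ORDER BY x LIMIT 5")
    #     assert "order" in ast.args and "limit" in ast.args
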
"=") for assignments 973 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 974 975 # Whether the TRIM function expects the characters to trim as its first argument 976 TRIM_PATTERN_FIRST = False 977 978 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 979 STRING_ALIASES = False 980 981 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 982 MODIFIERS_ATTACHED_TO_UNION = True 983 UNION_MODIFIERS = {"order", "limit", "offset"} 984 985 __slots__ = ( 986 "error_level", 987 "error_message_context", 988 "max_errors", 989 "dialect", 990 "sql", 991 "errors", 992 "_tokens", 993 "_index", 994 "_curr", 995 "_next", 996 "_prev", 997 "_prev_comments", 998 ) 999 1000 # Autofilled 1001 SHOW_TRIE: t.Dict = {} 1002 SET_TRIE: t.Dict = {} 1003 1004 def __init__( 1005 self, 1006 error_level: t.Optional[ErrorLevel] = None, 1007 error_message_context: int = 100, 1008 max_errors: int = 3, 1009 dialect: DialectType = None, 1010 ): 1011 from sqlglot.dialects import Dialect 1012 1013 self.error_level = error_level or ErrorLevel.IMMEDIATE 1014 self.error_message_context = error_message_context 1015 self.max_errors = max_errors 1016 self.dialect = Dialect.get_or_raise(dialect) 1017 self.reset() 1018 1019 def reset(self): 1020 self.sql = "" 1021 self.errors = [] 1022 self._tokens = [] 1023 self._index = 0 1024 self._curr = None 1025 self._next = None 1026 self._prev = None 1027 self._prev_comments = None 1028 1029 def parse( 1030 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1031 ) -> t.List[t.Optional[exp.Expression]]: 1032 """ 1033 Parses a list of tokens and returns a list of syntax trees, one tree 1034 per parsed SQL statement. 1035 1036 Args: 1037 raw_tokens: The list of tokens. 1038 sql: The original SQL string, used to produce helpful debug messages. 1039 1040 Returns: 1041 The list of the produced syntax trees. 1042 """ 1043 return self._parse( 1044 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1045 ) 1046 1047 def parse_into( 1048 self, 1049 expression_types: exp.IntoType, 1050 raw_tokens: t.List[Token], 1051 sql: t.Optional[str] = None, 1052 ) -> t.List[t.Optional[exp.Expression]]: 1053 """ 1054 Parses a list of tokens into a given Expression type. If a collection of Expression 1055 types is given instead, this method will try to parse the token list into each one 1056 of them, stopping at the first for which the parsing succeeds. 1057 1058 Args: 1059 expression_types: The expression type(s) to try and parse the token list into. 1060 raw_tokens: The list of tokens. 1061 sql: The original SQL string, used to produce helpful debug messages. 1062 1063 Returns: 1064 The target Expression. 
1065 """ 1066 errors = [] 1067 for expression_type in ensure_list(expression_types): 1068 parser = self.EXPRESSION_PARSERS.get(expression_type) 1069 if not parser: 1070 raise TypeError(f"No parser registered for {expression_type}") 1071 1072 try: 1073 return self._parse(parser, raw_tokens, sql) 1074 except ParseError as e: 1075 e.errors[0]["into_expression"] = expression_type 1076 errors.append(e) 1077 1078 raise ParseError( 1079 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1080 errors=merge_errors(errors), 1081 ) from errors[-1] 1082 1083 def _parse( 1084 self, 1085 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1086 raw_tokens: t.List[Token], 1087 sql: t.Optional[str] = None, 1088 ) -> t.List[t.Optional[exp.Expression]]: 1089 self.reset() 1090 self.sql = sql or "" 1091 1092 total = len(raw_tokens) 1093 chunks: t.List[t.List[Token]] = [[]] 1094 1095 for i, token in enumerate(raw_tokens): 1096 if token.token_type == TokenType.SEMICOLON: 1097 if i < total - 1: 1098 chunks.append([]) 1099 else: 1100 chunks[-1].append(token) 1101 1102 expressions = [] 1103 1104 for tokens in chunks: 1105 self._index = -1 1106 self._tokens = tokens 1107 self._advance() 1108 1109 expressions.append(parse_method(self)) 1110 1111 if self._index < len(self._tokens): 1112 self.raise_error("Invalid expression / Unexpected token") 1113 1114 self.check_errors() 1115 1116 return expressions 1117 1118 def check_errors(self) -> None: 1119 """Logs or raises any found errors, depending on the chosen error level setting.""" 1120 if self.error_level == ErrorLevel.WARN: 1121 for error in self.errors: 1122 logger.error(str(error)) 1123 elif self.error_level == ErrorLevel.RAISE and self.errors: 1124 raise ParseError( 1125 concat_messages(self.errors, self.max_errors), 1126 errors=merge_errors(self.errors), 1127 ) 1128 1129 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1130 """ 1131 Appends an error in the list of recorded errors or raises it, depending on the chosen 1132 error level setting. 1133 """ 1134 token = token or self._curr or self._prev or Token.string("") 1135 start = token.start 1136 end = token.end + 1 1137 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1138 highlight = self.sql[start:end] 1139 end_context = self.sql[end : end + self.error_message_context] 1140 1141 error = ParseError.new( 1142 f"{message}. Line {token.line}, Col: {token.col}.\n" 1143 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1144 description=message, 1145 line=token.line, 1146 col=token.col, 1147 start_context=start_context, 1148 highlight=highlight, 1149 end_context=end_context, 1150 ) 1151 1152 if self.error_level == ErrorLevel.IMMEDIATE: 1153 raise error 1154 1155 self.errors.append(error) 1156 1157 def expression( 1158 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1159 ) -> E: 1160 """ 1161 Creates a new, validated Expression. 1162 1163 Args: 1164 exp_class: The expression class to instantiate. 1165 comments: An optional list of comments to attach to the expression. 1166 kwargs: The arguments to set for the expression along with their respective values. 1167 1168 Returns: 1169 The target expression. 
1170 """ 1171 instance = exp_class(**kwargs) 1172 instance.add_comments(comments) if comments else self._add_comments(instance) 1173 return self.validate_expression(instance) 1174 1175 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1176 if expression and self._prev_comments: 1177 expression.add_comments(self._prev_comments) 1178 self._prev_comments = None 1179 1180 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1181 """ 1182 Validates an Expression, making sure that all its mandatory arguments are set. 1183 1184 Args: 1185 expression: The expression to validate. 1186 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1187 1188 Returns: 1189 The validated expression. 1190 """ 1191 if self.error_level != ErrorLevel.IGNORE: 1192 for error_message in expression.error_messages(args): 1193 self.raise_error(error_message) 1194 1195 return expression 1196 1197 def _find_sql(self, start: Token, end: Token) -> str: 1198 return self.sql[start.start : end.end + 1] 1199 1200 def _is_connected(self) -> bool: 1201 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1202 1203 def _advance(self, times: int = 1) -> None: 1204 self._index += times 1205 self._curr = seq_get(self._tokens, self._index) 1206 self._next = seq_get(self._tokens, self._index + 1) 1207 1208 if self._index > 0: 1209 self._prev = self._tokens[self._index - 1] 1210 self._prev_comments = self._prev.comments 1211 else: 1212 self._prev = None 1213 self._prev_comments = None 1214 1215 def _retreat(self, index: int) -> None: 1216 if index != self._index: 1217 self._advance(index - self._index) 1218 1219 def _warn_unsupported(self) -> None: 1220 if len(self._tokens) <= 1: 1221 return 1222 1223 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1224 # interested in emitting a warning for the one being currently processed. 1225 sql = self._find_sql(self._tokens[0], self._tokens[-1]) 1226 1227 logger.warning( 1228 f"Input '{sql}' contains unsupported syntax, proceeding to parse it into the" 1229 " fallback 'Command' expression. Consider filing a GitHub issue to request support" 1230 " for this syntax, e.g. if transpilation or AST metadata extraction is required." 
    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
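
    # Note: when no statement or command parser matches, _parse_statement falls
    # through to expression parsing; conversely, recognized command keywords
    # that sqlglot does not model are preserved verbatim as exp.Command nodes
    # via _parse_command, after the warning emitted by _warn_unsupported.
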
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
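
    # Illustrative example (not part of the original module): the CREATE paths
    # above all converge on a single exp.Create node, e.g.
    #
    #     import sqlglot
    #
    #     create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     assert create.args["replace"] is True and create.args["kind"] == "VIEW"
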
self._match_text_seq("OUTPUTFORMAT") else None 1545 1546 return self.expression( 1547 exp.FileFormatProperty, 1548 this=self.expression( 1549 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1550 ) 1551 if input_format or output_format 1552 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1553 ) 1554 1555 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1556 self._match(TokenType.EQ) 1557 self._match(TokenType.ALIAS) 1558 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1559 1560 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1561 properties = [] 1562 while True: 1563 if before: 1564 prop = self._parse_property_before() 1565 else: 1566 prop = self._parse_property() 1567 1568 if not prop: 1569 break 1570 for p in ensure_list(prop): 1571 properties.append(p) 1572 1573 if properties: 1574 return self.expression(exp.Properties, expressions=properties) 1575 1576 return None 1577 1578 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1579 return self.expression( 1580 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1581 ) 1582 1583 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1584 if self._index >= 2: 1585 pre_volatile_token = self._tokens[self._index - 2] 1586 else: 1587 pre_volatile_token = None 1588 1589 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1590 return exp.VolatileProperty() 1591 1592 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1593 1594 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1595 self._match_pair(TokenType.EQ, TokenType.ON) 1596 1597 prop = self.expression(exp.WithSystemVersioningProperty) 1598 if self._match(TokenType.L_PAREN): 1599 self._match_text_seq("HISTORY_TABLE", "=") 1600 prop.set("this", self._parse_table_parts()) 1601 1602 if self._match(TokenType.COMMA): 1603 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1604 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1605 1606 self._match_r_paren() 1607 1608 return prop 1609 1610 def _parse_with_property( 1611 self, 1612 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1613 if self._match(TokenType.L_PAREN, advance=False): 1614 return self._parse_wrapped_csv(self._parse_property) 1615 1616 if self._match_text_seq("JOURNAL"): 1617 return self._parse_withjournaltable() 1618 1619 if self._match_text_seq("DATA"): 1620 return self._parse_withdata(no=False) 1621 elif self._match_text_seq("NO", "DATA"): 1622 return self._parse_withdata(no=True) 1623 1624 if not self._next: 1625 return None 1626 1627 return self._parse_withisolatedloading() 1628 1629 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1630 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1631 self._match(TokenType.EQ) 1632 1633 user = self._parse_id_var() 1634 self._match(TokenType.PARAMETER) 1635 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1636 1637 if not user or not host: 1638 return None 1639 1640 return exp.DefinerProperty(this=f"{user}@{host}") 1641 1642 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1643 self._match(TokenType.TABLE) 1644 self._match(TokenType.EQ) 1645 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1646 1647 def _parse_log(self, no: bool = 

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
self._match_text_seq("FOR", "ALL") 1764 for_insert = self._match_text_seq("FOR", "INSERT") 1765 for_none = self._match_text_seq("FOR", "NONE") 1766 return self.expression( 1767 exp.IsolatedLoadingProperty, 1768 no=no, 1769 concurrent=concurrent, 1770 for_all=for_all, 1771 for_insert=for_insert, 1772 for_none=for_none, 1773 ) 1774 1775 def _parse_locking(self) -> exp.LockingProperty: 1776 if self._match(TokenType.TABLE): 1777 kind = "TABLE" 1778 elif self._match(TokenType.VIEW): 1779 kind = "VIEW" 1780 elif self._match(TokenType.ROW): 1781 kind = "ROW" 1782 elif self._match_text_seq("DATABASE"): 1783 kind = "DATABASE" 1784 else: 1785 kind = None 1786 1787 if kind in ("DATABASE", "TABLE", "VIEW"): 1788 this = self._parse_table_parts() 1789 else: 1790 this = None 1791 1792 if self._match(TokenType.FOR): 1793 for_or_in = "FOR" 1794 elif self._match(TokenType.IN): 1795 for_or_in = "IN" 1796 else: 1797 for_or_in = None 1798 1799 if self._match_text_seq("ACCESS"): 1800 lock_type = "ACCESS" 1801 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1802 lock_type = "EXCLUSIVE" 1803 elif self._match_text_seq("SHARE"): 1804 lock_type = "SHARE" 1805 elif self._match_text_seq("READ"): 1806 lock_type = "READ" 1807 elif self._match_text_seq("WRITE"): 1808 lock_type = "WRITE" 1809 elif self._match_text_seq("CHECKSUM"): 1810 lock_type = "CHECKSUM" 1811 else: 1812 lock_type = None 1813 1814 override = self._match_text_seq("OVERRIDE") 1815 1816 return self.expression( 1817 exp.LockingProperty, 1818 this=this, 1819 kind=kind, 1820 for_or_in=for_or_in, 1821 lock_type=lock_type, 1822 override=override, 1823 ) 1824 1825 def _parse_partition_by(self) -> t.List[exp.Expression]: 1826 if self._match(TokenType.PARTITION_BY): 1827 return self._parse_csv(self._parse_conjunction) 1828 return [] 1829 1830 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1831 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1832 if self._match_text_seq("MINVALUE"): 1833 return exp.var("MINVALUE") 1834 if self._match_text_seq("MAXVALUE"): 1835 return exp.var("MAXVALUE") 1836 return self._parse_bitwise() 1837 1838 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1839 expression = None 1840 from_expressions = None 1841 to_expressions = None 1842 1843 if self._match(TokenType.IN): 1844 this = self._parse_wrapped_csv(self._parse_bitwise) 1845 elif self._match(TokenType.FROM): 1846 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1847 self._match_text_seq("TO") 1848 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1849 elif self._match_text_seq("WITH", "(", "MODULUS"): 1850 this = self._parse_number() 1851 self._match_text_seq(",", "REMAINDER") 1852 expression = self._parse_number() 1853 self._match_r_paren() 1854 else: 1855 self.raise_error("Failed to parse partition bound spec.") 1856 1857 return self.expression( 1858 exp.PartitionBoundSpec, 1859 this=this, 1860 expression=expression, 1861 from_expressions=from_expressions, 1862 to_expressions=to_expressions, 1863 ) 1864 1865 # https://www.postgresql.org/docs/current/sql-createtable.html 1866 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1867 if not self._match_text_seq("OF"): 1868 self._retreat(self._index - 1) 1869 return None 1870 1871 this = self._parse_table(schema=True) 1872 1873 if self._match(TokenType.DEFAULT): 1874 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1875 elif self._match_text_seq("FOR", "VALUES"): 1876 expression = 
self._parse_partition_bound_spec() 1877 else: 1878 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1879 1880 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1881 1882 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1883 self._match(TokenType.EQ) 1884 return self.expression( 1885 exp.PartitionedByProperty, 1886 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1887 ) 1888 1889 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1890 if self._match_text_seq("AND", "STATISTICS"): 1891 statistics = True 1892 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1893 statistics = False 1894 else: 1895 statistics = None 1896 1897 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1898 1899 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1900 if self._match_text_seq("SQL"): 1901 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1902 return None 1903 1904 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1905 if self._match_text_seq("SQL", "DATA"): 1906 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1907 return None 1908 1909 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1910 if self._match_text_seq("PRIMARY", "INDEX"): 1911 return exp.NoPrimaryIndexProperty() 1912 if self._match_text_seq("SQL"): 1913 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1914 return None 1915 1916 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1917 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1918 return exp.OnCommitProperty() 1919 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1920 return exp.OnCommitProperty(delete=True) 1921 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1922 1923 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1924 if self._match_text_seq("SQL", "DATA"): 1925 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1926 return None 1927 1928 def _parse_distkey(self) -> exp.DistKeyProperty: 1929 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1930 1931 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1932 table = self._parse_table(schema=True) 1933 1934 options = [] 1935 while self._match_texts(("INCLUDING", "EXCLUDING")): 1936 this = self._prev.text.upper() 1937 1938 id_var = self._parse_id_var() 1939 if not id_var: 1940 return None 1941 1942 options.append( 1943 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1944 ) 1945 1946 return self.expression(exp.LikeProperty, this=table, expressions=options) 1947 1948 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1949 return self.expression( 1950 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1951 ) 1952 1953 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1954 self._match(TokenType.EQ) 1955 return self.expression( 1956 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1957 ) 1958 1959 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1960 self._match_text_seq("WITH", "CONNECTION") 1961 return self.expression( 1962 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1963 ) 1964 1965 def _parse_returns(self) -> exp.ReturnsProperty: 1966 value: 
t.Optional[exp.Expression] 1967 is_table = self._match(TokenType.TABLE) 1968 1969 if is_table: 1970 if self._match(TokenType.LT): 1971 value = self.expression( 1972 exp.Schema, 1973 this="TABLE", 1974 expressions=self._parse_csv(self._parse_struct_types), 1975 ) 1976 if not self._match(TokenType.GT): 1977 self.raise_error("Expecting >") 1978 else: 1979 value = self._parse_schema(exp.var("TABLE")) 1980 else: 1981 value = self._parse_types() 1982 1983 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1984 1985 def _parse_describe(self) -> exp.Describe: 1986 kind = self._match_set(self.CREATABLES) and self._prev.text 1987 extended = self._match_text_seq("EXTENDED") 1988 this = self._parse_table(schema=True) 1989 properties = self._parse_properties() 1990 expressions = properties.expressions if properties else None 1991 return self.expression( 1992 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 1993 ) 1994 1995 def _parse_insert(self) -> exp.Insert: 1996 comments = ensure_list(self._prev_comments) 1997 overwrite = self._match(TokenType.OVERWRITE) 1998 ignore = self._match(TokenType.IGNORE) 1999 local = self._match_text_seq("LOCAL") 2000 alternative = None 2001 2002 if self._match_text_seq("DIRECTORY"): 2003 this: t.Optional[exp.Expression] = self.expression( 2004 exp.Directory, 2005 this=self._parse_var_or_string(), 2006 local=local, 2007 row_format=self._parse_row_format(match_row=True), 2008 ) 2009 else: 2010 if self._match(TokenType.OR): 2011 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2012 2013 self._match(TokenType.INTO) 2014 comments += ensure_list(self._prev_comments) 2015 self._match(TokenType.TABLE) 2016 this = self._parse_table(schema=True) 2017 2018 returning = self._parse_returning() 2019 2020 return self.expression( 2021 exp.Insert, 2022 comments=comments, 2023 this=this, 2024 by_name=self._match_text_seq("BY", "NAME"), 2025 exists=self._parse_exists(), 2026 partition=self._parse_partition(), 2027 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2028 and self._parse_conjunction(), 2029 expression=self._parse_ddl_select(), 2030 conflict=self._parse_on_conflict(), 2031 returning=returning or self._parse_returning(), 2032 overwrite=overwrite, 2033 alternative=alternative, 2034 ignore=ignore, 2035 ) 2036 2037 def _parse_kill(self) -> exp.Kill: 2038 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2039 2040 return self.expression( 2041 exp.Kill, 2042 this=self._parse_primary(), 2043 kind=kind, 2044 ) 2045 2046 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2047 conflict = self._match_text_seq("ON", "CONFLICT") 2048 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2049 2050 if not conflict and not duplicate: 2051 return None 2052 2053 nothing = None 2054 expressions = None 2055 key = None 2056 constraint = None 2057 2058 if conflict: 2059 if self._match_text_seq("ON", "CONSTRAINT"): 2060 constraint = self._parse_id_var() 2061 else: 2062 key = self._parse_csv(self._parse_value) 2063 2064 self._match_text_seq("DO") 2065 if self._match_text_seq("NOTHING"): 2066 nothing = True 2067 else: 2068 self._match(TokenType.UPDATE) 2069 self._match(TokenType.SET) 2070 expressions = self._parse_csv(self._parse_equality) 2071 2072 return self.expression( 2073 exp.OnConflict, 2074 duplicate=duplicate, 2075 expressions=expressions, 2076 nothing=nothing, 2077 key=key, 2078 constraint=constraint, 2079 ) 2080 2081 def _parse_returning(self) -> 
t.Optional[exp.Returning]: 2082 if not self._match(TokenType.RETURNING): 2083 return None 2084 return self.expression( 2085 exp.Returning, 2086 expressions=self._parse_csv(self._parse_expression), 2087 into=self._match(TokenType.INTO) and self._parse_table_part(), 2088 ) 2089 2090 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2091 if not self._match(TokenType.FORMAT): 2092 return None 2093 return self._parse_row_format() 2094 2095 def _parse_row_format( 2096 self, match_row: bool = False 2097 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2098 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2099 return None 2100 2101 if self._match_text_seq("SERDE"): 2102 this = self._parse_string() 2103 2104 serde_properties = None 2105 if self._match(TokenType.SERDE_PROPERTIES): 2106 serde_properties = self.expression( 2107 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2108 ) 2109 2110 return self.expression( 2111 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2112 ) 2113 2114 self._match_text_seq("DELIMITED") 2115 2116 kwargs = {} 2117 2118 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2119 kwargs["fields"] = self._parse_string() 2120 if self._match_text_seq("ESCAPED", "BY"): 2121 kwargs["escaped"] = self._parse_string() 2122 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2123 kwargs["collection_items"] = self._parse_string() 2124 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2125 kwargs["map_keys"] = self._parse_string() 2126 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2127 kwargs["lines"] = self._parse_string() 2128 if self._match_text_seq("NULL", "DEFINED", "AS"): 2129 kwargs["null"] = self._parse_string() 2130 2131 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2132 2133 def _parse_load(self) -> exp.LoadData | exp.Command: 2134 if self._match_text_seq("DATA"): 2135 local = self._match_text_seq("LOCAL") 2136 self._match_text_seq("INPATH") 2137 inpath = self._parse_string() 2138 overwrite = self._match(TokenType.OVERWRITE) 2139 self._match_pair(TokenType.INTO, TokenType.TABLE) 2140 2141 return self.expression( 2142 exp.LoadData, 2143 this=self._parse_table(schema=True), 2144 local=local, 2145 overwrite=overwrite, 2146 inpath=inpath, 2147 partition=self._parse_partition(), 2148 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2149 serde=self._match_text_seq("SERDE") and self._parse_string(), 2150 ) 2151 return self._parse_as_command(self._prev) 2152 2153 def _parse_delete(self) -> exp.Delete: 2154 # This handles MySQL's "Multiple-Table Syntax" 2155 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2156 tables = None 2157 comments = self._prev_comments 2158 if not self._match(TokenType.FROM, advance=False): 2159 tables = self._parse_csv(self._parse_table) or None 2160 2161 returning = self._parse_returning() 2162 2163 return self.expression( 2164 exp.Delete, 2165 comments=comments, 2166 tables=tables, 2167 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2168 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2169 where=self._parse_where(), 2170 returning=returning or self._parse_returning(), 2171 limit=self._parse_limit(), 2172 ) 2173 2174 def _parse_update(self) -> exp.Update: 2175 comments = self._prev_comments 2176 this = self._parse_table(joins=True, 
alias_tokens=self.UPDATE_ALIAS_TOKENS) 2177 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2178 returning = self._parse_returning() 2179 return self.expression( 2180 exp.Update, 2181 comments=comments, 2182 **{ # type: ignore 2183 "this": this, 2184 "expressions": expressions, 2185 "from": self._parse_from(joins=True), 2186 "where": self._parse_where(), 2187 "returning": returning or self._parse_returning(), 2188 "order": self._parse_order(), 2189 "limit": self._parse_limit(), 2190 }, 2191 ) 2192 2193 def _parse_uncache(self) -> exp.Uncache: 2194 if not self._match(TokenType.TABLE): 2195 self.raise_error("Expecting TABLE after UNCACHE") 2196 2197 return self.expression( 2198 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2199 ) 2200 2201 def _parse_cache(self) -> exp.Cache: 2202 lazy = self._match_text_seq("LAZY") 2203 self._match(TokenType.TABLE) 2204 table = self._parse_table(schema=True) 2205 2206 options = [] 2207 if self._match_text_seq("OPTIONS"): 2208 self._match_l_paren() 2209 k = self._parse_string() 2210 self._match(TokenType.EQ) 2211 v = self._parse_string() 2212 options = [k, v] 2213 self._match_r_paren() 2214 2215 self._match(TokenType.ALIAS) 2216 return self.expression( 2217 exp.Cache, 2218 this=table, 2219 lazy=lazy, 2220 options=options, 2221 expression=self._parse_select(nested=True), 2222 ) 2223 2224 def _parse_partition(self) -> t.Optional[exp.Partition]: 2225 if not self._match(TokenType.PARTITION): 2226 return None 2227 2228 return self.expression( 2229 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2230 ) 2231 2232 def _parse_value(self) -> exp.Tuple: 2233 if self._match(TokenType.L_PAREN): 2234 expressions = self._parse_csv(self._parse_expression) 2235 self._match_r_paren() 2236 return self.expression(exp.Tuple, expressions=expressions) 2237 2238 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
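        # A hedged sketch of that behavior through the public API (the exact
        # tree shape is abbreviated; "presto" is just the reading dialect):
        #
        #     >>> import sqlglot
        #     >>> ast = sqlglot.parse_one("SELECT * FROM (VALUES 1, 2) AS t(x)", read="presto")
        #     >>> len(ast.find(sqlglot.exp.Values).expressions)  # two one-column rows
        #     2
        #
        # Presto's VALUES semantics are documented here: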
2239 # https://prestodb.io/docs/current/sql/values.html 2240 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2241 2242 def _parse_projections(self) -> t.List[exp.Expression]: 2243 return self._parse_expressions() 2244 2245 def _parse_select( 2246 self, 2247 nested: bool = False, 2248 table: bool = False, 2249 parse_subquery_alias: bool = True, 2250 parse_set_operation: bool = True, 2251 ) -> t.Optional[exp.Expression]: 2252 cte = self._parse_with() 2253 2254 if cte: 2255 this = self._parse_statement() 2256 2257 if not this: 2258 self.raise_error("Failed to parse any statement following CTE") 2259 return cte 2260 2261 if "with" in this.arg_types: 2262 this.set("with", cte) 2263 else: 2264 self.raise_error(f"{this.key} does not support CTE") 2265 this = cte 2266 2267 return this 2268 2269 # duckdb supports leading with FROM x 2270 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2271 2272 if self._match(TokenType.SELECT): 2273 comments = self._prev_comments 2274 2275 hint = self._parse_hint() 2276 all_ = self._match(TokenType.ALL) 2277 distinct = self._match_set(self.DISTINCT_TOKENS) 2278 2279 kind = ( 2280 self._match(TokenType.ALIAS) 2281 and self._match_texts(("STRUCT", "VALUE")) 2282 and self._prev.text.upper() 2283 ) 2284 2285 if distinct: 2286 distinct = self.expression( 2287 exp.Distinct, 2288 on=self._parse_value() if self._match(TokenType.ON) else None, 2289 ) 2290 2291 if all_ and distinct: 2292 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2293 2294 limit = self._parse_limit(top=True) 2295 projections = self._parse_projections() 2296 2297 this = self.expression( 2298 exp.Select, 2299 kind=kind, 2300 hint=hint, 2301 distinct=distinct, 2302 expressions=projections, 2303 limit=limit, 2304 ) 2305 this.comments = comments 2306 2307 into = self._parse_into() 2308 if into: 2309 this.set("into", into) 2310 2311 if not from_: 2312 from_ = self._parse_from() 2313 2314 if from_: 2315 this.set("from", from_) 2316 2317 this = self._parse_query_modifiers(this) 2318 elif (table or nested) and self._match(TokenType.L_PAREN): 2319 if self._match(TokenType.PIVOT): 2320 this = self._parse_simplified_pivot() 2321 elif self._match(TokenType.FROM): 2322 this = exp.select("*").from_( 2323 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2324 ) 2325 else: 2326 this = ( 2327 self._parse_table() 2328 if table 2329 else self._parse_select(nested=True, parse_set_operation=False) 2330 ) 2331 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2332 2333 self._match_r_paren() 2334 2335 # We return early here so that the UNION isn't attached to the subquery by the 2336 # following call to _parse_set_operations, but instead becomes the parent node 2337 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2338 elif self._match(TokenType.VALUES): 2339 this = self.expression( 2340 exp.Values, 2341 expressions=self._parse_csv(self._parse_value), 2342 alias=self._parse_table_alias(), 2343 ) 2344 elif from_: 2345 this = exp.select("*").from_(from_.this, copy=False) 2346 else: 2347 this = None 2348 2349 if parse_set_operation: 2350 return self._parse_set_operations(this) 2351 return this 2352 2353 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2354 if not skip_with_token and not self._match(TokenType.WITH): 2355 return None 2356 2357 comments = self._prev_comments 2358 recursive = self._match(TokenType.RECURSIVE) 2359 2360 expressions = [] 2361 while True: 2362 
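            # Each pass through this loop consumes one CTE; a comma (or a stray
            # repeated WITH, which this parser tolerates) keeps the loop going.
            # Illustrative sketch, assuming the default dialect:
            #
            #     >>> import sqlglot
            #     >>> ast = sqlglot.parse_one("WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM b")
            #     >>> len(ast.args["with"].expressions)  # one exp.CTE per iteration
            #     2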
expressions.append(self._parse_cte()) 2363 2364 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2365 break 2366 else: 2367 self._match(TokenType.WITH) 2368 2369 return self.expression( 2370 exp.With, comments=comments, expressions=expressions, recursive=recursive 2371 ) 2372 2373 def _parse_cte(self) -> exp.CTE: 2374 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2375 if not alias or not alias.this: 2376 self.raise_error("Expected CTE to have alias") 2377 2378 self._match(TokenType.ALIAS) 2379 return self.expression( 2380 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2381 ) 2382 2383 def _parse_table_alias( 2384 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2385 ) -> t.Optional[exp.TableAlias]: 2386 any_token = self._match(TokenType.ALIAS) 2387 alias = ( 2388 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2389 or self._parse_string_as_identifier() 2390 ) 2391 2392 index = self._index 2393 if self._match(TokenType.L_PAREN): 2394 columns = self._parse_csv(self._parse_function_parameter) 2395 self._match_r_paren() if columns else self._retreat(index) 2396 else: 2397 columns = None 2398 2399 if not alias and not columns: 2400 return None 2401 2402 return self.expression(exp.TableAlias, this=alias, columns=columns) 2403 2404 def _parse_subquery( 2405 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2406 ) -> t.Optional[exp.Subquery]: 2407 if not this: 2408 return None 2409 2410 return self.expression( 2411 exp.Subquery, 2412 this=this, 2413 pivots=self._parse_pivots(), 2414 alias=self._parse_table_alias() if parse_alias else None, 2415 ) 2416 2417 def _parse_query_modifiers( 2418 self, this: t.Optional[exp.Expression] 2419 ) -> t.Optional[exp.Expression]: 2420 if isinstance(this, self.MODIFIABLES): 2421 for join in iter(self._parse_join, None): 2422 this.append("joins", join) 2423 for lateral in iter(self._parse_lateral, None): 2424 this.append("laterals", lateral) 2425 2426 while True: 2427 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2428 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2429 key, expression = parser(self) 2430 2431 if expression: 2432 this.set(key, expression) 2433 if key == "limit": 2434 offset = expression.args.pop("offset", None) 2435 if offset: 2436 this.set("offset", exp.Offset(expression=offset)) 2437 continue 2438 break 2439 return this 2440 2441 def _parse_hint(self) -> t.Optional[exp.Hint]: 2442 if self._match(TokenType.HINT): 2443 hints = [] 2444 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2445 hints.extend(hint) 2446 2447 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2448 self.raise_error("Expected */ after HINT") 2449 2450 return self.expression(exp.Hint, expressions=hints) 2451 2452 return None 2453 2454 def _parse_into(self) -> t.Optional[exp.Into]: 2455 if not self._match(TokenType.INTO): 2456 return None 2457 2458 temp = self._match(TokenType.TEMPORARY) 2459 unlogged = self._match_text_seq("UNLOGGED") 2460 self._match(TokenType.TABLE) 2461 2462 return self.expression( 2463 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2464 ) 2465 2466 def _parse_from( 2467 self, joins: bool = False, skip_from_token: bool = False 2468 ) -> t.Optional[exp.From]: 2469 if not skip_from_token and not self._match(TokenType.FROM): 2470 return None 2471 2472 return self.expression( 2473 exp.From, comments=self._prev_comments, 
this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2592 exp.TableAlias, this=table, columns=columns 2593 ) 2594 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2595 # We move the alias from the lateral's child node to the lateral itself 2596 table_alias = this.args["alias"].pop() 2597 else: 2598 table_alias = self._parse_table_alias() 2599 2600 return self.expression( 2601 exp.Lateral, 2602 this=this, 2603 view=view, 2604 outer=outer, 2605 alias=table_alias, 2606 cross_apply=cross_apply, 2607 ) 2608 2609 def _parse_join_parts( 2610 self, 2611 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2612 return ( 2613 self._match_set(self.JOIN_METHODS) and self._prev, 2614 self._match_set(self.JOIN_SIDES) and self._prev, 2615 self._match_set(self.JOIN_KINDS) and self._prev, 2616 ) 2617 2618 def _parse_join( 2619 self, skip_join_token: bool = False, parse_bracket: bool = False 2620 ) -> t.Optional[exp.Join]: 2621 if self._match(TokenType.COMMA): 2622 return self.expression(exp.Join, this=self._parse_table()) 2623 2624 index = self._index 2625 method, side, kind = self._parse_join_parts() 2626 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2627 join = self._match(TokenType.JOIN) 2628 2629 if not skip_join_token and not join: 2630 self._retreat(index) 2631 kind = None 2632 method = None 2633 side = None 2634 2635 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2636 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2637 2638 if not skip_join_token and not join and not outer_apply and not cross_apply: 2639 return None 2640 2641 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2642 2643 if method: 2644 kwargs["method"] = method.text 2645 if side: 2646 kwargs["side"] = side.text 2647 if kind: 2648 kwargs["kind"] = kind.text 2649 if hint: 2650 kwargs["hint"] = hint 2651 2652 if self._match(TokenType.ON): 2653 kwargs["on"] = self._parse_conjunction() 2654 elif self._match(TokenType.USING): 2655 kwargs["using"] = self._parse_wrapped_id_vars() 2656 elif not (kind and kind.token_type == TokenType.CROSS): 2657 index = self._index 2658 join = self._parse_join() 2659 2660 if join and self._match(TokenType.ON): 2661 kwargs["on"] = self._parse_conjunction() 2662 elif join and self._match(TokenType.USING): 2663 kwargs["using"] = self._parse_wrapped_id_vars() 2664 else: 2665 join = None 2666 self._retreat(index) 2667 2668 kwargs["this"].set("joins", [join] if join else None) 2669 2670 comments = [c for token in (method, side, kind) if token for c in token.comments] 2671 return self.expression(exp.Join, comments=comments, **kwargs) 2672 2673 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2674 this = self._parse_conjunction() 2675 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2676 return this 2677 2678 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2679 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2680 2681 return this 2682 2683 def _parse_index( 2684 self, 2685 index: t.Optional[exp.Expression] = None, 2686 ) -> t.Optional[exp.Index]: 2687 if index: 2688 unique = None 2689 primary = None 2690 amp = None 2691 2692 self._match(TokenType.ON) 2693 self._match(TokenType.TABLE) # hive 2694 table = self._parse_table_parts(schema=True) 2695 else: 2696 unique = self._match(TokenType.UNIQUE) 2697 primary = self._match_text_seq("PRIMARY") 2698 amp = self._match_text_seq("AMP") 2699 2700 if not self._match(TokenType.INDEX): 2701 
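            # If INDEX was not matched here, this is not an index definition,
            # so the next line gives up. When the full form is present, a
            # rough sketch of the resulting node (names are illustrative):
            #
            #     >>> import sqlglot
            #     >>> idx = sqlglot.parse_one("CREATE INDEX idx ON t (a, b)").find(sqlglot.exp.Index)
            #     >>> idx.this.name, len(idx.args["columns"])
            #     ('idx', 2)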
return None 2702 2703 index = self._parse_id_var() 2704 table = None 2705 2706 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2707 2708 if self._match(TokenType.L_PAREN, advance=False): 2709 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2710 else: 2711 columns = None 2712 2713 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2714 2715 return self.expression( 2716 exp.Index, 2717 this=index, 2718 table=table, 2719 using=using, 2720 columns=columns, 2721 unique=unique, 2722 primary=primary, 2723 amp=amp, 2724 include=include, 2725 partition_by=self._parse_partition_by(), 2726 where=self._parse_where(), 2727 ) 2728 2729 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2730 hints: t.List[exp.Expression] = [] 2731 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2732 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2733 hints.append( 2734 self.expression( 2735 exp.WithTableHint, 2736 expressions=self._parse_csv( 2737 lambda: self._parse_function() or self._parse_var(any_token=True) 2738 ), 2739 ) 2740 ) 2741 self._match_r_paren() 2742 else: 2743 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2744 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2745 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2746 2747 self._match_texts(("INDEX", "KEY")) 2748 if self._match(TokenType.FOR): 2749 hint.set("target", self._advance_any() and self._prev.text.upper()) 2750 2751 hint.set("expressions", self._parse_wrapped_id_vars()) 2752 hints.append(hint) 2753 2754 return hints or None 2755 2756 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2757 return ( 2758 (not schema and self._parse_function(optional_parens=False)) 2759 or self._parse_id_var(any_token=False) 2760 or self._parse_string_as_identifier() 2761 or self._parse_placeholder() 2762 ) 2763 2764 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2765 catalog = None 2766 db = None 2767 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2768 2769 while self._match(TokenType.DOT): 2770 if catalog: 2771 # This allows nesting the table in arbitrarily many dot expressions if needed 2772 table = self.expression( 2773 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2774 ) 2775 else: 2776 catalog = db 2777 db = table 2778 table = self._parse_table_part(schema=schema) or "" 2779 2780 if not table: 2781 self.raise_error(f"Expected table name but got {self._curr}") 2782 2783 return self.expression( 2784 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2785 ) 2786 2787 def _parse_table( 2788 self, 2789 schema: bool = False, 2790 joins: bool = False, 2791 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2792 parse_bracket: bool = False, 2793 ) -> t.Optional[exp.Expression]: 2794 lateral = self._parse_lateral() 2795 if lateral: 2796 return lateral 2797 2798 unnest = self._parse_unnest() 2799 if unnest: 2800 return unnest 2801 2802 values = self._parse_derived_table_values() 2803 if values: 2804 return values 2805 2806 subquery = self._parse_select(table=True) 2807 if subquery: 2808 if not subquery.args.get("pivots"): 2809 subquery.set("pivots", self._parse_pivots()) 2810 return subquery 2811 2812 bracket = parse_bracket and self._parse_bracket(None) 2813 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2814 
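        # The dotted-name handling in _parse_table_parts above distributes the
        # parts across catalog, db and name. A hedged sketch:
        #
        #     >>> import sqlglot
        #     >>> tbl = sqlglot.parse_one("SELECT * FROM c.db.tbl").find(sqlglot.exp.Table)
        #     >>> tbl.catalog, tbl.db, tbl.name
        #     ('c', 'db', 'tbl')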
this = t.cast( 2815 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2816 ) 2817 2818 if schema: 2819 return self._parse_schema(this=this) 2820 2821 version = self._parse_version() 2822 2823 if version: 2824 this.set("version", version) 2825 2826 if self.dialect.ALIAS_POST_TABLESAMPLE: 2827 table_sample = self._parse_table_sample() 2828 2829 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2830 if alias: 2831 this.set("alias", alias) 2832 2833 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2834 return self.expression( 2835 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2836 ) 2837 2838 this.set("hints", self._parse_table_hints()) 2839 2840 if not this.args.get("pivots"): 2841 this.set("pivots", self._parse_pivots()) 2842 2843 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2844 table_sample = self._parse_table_sample() 2845 2846 if table_sample: 2847 table_sample.set("this", this) 2848 this = table_sample 2849 2850 if joins: 2851 for join in iter(self._parse_join, None): 2852 this.append("joins", join) 2853 2854 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2855 this.set("ordinality", True) 2856 this.set("alias", self._parse_table_alias()) 2857 2858 return this 2859 2860 def _parse_version(self) -> t.Optional[exp.Version]: 2861 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2862 this = "TIMESTAMP" 2863 elif self._match(TokenType.VERSION_SNAPSHOT): 2864 this = "VERSION" 2865 else: 2866 return None 2867 2868 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2869 kind = self._prev.text.upper() 2870 start = self._parse_bitwise() 2871 self._match_texts(("TO", "AND")) 2872 end = self._parse_bitwise() 2873 expression: t.Optional[exp.Expression] = self.expression( 2874 exp.Tuple, expressions=[start, end] 2875 ) 2876 elif self._match_text_seq("CONTAINED", "IN"): 2877 kind = "CONTAINED IN" 2878 expression = self.expression( 2879 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2880 ) 2881 elif self._match(TokenType.ALL): 2882 kind = "ALL" 2883 expression = None 2884 else: 2885 self._match_text_seq("AS", "OF") 2886 kind = "AS OF" 2887 expression = self._parse_type() 2888 2889 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2890 2891 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2892 if not self._match(TokenType.UNNEST): 2893 return None 2894 2895 expressions = self._parse_wrapped_csv(self._parse_equality) 2896 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2897 2898 alias = self._parse_table_alias() if with_alias else None 2899 2900 if alias: 2901 if self.dialect.UNNEST_COLUMN_ONLY: 2902 if alias.args.get("columns"): 2903 self.raise_error("Unexpected extra column alias in unnest.") 2904 2905 alias.set("columns", [alias.this]) 2906 alias.set("this", None) 2907 2908 columns = alias.args.get("columns") or [] 2909 if offset and len(expressions) < len(columns): 2910 offset = columns.pop() 2911 2912 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2913 self._match(TokenType.ALIAS) 2914 offset = self._parse_id_var( 2915 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2916 ) or exp.to_identifier("offset") 2917 2918 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2919 2920 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2921 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 
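        # is_derived is True for the parenthesized "(VALUES ...)" form used as
        # a derived table in a FROM clause. A rough sketch of the parsed shape:
        #
        #     >>> import sqlglot
        #     >>> sql = "SELECT * FROM (VALUES (1, 'a'), (2, 'b')) AS t(x, y)"
        #     >>> values = sqlglot.parse_one(sql).find(sqlglot.exp.Values)
        #     >>> len(values.expressions), values.alias
        #     (2, 't')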
if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if 
not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3033 self.raise_error("Expecting IN (") 3034 3035 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3036 3037 self._match_r_paren() 3038 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3039 3040 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3041 index = self._index 3042 include_nulls = None 3043 3044 if self._match(TokenType.PIVOT): 3045 unpivot = False 3046 elif self._match(TokenType.UNPIVOT): 3047 unpivot = True 3048 3049 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3050 if self._match_text_seq("INCLUDE", "NULLS"): 3051 include_nulls = True 3052 elif self._match_text_seq("EXCLUDE", "NULLS"): 3053 include_nulls = False 3054 else: 3055 return None 3056 3057 expressions = [] 3058 3059 if not self._match(TokenType.L_PAREN): 3060 self._retreat(index) 3061 return None 3062 3063 if unpivot: 3064 expressions = self._parse_csv(self._parse_column) 3065 else: 3066 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3067 3068 if not expressions: 3069 self.raise_error("Failed to parse PIVOT's aggregation list") 3070 3071 if not self._match(TokenType.FOR): 3072 self.raise_error("Expecting FOR") 3073 3074 field = self._parse_pivot_in() 3075 3076 self._match_r_paren() 3077 3078 pivot = self.expression( 3079 exp.Pivot, 3080 expressions=expressions, 3081 field=field, 3082 unpivot=unpivot, 3083 include_nulls=include_nulls, 3084 ) 3085 3086 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3087 pivot.set("alias", self._parse_table_alias()) 3088 3089 if not unpivot: 3090 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3091 3092 columns: t.List[exp.Expression] = [] 3093 for fld in pivot.args["field"].expressions: 3094 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3095 for name in names: 3096 if self.PREFIXED_PIVOT_COLUMNS: 3097 name = f"{name}_{field_name}" if name else field_name 3098 else: 3099 name = f"{field_name}_{name}" if name else field_name 3100 3101 columns.append(exp.to_identifier(name)) 3102 3103 pivot.set("columns", columns) 3104 3105 return pivot 3106 3107 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3108 return [agg.alias for agg in aggregations] 3109 3110 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3111 if not skip_where_token and not self._match(TokenType.WHERE): 3112 return None 3113 3114 return self.expression( 3115 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3116 ) 3117 3118 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3119 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3120 return None 3121 3122 elements = defaultdict(list) 3123 3124 if self._match(TokenType.ALL): 3125 return self.expression(exp.Group, all=True) 3126 3127 while True: 3128 expressions = self._parse_csv(self._parse_conjunction) 3129 if expressions: 3130 elements["expressions"].extend(expressions) 3131 3132 grouping_sets = self._parse_grouping_sets() 3133 if grouping_sets: 3134 elements["grouping_sets"].extend(grouping_sets) 3135 3136 rollup = None 3137 cube = None 3138 totals = None 3139 3140 index = self._index 3141 with_ = self._match(TokenType.WITH) 3142 if self._match(TokenType.ROLLUP): 3143 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3144 
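            # WITH ROLLUP stores the bare marker True, while ROLLUP (a, b)
            # stores the wrapped column list parsed above. Sketch of the
            # second form, assuming the default dialect:
            #
            #     >>> import sqlglot
            #     >>> ast = sqlglot.parse_one("SELECT a, b, SUM(c) FROM t GROUP BY ROLLUP (a, b)")
            #     >>> [col.name for col in ast.args["group"].args["rollup"]]
            #     ['a', 'b']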
elements["rollup"].extend(ensure_list(rollup)) 3145 3146 if self._match(TokenType.CUBE): 3147 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3148 elements["cube"].extend(ensure_list(cube)) 3149 3150 if self._match_text_seq("TOTALS"): 3151 totals = True 3152 elements["totals"] = True # type: ignore 3153 3154 if not (grouping_sets or rollup or cube or totals): 3155 if with_: 3156 self._retreat(index) 3157 break 3158 3159 return self.expression(exp.Group, **elements) # type: ignore 3160 3161 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3162 if not self._match(TokenType.GROUPING_SETS): 3163 return None 3164 3165 return self._parse_wrapped_csv(self._parse_grouping_set) 3166 3167 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3168 if self._match(TokenType.L_PAREN): 3169 grouping_set = self._parse_csv(self._parse_column) 3170 self._match_r_paren() 3171 return self.expression(exp.Tuple, expressions=grouping_set) 3172 3173 return self._parse_column() 3174 3175 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3176 if not skip_having_token and not self._match(TokenType.HAVING): 3177 return None 3178 return self.expression(exp.Having, this=self._parse_conjunction()) 3179 3180 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3181 if not self._match(TokenType.QUALIFY): 3182 return None 3183 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3184 3185 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3186 if skip_start_token: 3187 start = None 3188 elif self._match(TokenType.START_WITH): 3189 start = self._parse_conjunction() 3190 else: 3191 return None 3192 3193 self._match(TokenType.CONNECT_BY) 3194 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3195 exp.Prior, this=self._parse_bitwise() 3196 ) 3197 connect = self._parse_conjunction() 3198 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3199 3200 if not start and self._match(TokenType.START_WITH): 3201 start = self._parse_conjunction() 3202 3203 return self.expression(exp.Connect, start=start, connect=connect) 3204 3205 def _parse_name_as_expression(self) -> exp.Alias: 3206 return self.expression( 3207 exp.Alias, 3208 alias=self._parse_id_var(any_token=True), 3209 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3210 ) 3211 3212 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3213 if self._match_text_seq("INTERPOLATE"): 3214 return self._parse_wrapped_csv(self._parse_name_as_expression) 3215 return None 3216 3217 def _parse_order( 3218 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3219 ) -> t.Optional[exp.Expression]: 3220 siblings = None 3221 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3222 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3223 return this 3224 3225 siblings = True 3226 3227 return self.expression( 3228 exp.Order, 3229 this=this, 3230 expressions=self._parse_csv(self._parse_ordered), 3231 interpolate=self._parse_interpolate(), 3232 siblings=siblings, 3233 ) 3234 3235 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3236 if not self._match(token): 3237 return None 3238 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3239 3240 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3241 this = parse_method() if parse_method else self._parse_conjunction() 3242 3243 asc = 
self._match(TokenType.ASC) 3244 desc = self._match(TokenType.DESC) or (asc and False) 3245 3246 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3247 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3248 3249 nulls_first = is_nulls_first or False 3250 explicitly_null_ordered = is_nulls_first or is_nulls_last 3251 3252 if ( 3253 not explicitly_null_ordered 3254 and ( 3255 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3256 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3257 ) 3258 and self.dialect.NULL_ORDERING != "nulls_are_last" 3259 ): 3260 nulls_first = True 3261 3262 if self._match_text_seq("WITH", "FILL"): 3263 with_fill = self.expression( 3264 exp.WithFill, 3265 **{ # type: ignore 3266 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3267 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3268 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3269 }, 3270 ) 3271 else: 3272 with_fill = None 3273 3274 return self.expression( 3275 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3276 ) 3277 3278 def _parse_limit( 3279 self, this: t.Optional[exp.Expression] = None, top: bool = False 3280 ) -> t.Optional[exp.Expression]: 3281 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3282 comments = self._prev_comments 3283 if top: 3284 limit_paren = self._match(TokenType.L_PAREN) 3285 expression = self._parse_term() if limit_paren else self._parse_number() 3286 3287 if limit_paren: 3288 self._match_r_paren() 3289 else: 3290 expression = self._parse_term() 3291 3292 if self._match(TokenType.COMMA): 3293 offset = expression 3294 expression = self._parse_term() 3295 else: 3296 offset = None 3297 3298 limit_exp = self.expression( 3299 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3300 ) 3301 3302 return limit_exp 3303 3304 if self._match(TokenType.FETCH): 3305 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3306 direction = self._prev.text.upper() if direction else "FIRST" 3307 3308 count = self._parse_field(tokens=self.FETCH_TOKENS) 3309 percent = self._match(TokenType.PERCENT) 3310 3311 self._match_set((TokenType.ROW, TokenType.ROWS)) 3312 3313 only = self._match_text_seq("ONLY") 3314 with_ties = self._match_text_seq("WITH", "TIES") 3315 3316 if only and with_ties: 3317 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3318 3319 return self.expression( 3320 exp.Fetch, 3321 direction=direction, 3322 count=count, 3323 percent=percent, 3324 with_ties=with_ties, 3325 ) 3326 3327 return this 3328 3329 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3330 if not self._match(TokenType.OFFSET): 3331 return this 3332 3333 count = self._parse_term() 3334 self._match_set((TokenType.ROW, TokenType.ROWS)) 3335 return self.expression(exp.Offset, this=this, expression=count) 3336 3337 def _parse_locks(self) -> t.List[exp.Lock]: 3338 locks = [] 3339 while True: 3340 if self._match_text_seq("FOR", "UPDATE"): 3341 update = True 3342 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3343 "LOCK", "IN", "SHARE", "MODE" 3344 ): 3345 update = False 3346 else: 3347 break 3348 3349 expressions = None 3350 if self._match_text_seq("OF"): 3351 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3352 3353 wait: t.Optional[bool | exp.Expression] = None 3354 if self._match_text_seq("NOWAIT"): 3355 wait = True 3356 elif self._match_text_seq("WAIT"): 3357 wait = 
self._parse_primary() 3358 elif self._match_text_seq("SKIP", "LOCKED"): 3359 wait = False 3360 3361 locks.append( 3362 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3363 ) 3364 3365 return locks 3366 3367 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3368 while this and self._match_set(self.SET_OPERATIONS): 3369 token_type = self._prev.token_type 3370 3371 if token_type == TokenType.UNION: 3372 operation = exp.Union 3373 elif token_type == TokenType.EXCEPT: 3374 operation = exp.Except 3375 else: 3376 operation = exp.Intersect 3377 3378 comments = self._prev.comments 3379 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3380 by_name = self._match_text_seq("BY", "NAME") 3381 expression = self._parse_select(nested=True, parse_set_operation=False) 3382 3383 this = self.expression( 3384 operation, 3385 comments=comments, 3386 this=this, 3387 distinct=distinct, 3388 by_name=by_name, 3389 expression=expression, 3390 ) 3391 3392 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3393 expression = this.expression 3394 3395 if expression: 3396 for arg in self.UNION_MODIFIERS: 3397 expr = expression.args.get(arg) 3398 if expr: 3399 this.set(arg, expr.pop()) 3400 3401 return this 3402 3403 def _parse_expression(self) -> t.Optional[exp.Expression]: 3404 return self._parse_alias(self._parse_conjunction()) 3405 3406 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3407 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3408 3409 def _parse_equality(self) -> t.Optional[exp.Expression]: 3410 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3411 3412 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3413 return self._parse_tokens(self._parse_range, self.COMPARISON) 3414 3415 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3416 this = this or self._parse_bitwise() 3417 negate = self._match(TokenType.NOT) 3418 3419 if self._match_set(self.RANGE_PARSERS): 3420 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3421 if not expression: 3422 return this 3423 3424 this = expression 3425 elif self._match(TokenType.ISNULL): 3426 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3427 3428 # Postgres supports ISNULL and NOTNULL for conditions. 
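        # Both shorthands normalize to the standard tree: ISNULL becomes
        # exp.Is(..., exp.Null()), and NOTNULL wraps that in exp.Not. Sketch,
        # assuming the dialect tokenizes these keywords:
        #
        #     >>> import sqlglot
        #     >>> ast = sqlglot.parse_one("x NOTNULL", read="postgres")
        #     >>> type(ast).__name__, type(ast.this).__name__
        #     ('Not', 'Is')
        #
        # Background on these Postgres operators: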
3429 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3430 if self._match(TokenType.NOTNULL): 3431 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3432 this = self.expression(exp.Not, this=this) 3433 3434 if negate: 3435 this = self.expression(exp.Not, this=this) 3436 3437 if self._match(TokenType.IS): 3438 this = self._parse_is(this) 3439 3440 return this 3441 3442 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3443 index = self._index - 1 3444 negate = self._match(TokenType.NOT) 3445 3446 if self._match_text_seq("DISTINCT", "FROM"): 3447 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3448 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3449 3450 expression = self._parse_null() or self._parse_boolean() 3451 if not expression: 3452 self._retreat(index) 3453 return None 3454 3455 this = self.expression(exp.Is, this=this, expression=expression) 3456 return self.expression(exp.Not, this=this) if negate else this 3457 3458 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3459 unnest = self._parse_unnest(with_alias=False) 3460 if unnest: 3461 this = self.expression(exp.In, this=this, unnest=unnest) 3462 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3463 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3464 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3465 3466 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3467 this = self.expression(exp.In, this=this, query=expressions[0]) 3468 else: 3469 this = self.expression(exp.In, this=this, expressions=expressions) 3470 3471 if matched_l_paren: 3472 self._match_r_paren(this) 3473 elif not self._match(TokenType.R_BRACKET, expression=this): 3474 self.raise_error("Expecting ]") 3475 else: 3476 this = self.expression(exp.In, this=this, field=self._parse_field()) 3477 3478 return this 3479 3480 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3481 low = self._parse_bitwise() 3482 self._match(TokenType.AND) 3483 high = self._parse_bitwise() 3484 return self.expression(exp.Between, this=this, low=low, high=high) 3485 3486 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3487 if not self._match(TokenType.ESCAPE): 3488 return this 3489 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3490 3491 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3492 index = self._index 3493 3494 if not self._match(TokenType.INTERVAL) and match_interval: 3495 return None 3496 3497 if self._match(TokenType.STRING, advance=False): 3498 this = self._parse_primary() 3499 else: 3500 this = self._parse_term() 3501 3502 if not this or ( 3503 isinstance(this, exp.Column) 3504 and not this.table 3505 and not this.this.quoted 3506 and this.name.upper() == "IS" 3507 ): 3508 self._retreat(index) 3509 return None 3510 3511 unit = self._parse_function() or ( 3512 not self._match(TokenType.ALIAS, advance=False) 3513 and self._parse_var(any_token=True, upper=True) 3514 ) 3515 3516 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3517 # each INTERVAL expression into this canonical form so it's easy to transpile 3518 if this and this.is_number: 3519 this = exp.Literal.string(this.name) 3520 elif this and this.is_string: 3521 parts = this.name.split() 3522 3523 if len(parts) == 2: 3524 if unit: 3525 # This 

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)
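
    # Illustrative sketch: intervals are canonicalized so that the quantity is a
    # string literal and the unit an uppercased Var, regardless of the input form:
    #
    #     import sqlglot
    #
    #     iv = sqlglot.parse_one("INTERVAL '5 day'")
    #     assert iv.this.sql() == "'5'"          # quantity split out of the string
    #     assert iv.args["unit"].sql() == "DAY"  # unit uppercased into a Var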

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
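
    # Illustrative sketch: the methods above form the precedence ladder, each
    # level delegating to the next-tighter one (conjunction -> equality ->
    # comparison -> range -> bitwise -> term -> factor -> unary), so standard
    # operator precedence falls out of the call structure:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tree = sqlglot.parse_one("1 + 2 * 3")
    #     assert isinstance(tree, exp.Add)             # + parsed at the TERM level
    #     assert isinstance(tree.expression, exp.Mul)  # * bound tighter, at FACTOR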

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
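
    # Illustrative sketch: _parse_types powers both casts and DDL; nested types
    # recurse through the same method, so ARRAY<INT> and friends round-trip:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     cast = sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))")
    #     assert cast.to.is_type(exp.DataType.Type.DECIMAL)
    #     assert exp.DataType.build("ARRAY<INT>").sql() == "ARRAY<INT>"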

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # BigQuery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc.
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
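
    # Illustrative sketch: COLUMN_OPERATORS drives postfix column syntax such as
    # the :: cast shorthand, while dotted references are re-nested into
    # table/db/catalog parts:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("a::INT"), exp.Cast)
    #     col = sqlglot.parse_one("db.tbl.col")
    #     assert (col.name, col.args["table"].name, col.args["db"].name) == ("col", "tbl", "db")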

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
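
    # Illustrative sketch: known function names resolve through FUNCTIONS into
    # typed nodes, while anything unrecognized becomes exp.Anonymous; both paths
    # also accept the ODBC {fn ...} escape handled above:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("COALESCE(a, b)"), exp.Coalesce)
    #     assert isinstance(sqlglot.parse_one("MY_UDF(a, b)"), exp.Anonymous)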

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
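
    # Illustrative sketch: _parse_schema/_parse_column_def turn a DDL column list
    # into exp.ColumnDef nodes carrying a type under `kind` plus constraint children:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL)")
    #     col = ddl.find(exp.ColumnDef)
    #     assert col.name == "id" and col.args["kind"].is_type("INT")
    #     assert len(col.args["constraints"]) == 1  # the NOT NULL constraint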

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None
"REPLICATION"): 4240 return self.expression(exp.NotForReplicationColumnConstraint) 4241 return None 4242 4243 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4244 if self._match(TokenType.CONSTRAINT): 4245 this = self._parse_id_var() 4246 else: 4247 this = None 4248 4249 if self._match_texts(self.CONSTRAINT_PARSERS): 4250 return self.expression( 4251 exp.ColumnConstraint, 4252 this=this, 4253 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4254 ) 4255 4256 return this 4257 4258 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4259 if not self._match(TokenType.CONSTRAINT): 4260 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4261 4262 this = self._parse_id_var() 4263 expressions = [] 4264 4265 while True: 4266 constraint = self._parse_unnamed_constraint() or self._parse_function() 4267 if not constraint: 4268 break 4269 expressions.append(constraint) 4270 4271 return self.expression(exp.Constraint, this=this, expressions=expressions) 4272 4273 def _parse_unnamed_constraint( 4274 self, constraints: t.Optional[t.Collection[str]] = None 4275 ) -> t.Optional[exp.Expression]: 4276 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4277 constraints or self.CONSTRAINT_PARSERS 4278 ): 4279 return None 4280 4281 constraint = self._prev.text.upper() 4282 if constraint not in self.CONSTRAINT_PARSERS: 4283 self.raise_error(f"No parser found for schema constraint {constraint}.") 4284 4285 return self.CONSTRAINT_PARSERS[constraint](self) 4286 4287 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4288 self._match_text_seq("KEY") 4289 return self.expression( 4290 exp.UniqueColumnConstraint, 4291 this=self._parse_schema(self._parse_id_var(any_token=False)), 4292 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4293 ) 4294 4295 def _parse_key_constraint_options(self) -> t.List[str]: 4296 options = [] 4297 while True: 4298 if not self._curr: 4299 break 4300 4301 if self._match(TokenType.ON): 4302 action = None 4303 on = self._advance_any() and self._prev.text 4304 4305 if self._match_text_seq("NO", "ACTION"): 4306 action = "NO ACTION" 4307 elif self._match_text_seq("CASCADE"): 4308 action = "CASCADE" 4309 elif self._match_text_seq("RESTRICT"): 4310 action = "RESTRICT" 4311 elif self._match_pair(TokenType.SET, TokenType.NULL): 4312 action = "SET NULL" 4313 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4314 action = "SET DEFAULT" 4315 else: 4316 self.raise_error("Invalid key constraint") 4317 4318 options.append(f"ON {on} {action}") 4319 elif self._match_text_seq("NOT", "ENFORCED"): 4320 options.append("NOT ENFORCED") 4321 elif self._match_text_seq("DEFERRABLE"): 4322 options.append("DEFERRABLE") 4323 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4324 options.append("INITIALLY DEFERRED") 4325 elif self._match_text_seq("NORELY"): 4326 options.append("NORELY") 4327 elif self._match_text_seq("MATCH", "FULL"): 4328 options.append("MATCH FULL") 4329 else: 4330 break 4331 4332 return options 4333 4334 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4335 if match and not self._match(TokenType.REFERENCES): 4336 return None 4337 4338 expressions = None 4339 this = self._parse_table(schema=True) 4340 options = self._parse_key_constraint_options() 4341 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4342 4343 def _parse_foreign_key(self) -> exp.ForeignKey: 4344 expressions = 

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)
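
    # Illustrative sketch: CASE parses into exp.Case with one exp.If per WHEN arm
    # and the ELSE branch stored under `default`:
    #
    #     import sqlglot
    #
    #     case = sqlglot.parse_one("CASE WHEN x = 1 THEN 'a' ELSE 'b' END")
    #     assert len(case.args["ifs"]) == 1
    #     assert case.args["default"].sql() == "'b'"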

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
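
    # Illustrative sketch: because _parse_string_agg above folds STRING_AGG (and
    # its WITHIN GROUP form) into exp.GroupConcat, it transpiles naturally to
    # GROUP_CONCAT-style dialects:
    #
    #     import sqlglot
    #
    #     sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")
    #     # -> roughly ["SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t"]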
4616 """ 4617 args = self._parse_csv(self._parse_conjunction) 4618 4619 if len(args) < 3: 4620 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4621 4622 expression, *expressions = args 4623 if not expression: 4624 return None 4625 4626 ifs = [] 4627 for search, result in zip(expressions[::2], expressions[1::2]): 4628 if not search or not result: 4629 return None 4630 4631 if isinstance(search, exp.Literal): 4632 ifs.append( 4633 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4634 ) 4635 elif isinstance(search, exp.Null): 4636 ifs.append( 4637 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4638 ) 4639 else: 4640 cond = exp.or_( 4641 exp.EQ(this=expression.copy(), expression=search), 4642 exp.and_( 4643 exp.Is(this=expression.copy(), expression=exp.Null()), 4644 exp.Is(this=search.copy(), expression=exp.Null()), 4645 copy=False, 4646 ), 4647 copy=False, 4648 ) 4649 ifs.append(exp.If(this=cond, true=result)) 4650 4651 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4652 4653 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4654 self._match_text_seq("KEY") 4655 key = self._parse_column() 4656 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4657 self._match_text_seq("VALUE") 4658 value = self._parse_bitwise() 4659 4660 if not key and not value: 4661 return None 4662 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4663 4664 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4665 if not this or not self._match_text_seq("FORMAT", "JSON"): 4666 return this 4667 4668 return self.expression(exp.FormatJson, this=this) 4669 4670 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4671 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4672 for value in values: 4673 if self._match_text_seq(value, "ON", on): 4674 return f"{value} ON {on}" 4675 4676 return None 4677 4678 @t.overload 4679 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 4680 ... 4681 4682 @t.overload 4683 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 4684 ... 

    @t.overload
    def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
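
    # Illustrative sketch: _parse_trim normalizes the SQL-standard TRIM forms,
    # swapping operands so that `this` is always the string being trimmed:
    #
    #     import sqlglot
    #
    #     trim = sqlglot.parse_one("TRIM(LEADING 'x' FROM y)")
    #     assert trim.args["position"] == "LEADING"
    #     assert trim.this.sql() == "y" and trim.expression.sql() == "'x'"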
self._match_text_seq("RESPECT", "NULLS"): 4893 return self.expression(exp.RespectNulls, this=this) 4894 return this 4895 4896 def _parse_window( 4897 self, this: t.Optional[exp.Expression], alias: bool = False 4898 ) -> t.Optional[exp.Expression]: 4899 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4900 self._match(TokenType.WHERE) 4901 this = self.expression( 4902 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4903 ) 4904 self._match_r_paren() 4905 4906 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4907 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4908 if self._match_text_seq("WITHIN", "GROUP"): 4909 order = self._parse_wrapped(self._parse_order) 4910 this = self.expression(exp.WithinGroup, this=this, expression=order) 4911 4912 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4913 # Some dialects choose to implement and some do not. 4914 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4915 4916 # There is some code above in _parse_lambda that handles 4917 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4918 4919 # The below changes handle 4920 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4921 4922 # Oracle allows both formats 4923 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4924 # and Snowflake chose to do the same for familiarity 4925 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4926 this = self._parse_respect_or_ignore_nulls(this) 4927 4928 # bigquery select from window x AS (partition by ...) 4929 if alias: 4930 over = None 4931 self._match(TokenType.ALIAS) 4932 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4933 return this 4934 else: 4935 over = self._prev.text.upper() 4936 4937 if not self._match(TokenType.L_PAREN): 4938 return self.expression( 4939 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4940 ) 4941 4942 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4943 4944 first = self._match(TokenType.FIRST) 4945 if self._match_text_seq("LAST"): 4946 first = False 4947 4948 partition, order = self._parse_partition_and_order() 4949 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4950 4951 if kind: 4952 self._match(TokenType.BETWEEN) 4953 start = self._parse_window_spec() 4954 self._match(TokenType.AND) 4955 end = self._parse_window_spec() 4956 4957 spec = self.expression( 4958 exp.WindowSpec, 4959 kind=kind, 4960 start=start["value"], 4961 start_side=start["side"], 4962 end=end["value"], 4963 end_side=end["side"], 4964 ) 4965 else: 4966 spec = None 4967 4968 self._match_r_paren() 4969 4970 window = self.expression( 4971 exp.Window, 4972 this=this, 4973 partition_by=partition, 4974 order=order, 4975 spec=spec, 4976 alias=window_alias, 4977 over=over, 4978 first=first, 4979 ) 4980 4981 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and this.this.comments:
                this.comments = this.this.comments
                this.this.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
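
    # Illustrative sketch: _parse_csv and _parse_tokens are the two workhorse
    # combinators. _parse_csv collects separator-delimited items; _parse_tokens
    # left-folds a token-to-expression table, which is how the TERM/FACTOR/etc.
    # precedence levels above are defined:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tree = sqlglot.parse_one("a - b + c")  # folds as ((a - b) + c)
    #     assert isinstance(tree, exp.Add) and isinstance(tree.this, exp.Sub)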
self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5183 5184 def _parse_wrapped_csv( 5185 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5186 ) -> t.List[exp.Expression]: 5187 return self._parse_wrapped( 5188 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5189 ) 5190 5191 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5192 wrapped = self._match(TokenType.L_PAREN) 5193 if not wrapped and not optional: 5194 self.raise_error("Expecting (") 5195 parse_result = parse_method() 5196 if wrapped: 5197 self._match_r_paren() 5198 return parse_result 5199 5200 def _parse_expressions(self) -> t.List[exp.Expression]: 5201 return self._parse_csv(self._parse_expression) 5202 5203 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5204 return self._parse_select() or self._parse_set_operations( 5205 self._parse_expression() if alias else self._parse_conjunction() 5206 ) 5207 5208 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5209 return self._parse_query_modifiers( 5210 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5211 ) 5212 5213 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5214 this = None 5215 if self._match_texts(self.TRANSACTION_KIND): 5216 this = self._prev.text 5217 5218 self._match_texts(("TRANSACTION", "WORK")) 5219 5220 modes = [] 5221 while True: 5222 mode = [] 5223 while self._match(TokenType.VAR): 5224 mode.append(self._prev.text) 5225 5226 if mode: 5227 modes.append(" ".join(mode)) 5228 if not self._match(TokenType.COMMA): 5229 break 5230 5231 return self.expression(exp.Transaction, this=this, modes=modes) 5232 5233 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5234 chain = None 5235 savepoint = None 5236 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5237 5238 self._match_texts(("TRANSACTION", "WORK")) 5239 5240 if self._match_text_seq("TO"): 5241 self._match_text_seq("SAVEPOINT") 5242 savepoint = self._parse_id_var() 5243 5244 if self._match(TokenType.AND): 5245 chain = not self._match_text_seq("NO") 5246 self._match_text_seq("CHAIN") 5247 5248 if is_rollback: 5249 return self.expression(exp.Rollback, savepoint=savepoint) 5250 5251 return self.expression(exp.Commit, chain=chain) 5252 5253 def _parse_refresh(self) -> exp.Refresh: 5254 self._match(TokenType.TABLE) 5255 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5256 5257 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5258 if not self._match_text_seq("ADD"): 5259 return None 5260 5261 self._match(TokenType.COLUMN) 5262 exists_column = self._parse_exists(not_=True) 5263 expression = self._parse_field_def() 5264 5265 if expression: 5266 expression.set("exists", exists_column) 5267 5268 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5269 if self._match_texts(("FIRST", "AFTER")): 5270 position = self._prev.text 5271 column_position = self.expression( 5272 exp.ColumnPosition, this=self._parse_column(), position=position 5273 ) 5274 expression.set("position", column_position) 5275 5276 return expression 5277 5278 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5279 drop = self._match(TokenType.DROP) and self._parse_drop() 5280 if drop and not isinstance(drop, exp.Command): 5281 drop.set("kind", drop.args.get("kind", "COLUMN")) 5282 return drop 5283 5284 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5285 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5286 return self.expression( 5287 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5288 ) 5289 5290 def _parse_add_constraint(self) -> exp.AddConstraint: 5291 this = None 5292 kind = self._prev.token_type 5293 5294 if kind == TokenType.CONSTRAINT: 5295 this = self._parse_id_var() 5296 5297 if self._match_text_seq("CHECK"): 5298 expression = self._parse_wrapped(self._parse_conjunction) 5299 enforced = self._match_text_seq("ENFORCED") or False 5300 5301 return self.expression( 5302 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5303 ) 5304 5305 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5306 expression = self._parse_foreign_key() 5307 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5308 expression = self._parse_primary_key() 5309 else: 5310 expression = None 5311 5312 return self.expression(exp.AddConstraint, this=this, expression=expression) 5313 5314 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5315 index = self._index - 1 5316 5317 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5318 return self._parse_csv(self._parse_add_constraint) 5319 5320 self._retreat(index) 5321 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5322 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5323 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5324 5325 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5326 self._match(TokenType.COLUMN) 5327 column = self._parse_field(any_token=True) 5328 5329 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5330 return self.expression(exp.AlterColumn, this=column, drop=True) 5331 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5332 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5333 5334 self._match_text_seq("SET", "DATA") 5335 return self.expression( 5336 exp.AlterColumn, 5337 this=column, 5338 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5339 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5340 using=self._match(TokenType.USING) and self._parse_conjunction(), 5341 ) 5342 5343 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5344 index = self._index - 1 5345 5346 partition_exists = self._parse_exists() 5347 if self._match(TokenType.PARTITION, advance=False): 5348 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5349 5350 self._retreat(index) 5351 return self._parse_csv(self._parse_drop_column) 5352 5353 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5354 if self._match(TokenType.COLUMN): 5355 exists = self._parse_exists() 5356 old_column = self._parse_column() 5357 to = self._match_text_seq("TO") 5358 new_column = self._parse_column() 5359 5360 if old_column is None or to is None or new_column is None: 5361 return None 5362 5363 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5364 5365 self._match_text_seq("TO") 5366 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5367 5368 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5369 start = self._prev 5370 5371 if not self._match(TokenType.TABLE): 5372 return self._parse_as_command(start) 5373 5374 exists = 
self._parse_exists() 5375 only = self._match_text_seq("ONLY") 5376 this = self._parse_table(schema=True) 5377 5378 if self._next: 5379 self._advance() 5380 5381 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5382 if parser: 5383 actions = ensure_list(parser(self)) 5384 5385 if not self._curr and actions: 5386 return self.expression( 5387 exp.AlterTable, 5388 this=this, 5389 exists=exists, 5390 actions=actions, 5391 only=only, 5392 ) 5393 5394 return self._parse_as_command(start) 5395 5396 def _parse_merge(self) -> exp.Merge: 5397 self._match(TokenType.INTO) 5398 target = self._parse_table() 5399 5400 if target and self._match(TokenType.ALIAS, advance=False): 5401 target.set("alias", self._parse_table_alias()) 5402 5403 self._match(TokenType.USING) 5404 using = self._parse_table() 5405 5406 self._match(TokenType.ON) 5407 on = self._parse_conjunction() 5408 5409 return self.expression( 5410 exp.Merge, 5411 this=target, 5412 using=using, 5413 on=on, 5414 expressions=self._parse_when_matched(), 5415 ) 5416 5417 def _parse_when_matched(self) -> t.List[exp.When]: 5418 whens = [] 5419 5420 while self._match(TokenType.WHEN): 5421 matched = not self._match(TokenType.NOT) 5422 self._match_text_seq("MATCHED") 5423 source = ( 5424 False 5425 if self._match_text_seq("BY", "TARGET") 5426 else self._match_text_seq("BY", "SOURCE") 5427 ) 5428 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5429 5430 self._match(TokenType.THEN) 5431 5432 if self._match(TokenType.INSERT): 5433 _this = self._parse_star() 5434 if _this: 5435 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5436 else: 5437 then = self.expression( 5438 exp.Insert, 5439 this=self._parse_value(), 5440 expression=self._match(TokenType.VALUES) and self._parse_value(), 5441 ) 5442 elif self._match(TokenType.UPDATE): 5443 expressions = self._parse_star() 5444 if expressions: 5445 then = self.expression(exp.Update, expressions=expressions) 5446 else: 5447 then = self.expression( 5448 exp.Update, 5449 expressions=self._match(TokenType.SET) 5450 and self._parse_csv(self._parse_equality), 5451 ) 5452 elif self._match(TokenType.DELETE): 5453 then = self.expression(exp.Var, this=self._prev.text) 5454 else: 5455 then = None 5456 5457 whens.append( 5458 self.expression( 5459 exp.When, 5460 matched=matched, 5461 source=source, 5462 condition=condition, 5463 then=then, 5464 ) 5465 ) 5466 return whens 5467 5468 def _parse_show(self) -> t.Optional[exp.Expression]: 5469 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5470 if parser: 5471 return parser(self) 5472 return self._parse_as_command(self._prev) 5473 5474 def _parse_set_item_assignment( 5475 self, kind: t.Optional[str] = None 5476 ) -> t.Optional[exp.Expression]: 5477 index = self._index 5478 5479 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5480 return self._parse_set_transaction(global_=kind == "GLOBAL") 5481 5482 left = self._parse_primary() or self._parse_id_var() 5483 assignment_delimiter = self._match_texts(("=", "TO")) 5484 5485 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5486 self._retreat(index) 5487 return None 5488 5489 right = self._parse_statement() or self._parse_id_var() 5490 this = self.expression(exp.EQ, this=left, expression=right) 5491 5492 return self.expression(exp.SetItem, this=this, kind=kind) 5493 5494 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5495 
self._match_text_seq("TRANSACTION") 5496 characteristics = self._parse_csv( 5497 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5498 ) 5499 return self.expression( 5500 exp.SetItem, 5501 expressions=characteristics, 5502 kind="TRANSACTION", 5503 **{"global": global_}, # type: ignore 5504 ) 5505 5506 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5507 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5508 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5509 5510 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5511 index = self._index 5512 set_ = self.expression( 5513 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5514 ) 5515 5516 if self._curr: 5517 self._retreat(index) 5518 return self._parse_as_command(self._prev) 5519 5520 return set_ 5521 5522 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5523 for option in options: 5524 if self._match_text_seq(*option.split(" ")): 5525 return exp.var(option) 5526 return None 5527 5528 def _parse_as_command(self, start: Token) -> exp.Command: 5529 while self._curr: 5530 self._advance() 5531 text = self._find_sql(start, self._prev) 5532 size = len(start.text) 5533 self._warn_unsupported() 5534 return exp.Command(this=text[:size], expression=text[size:]) 5535 5536 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5537 settings = [] 5538 5539 self._match_l_paren() 5540 kind = self._parse_id_var() 5541 5542 if self._match(TokenType.L_PAREN): 5543 while True: 5544 key = self._parse_id_var() 5545 value = self._parse_primary() 5546 5547 if not key and value is None: 5548 break 5549 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5550 self._match(TokenType.R_PAREN) 5551 5552 self._match_r_paren() 5553 5554 return self.expression( 5555 exp.DictProperty, 5556 this=this, 5557 kind=kind.this if kind else None, 5558 settings=settings, 5559 ) 5560 5561 def _parse_dict_range(self, this: str) -> exp.DictRange: 5562 self._match_l_paren() 5563 has_min = self._match_text_seq("MIN") 5564 if has_min: 5565 min = self._parse_var() or self._parse_primary() 5566 self._match_text_seq("MAX") 5567 max = self._parse_var() or self._parse_primary() 5568 else: 5569 max = self._parse_var() or self._parse_primary() 5570 min = exp.Literal.number(0) 5571 self._match_r_paren() 5572 return self.expression(exp.DictRange, this=this, min=min, max=max) 5573 5574 def _parse_comprehension( 5575 self, this: t.Optional[exp.Expression] 5576 ) -> t.Optional[exp.Comprehension]: 5577 index = self._index 5578 expression = self._parse_column() 5579 if not self._match(TokenType.IN): 5580 self._retreat(index - 1) 5581 return None 5582 iterator = self._parse_column() 5583 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5584 return self.expression( 5585 exp.Comprehension, 5586 this=this, 5587 expression=expression, 5588 iterator=iterator, 5589 condition=condition, 5590 ) 5591 5592 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5593 if self._match(TokenType.HEREDOC_STRING): 5594 return self.expression(exp.Heredoc, this=self._prev.text) 5595 5596 if not self._match_text_seq("$"): 5597 return None 5598 5599 tags = ["$"] 5600 tag_text = None 5601 5602 if self._is_connected(): 5603 self._advance() 5604 tags.append(self._prev.text.upper()) 5605 else: 5606 self.raise_error("No closing $ found") 5607 5608 if tags[-1] != "$": 5609 if self._is_connected() 
and self._match_text_seq("$"): 5610 tag_text = tags[-1] 5611 tags.append("$") 5612 else: 5613 self.raise_error("No closing $ found") 5614 5615 heredoc_start = self._curr 5616 5617 while self._curr: 5618 if self._match_text_seq(*tags, advance=False): 5619 this = self._find_sql(heredoc_start, self._prev) 5620 self._advance(len(tags)) 5621 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5622 5623 self._advance() 5624 5625 self.raise_error(f"No closing {''.join(tags)} found") 5626 return None 5627 5628 def _find_parser( 5629 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5630 ) -> t.Optional[t.Callable]: 5631 if not self._curr: 5632 return None 5633 5634 index = self._index 5635 this = [] 5636 while True: 5637 # The current token might be multiple words 5638 curr = self._curr.text.upper() 5639 key = curr.split(" ") 5640 this.append(curr) 5641 5642 self._advance() 5643 result, trie = in_trie(trie, key) 5644 if result == TrieResult.FAILED: 5645 break 5646 5647 if result == TrieResult.EXISTS: 5648 subparser = parsers[" ".join(this)] 5649 return subparser 5650 5651 self._retreat(index) 5652 return None 5653 5654 def _match(self, token_type, advance=True, expression=None): 5655 if not self._curr: 5656 return None 5657 5658 if self._curr.token_type == token_type: 5659 if advance: 5660 self._advance() 5661 self._add_comments(expression) 5662 return True 5663 5664 return None 5665 5666 def _match_set(self, types, advance=True): 5667 if not self._curr: 5668 return None 5669 5670 if self._curr.token_type in types: 5671 if advance: 5672 self._advance() 5673 return True 5674 5675 return None 5676 5677 def _match_pair(self, token_type_a, token_type_b, advance=True): 5678 if not self._curr or not self._next: 5679 return None 5680 5681 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5682 if advance: 5683 self._advance(2) 5684 return True 5685 5686 return None 5687 5688 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5689 if not self._match(TokenType.L_PAREN, expression=expression): 5690 self.raise_error("Expecting (") 5691 5692 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5693 if not self._match(TokenType.R_PAREN, expression=expression): 5694 self.raise_error("Expecting )") 5695 5696 def _match_texts(self, texts, advance=True): 5697 if self._curr and self._curr.text.upper() in texts: 5698 if advance: 5699 self._advance() 5700 return True 5701 return None 5702 5703 def _match_text_seq(self, *texts, advance=True): 5704 index = self._index 5705 for text in texts: 5706 if self._curr and self._curr.text.upper() == text: 5707 self._advance() 5708 else: 5709 self._retreat(index) 5710 return None 5711 5712 if not advance: 5713 self._retreat(index) 5714 5715 return True 5716 5717 @t.overload 5718 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5719 ... 5720 5721 @t.overload 5722 def _replace_columns_with_dots( 5723 self, this: t.Optional[exp.Expression] 5724 ) -> t.Optional[exp.Expression]: 5725 ... 
5726 5727 def _replace_columns_with_dots(self, this): 5728 if isinstance(this, exp.Dot): 5729 exp.replace_children(this, self._replace_columns_with_dots) 5730 elif isinstance(this, exp.Column): 5731 exp.replace_children(this, self._replace_columns_with_dots) 5732 table = this.args.get("table") 5733 this = ( 5734 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5735 ) 5736 5737 return this 5738 5739 def _replace_lambda( 5740 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5741 ) -> t.Optional[exp.Expression]: 5742 if not node: 5743 return node 5744 5745 for column in node.find_all(exp.Column): 5746 if column.parts[0].name in lambda_variables: 5747 dot_or_id = column.to_dot() if column.table else column.this 5748 parent = column.parent 5749 5750 while isinstance(parent, exp.Dot): 5751 if not isinstance(parent.parent, exp.Dot): 5752 parent.replace(dot_or_id) 5753 break 5754 parent = parent.parent 5755 else: 5756 if column is node: 5757 node = dot_or_id 5758 else: 5759 column.replace(dot_or_id) 5760 return node
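A quick sanity check of the window- and alias-parsing paths above (_parse_window_spec, _parse_alias), sketched against the public parse_one helper; the query, table, and column names are illustrative only:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS total FROM t"
)

alias = ast.expressions[0]    # exp.Alias built by _parse_alias
window = alias.this           # exp.Window wrapping the SUM call
spec = window.args["spec"]    # frame assembled from two _parse_window_spec dicts

assert alias.alias == "total"
assert spec.args["start"] == "UNBOUNDED"        # the "value" slot
assert spec.args["start_side"] == "PRECEDING"   # the "side" slot
assert spec.args["end"] == "CURRENT ROW"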
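The placeholder machinery (_parse_placeholder plus PLACEHOLDER_PARSERS) can be observed the same way; :id and ? below are illustrative placeholders, and the unpack relies on the tree walk visiting the first condition first:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT * FROM users WHERE id = :id AND age > ?")

named, anonymous = ast.find_all(exp.Placeholder)
assert named.name == "id"       # TokenType.COLON branch of PLACEHOLDER_PARSERS
assert anonymous.this is None   # bare "?" (TokenType.PLACEHOLDER)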
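_parse_transaction and _parse_commit_or_rollback can be exercised directly; the savepoint name is made up:

import sqlglot
from sqlglot import exp

begin = sqlglot.parse_one("BEGIN IMMEDIATE TRANSACTION")
assert isinstance(begin, exp.Transaction)
assert begin.this == "IMMEDIATE"    # picked out of TRANSACTION_KIND

rollback = sqlglot.parse_one("ROLLBACK TO SAVEPOINT sp1")
assert isinstance(rollback, exp.Rollback)
assert rollback.args["savepoint"].name == "sp1"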
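A sketch of the ALTER TABLE path (_parse_alter dispatching through ALTER_PARSERS into _parse_add_column and _parse_alter_table_rename); table and column names are hypothetical:

import sqlglot
from sqlglot import exp

alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(alter, exp.AlterTable)
assert isinstance(alter.args["actions"][0], exp.ColumnDef)   # from _parse_add_column

rename = sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")
assert isinstance(rename.args["actions"][0], exp.RenameColumn)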
TokenType.INT4MULTIRANGE, 207 TokenType.INT8RANGE, 208 TokenType.INT8MULTIRANGE, 209 TokenType.NUMRANGE, 210 TokenType.NUMMULTIRANGE, 211 TokenType.TSRANGE, 212 TokenType.TSMULTIRANGE, 213 TokenType.TSTZRANGE, 214 TokenType.TSTZMULTIRANGE, 215 TokenType.DATERANGE, 216 TokenType.DATEMULTIRANGE, 217 TokenType.DECIMAL, 218 TokenType.UDECIMAL, 219 TokenType.BIGDECIMAL, 220 TokenType.UUID, 221 TokenType.GEOGRAPHY, 222 TokenType.GEOMETRY, 223 TokenType.HLLSKETCH, 224 TokenType.HSTORE, 225 TokenType.PSEUDO_TYPE, 226 TokenType.SUPER, 227 TokenType.SERIAL, 228 TokenType.SMALLSERIAL, 229 TokenType.BIGSERIAL, 230 TokenType.XML, 231 TokenType.YEAR, 232 TokenType.UNIQUEIDENTIFIER, 233 TokenType.USERDEFINED, 234 TokenType.MONEY, 235 TokenType.SMALLMONEY, 236 TokenType.ROWVERSION, 237 TokenType.IMAGE, 238 TokenType.VARIANT, 239 TokenType.OBJECT, 240 TokenType.OBJECT_IDENTIFIER, 241 TokenType.INET, 242 TokenType.IPADDRESS, 243 TokenType.IPPREFIX, 244 TokenType.IPV4, 245 TokenType.IPV6, 246 TokenType.UNKNOWN, 247 TokenType.NULL, 248 *ENUM_TYPE_TOKENS, 249 *NESTED_TYPE_TOKENS, 250 *AGGREGATE_TYPE_TOKENS, 251 } 252 253 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 254 TokenType.BIGINT: TokenType.UBIGINT, 255 TokenType.INT: TokenType.UINT, 256 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 257 TokenType.SMALLINT: TokenType.USMALLINT, 258 TokenType.TINYINT: TokenType.UTINYINT, 259 TokenType.DECIMAL: TokenType.UDECIMAL, 260 } 261 262 SUBQUERY_PREDICATES = { 263 TokenType.ANY: exp.Any, 264 TokenType.ALL: exp.All, 265 TokenType.EXISTS: exp.Exists, 266 TokenType.SOME: exp.Any, 267 } 268 269 RESERVED_TOKENS = { 270 *Tokenizer.SINGLE_TOKENS.values(), 271 TokenType.SELECT, 272 } 273 274 DB_CREATABLES = { 275 TokenType.DATABASE, 276 TokenType.SCHEMA, 277 TokenType.TABLE, 278 TokenType.VIEW, 279 TokenType.MODEL, 280 TokenType.DICTIONARY, 281 } 282 283 CREATABLES = { 284 TokenType.COLUMN, 285 TokenType.CONSTRAINT, 286 TokenType.FUNCTION, 287 TokenType.INDEX, 288 TokenType.PROCEDURE, 289 TokenType.FOREIGN_KEY, 290 *DB_CREATABLES, 291 } 292 293 # Tokens that can represent identifiers 294 ID_VAR_TOKENS = { 295 TokenType.VAR, 296 TokenType.ANTI, 297 TokenType.APPLY, 298 TokenType.ASC, 299 TokenType.AUTO_INCREMENT, 300 TokenType.BEGIN, 301 TokenType.CACHE, 302 TokenType.CASE, 303 TokenType.COLLATE, 304 TokenType.COMMAND, 305 TokenType.COMMENT, 306 TokenType.COMMIT, 307 TokenType.CONSTRAINT, 308 TokenType.DEFAULT, 309 TokenType.DELETE, 310 TokenType.DESC, 311 TokenType.DESCRIBE, 312 TokenType.DICTIONARY, 313 TokenType.DIV, 314 TokenType.END, 315 TokenType.EXECUTE, 316 TokenType.ESCAPE, 317 TokenType.FALSE, 318 TokenType.FIRST, 319 TokenType.FILTER, 320 TokenType.FINAL, 321 TokenType.FORMAT, 322 TokenType.FULL, 323 TokenType.IS, 324 TokenType.ISNULL, 325 TokenType.INTERVAL, 326 TokenType.KEEP, 327 TokenType.KILL, 328 TokenType.LEFT, 329 TokenType.LOAD, 330 TokenType.MERGE, 331 TokenType.NATURAL, 332 TokenType.NEXT, 333 TokenType.OFFSET, 334 TokenType.OPERATOR, 335 TokenType.ORDINALITY, 336 TokenType.OVERLAPS, 337 TokenType.OVERWRITE, 338 TokenType.PARTITION, 339 TokenType.PERCENT, 340 TokenType.PIVOT, 341 TokenType.PRAGMA, 342 TokenType.RANGE, 343 TokenType.RECURSIVE, 344 TokenType.REFERENCES, 345 TokenType.REFRESH, 346 TokenType.REPLACE, 347 TokenType.RIGHT, 348 TokenType.ROW, 349 TokenType.ROWS, 350 TokenType.SEMI, 351 TokenType.SET, 352 TokenType.SETTINGS, 353 TokenType.SHOW, 354 TokenType.TEMPORARY, 355 TokenType.TOP, 356 TokenType.TRUE, 357 TokenType.UNIQUE, 358 TokenType.UNPIVOT, 359 TokenType.UPDATE, 360 TokenType.USE, 361 
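_parse_merge and _parse_when_matched produce one exp.When per branch; the MERGE statement below is illustrative:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
matched, not_matched = merge.expressions
assert matched.args["matched"] is True        # WHEN MATCHED
assert not_matched.args["matched"] is False   # WHEN NOT MATCHED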
TokenType.VOLATILE, 362 TokenType.WINDOW, 363 *CREATABLES, 364 *SUBQUERY_PREDICATES, 365 *TYPE_TOKENS, 366 *NO_PAREN_FUNCTIONS, 367 } 368 369 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 370 371 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 372 TokenType.ANTI, 373 TokenType.APPLY, 374 TokenType.ASOF, 375 TokenType.FULL, 376 TokenType.LEFT, 377 TokenType.LOCK, 378 TokenType.NATURAL, 379 TokenType.OFFSET, 380 TokenType.RIGHT, 381 TokenType.SEMI, 382 TokenType.WINDOW, 383 } 384 385 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 386 387 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 388 389 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 390 391 FUNC_TOKENS = { 392 TokenType.COLLATE, 393 TokenType.COMMAND, 394 TokenType.CURRENT_DATE, 395 TokenType.CURRENT_DATETIME, 396 TokenType.CURRENT_TIMESTAMP, 397 TokenType.CURRENT_TIME, 398 TokenType.CURRENT_USER, 399 TokenType.FILTER, 400 TokenType.FIRST, 401 TokenType.FORMAT, 402 TokenType.GLOB, 403 TokenType.IDENTIFIER, 404 TokenType.INDEX, 405 TokenType.ISNULL, 406 TokenType.ILIKE, 407 TokenType.INSERT, 408 TokenType.LIKE, 409 TokenType.MERGE, 410 TokenType.OFFSET, 411 TokenType.PRIMARY_KEY, 412 TokenType.RANGE, 413 TokenType.REPLACE, 414 TokenType.RLIKE, 415 TokenType.ROW, 416 TokenType.UNNEST, 417 TokenType.VAR, 418 TokenType.LEFT, 419 TokenType.RIGHT, 420 TokenType.DATE, 421 TokenType.DATETIME, 422 TokenType.TABLE, 423 TokenType.TIMESTAMP, 424 TokenType.TIMESTAMPTZ, 425 TokenType.WINDOW, 426 TokenType.XOR, 427 *TYPE_TOKENS, 428 *SUBQUERY_PREDICATES, 429 } 430 431 CONJUNCTION = { 432 TokenType.AND: exp.And, 433 TokenType.OR: exp.Or, 434 } 435 436 EQUALITY = { 437 TokenType.COLON_EQ: exp.PropertyEQ, 438 TokenType.EQ: exp.EQ, 439 TokenType.NEQ: exp.NEQ, 440 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 441 } 442 443 COMPARISON = { 444 TokenType.GT: exp.GT, 445 TokenType.GTE: exp.GTE, 446 TokenType.LT: exp.LT, 447 TokenType.LTE: exp.LTE, 448 } 449 450 BITWISE = { 451 TokenType.AMP: exp.BitwiseAnd, 452 TokenType.CARET: exp.BitwiseXor, 453 TokenType.PIPE: exp.BitwiseOr, 454 } 455 456 TERM = { 457 TokenType.DASH: exp.Sub, 458 TokenType.PLUS: exp.Add, 459 TokenType.MOD: exp.Mod, 460 TokenType.COLLATE: exp.Collate, 461 } 462 463 FACTOR = { 464 TokenType.DIV: exp.IntDiv, 465 TokenType.LR_ARROW: exp.Distance, 466 TokenType.SLASH: exp.Div, 467 TokenType.STAR: exp.Mul, 468 } 469 470 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 471 472 TIMES = { 473 TokenType.TIME, 474 TokenType.TIMETZ, 475 } 476 477 TIMESTAMPS = { 478 TokenType.TIMESTAMP, 479 TokenType.TIMESTAMPTZ, 480 TokenType.TIMESTAMPLTZ, 481 *TIMES, 482 } 483 484 SET_OPERATIONS = { 485 TokenType.UNION, 486 TokenType.INTERSECT, 487 TokenType.EXCEPT, 488 } 489 490 JOIN_METHODS = { 491 TokenType.NATURAL, 492 TokenType.ASOF, 493 } 494 495 JOIN_SIDES = { 496 TokenType.LEFT, 497 TokenType.RIGHT, 498 TokenType.FULL, 499 } 500 501 JOIN_KINDS = { 502 TokenType.INNER, 503 TokenType.OUTER, 504 TokenType.CROSS, 505 TokenType.SEMI, 506 TokenType.ANTI, 507 } 508 509 JOIN_HINTS: t.Set[str] = set() 510 511 LAMBDAS = { 512 TokenType.ARROW: lambda self, expressions: self.expression( 513 exp.Lambda, 514 this=self._replace_lambda( 515 self._parse_conjunction(), 516 {node.name for node in expressions}, 517 ), 518 expressions=expressions, 519 ), 520 TokenType.FARROW: lambda self, expressions: self.expression( 521 exp.Kwarg, 522 this=exp.var(expressions[0].name), 523 expression=self._parse_conjunction(), 524 ), 525 } 526 527 COLUMN_OPERATORS = { 528 TokenType.DOT: None, 529 TokenType.DCOLON: lambda 
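Likewise for SET: _parse_set dispatches through SET_PARSERS into _parse_set_item_assignment; the variable name here is invented:

import sqlglot
from sqlglot import exp

set_stmt = sqlglot.parse_one("SET GLOBAL max_connections = 100")
assert isinstance(set_stmt, exp.Set)
item = set_stmt.expressions[0]        # exp.SetItem
assert item.args["kind"] == "GLOBAL"
assert isinstance(item.this, exp.EQ)  # max_connections = 100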
self, this, to: self.expression( 530 exp.Cast if self.STRICT_CAST else exp.TryCast, 531 this=this, 532 to=to, 533 ), 534 TokenType.ARROW: lambda self, this, path: self.expression( 535 exp.JSONExtract, 536 this=this, 537 expression=path, 538 ), 539 TokenType.DARROW: lambda self, this, path: self.expression( 540 exp.JSONExtractScalar, 541 this=this, 542 expression=path, 543 ), 544 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 545 exp.JSONBExtract, 546 this=this, 547 expression=path, 548 ), 549 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 550 exp.JSONBExtractScalar, 551 this=this, 552 expression=path, 553 ), 554 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 555 exp.JSONBContains, 556 this=this, 557 expression=key, 558 ), 559 } 560 561 EXPRESSION_PARSERS = { 562 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 563 exp.Column: lambda self: self._parse_column(), 564 exp.Condition: lambda self: self._parse_conjunction(), 565 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 566 exp.Expression: lambda self: self._parse_statement(), 567 exp.From: lambda self: self._parse_from(), 568 exp.Group: lambda self: self._parse_group(), 569 exp.Having: lambda self: self._parse_having(), 570 exp.Identifier: lambda self: self._parse_id_var(), 571 exp.Join: lambda self: self._parse_join(), 572 exp.Lambda: lambda self: self._parse_lambda(), 573 exp.Lateral: lambda self: self._parse_lateral(), 574 exp.Limit: lambda self: self._parse_limit(), 575 exp.Offset: lambda self: self._parse_offset(), 576 exp.Order: lambda self: self._parse_order(), 577 exp.Ordered: lambda self: self._parse_ordered(), 578 exp.Properties: lambda self: self._parse_properties(), 579 exp.Qualify: lambda self: self._parse_qualify(), 580 exp.Returning: lambda self: self._parse_returning(), 581 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 582 exp.Table: lambda self: self._parse_table_parts(), 583 exp.TableAlias: lambda self: self._parse_table_alias(), 584 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 585 exp.Where: lambda self: self._parse_where(), 586 exp.Window: lambda self: self._parse_named_window(), 587 exp.With: lambda self: self._parse_with(), 588 "JOIN_TYPE": lambda self: self._parse_join_parts(), 589 } 590 591 STATEMENT_PARSERS = { 592 TokenType.ALTER: lambda self: self._parse_alter(), 593 TokenType.BEGIN: lambda self: self._parse_transaction(), 594 TokenType.CACHE: lambda self: self._parse_cache(), 595 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 596 TokenType.COMMENT: lambda self: self._parse_comment(), 597 TokenType.CREATE: lambda self: self._parse_create(), 598 TokenType.DELETE: lambda self: self._parse_delete(), 599 TokenType.DESC: lambda self: self._parse_describe(), 600 TokenType.DESCRIBE: lambda self: self._parse_describe(), 601 TokenType.DROP: lambda self: self._parse_drop(), 602 TokenType.INSERT: lambda self: self._parse_insert(), 603 TokenType.KILL: lambda self: self._parse_kill(), 604 TokenType.LOAD: lambda self: self._parse_load(), 605 TokenType.MERGE: lambda self: self._parse_merge(), 606 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 607 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 608 TokenType.REFRESH: lambda self: self._parse_refresh(), 609 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 610 TokenType.SET: lambda self: self._parse_set(), 611 TokenType.UNCACHE: lambda self: 
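_find_parser walks SHOW_TRIE/SET_TRIE one (possibly multi-word) token at a time; the same trie primitives can be tried in isolation:

from sqlglot.trie import TrieResult, in_trie, new_trie

trie = new_trie(key.split(" ") for key in ("READ ONLY", "READ WRITE"))

result, _ = in_trie(trie, ["READ"])
assert result == TrieResult.PREFIX    # a longer key may still match

result, _ = in_trie(trie, ["READ", "ONLY"])
assert result == TrieResult.EXISTS    # complete key found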
self._parse_uncache(), 612 TokenType.UPDATE: lambda self: self._parse_update(), 613 TokenType.USE: lambda self: self.expression( 614 exp.Use, 615 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 616 and exp.var(self._prev.text), 617 this=self._parse_table(schema=False), 618 ), 619 } 620 621 UNARY_PARSERS = { 622 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 623 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 624 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 625 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 626 } 627 628 PRIMARY_PARSERS = { 629 TokenType.STRING: lambda self, token: self.expression( 630 exp.Literal, this=token.text, is_string=True 631 ), 632 TokenType.NUMBER: lambda self, token: self.expression( 633 exp.Literal, this=token.text, is_string=False 634 ), 635 TokenType.STAR: lambda self, _: self.expression( 636 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 637 ), 638 TokenType.NULL: lambda self, _: self.expression(exp.Null), 639 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 640 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 641 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 642 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 643 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 644 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 645 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 646 exp.National, this=token.text 647 ), 648 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 649 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 650 exp.RawString, this=token.text 651 ), 652 TokenType.UNICODE_STRING: lambda self, token: self.expression( 653 exp.UnicodeString, 654 this=token.text, 655 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 656 ), 657 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 658 } 659 660 PLACEHOLDER_PARSERS = { 661 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 662 TokenType.PARAMETER: lambda self: self._parse_parameter(), 663 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 664 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 665 else None, 666 } 667 668 RANGE_PARSERS = { 669 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 670 TokenType.GLOB: binary_range_parser(exp.Glob), 671 TokenType.ILIKE: binary_range_parser(exp.ILike), 672 TokenType.IN: lambda self, this: self._parse_in(this), 673 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 674 TokenType.IS: lambda self, this: self._parse_is(this), 675 TokenType.LIKE: binary_range_parser(exp.Like), 676 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 677 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 678 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 679 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 680 } 681 682 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 683 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 684 "AUTO": lambda self: self._parse_auto_property(), 685 "AUTO_INCREMENT": lambda self: 
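_replace_lambda demotes lambda parameters from exp.Column to plain identifiers so they cannot be confused with real column references; FILTER and the names below are illustrative:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT FILTER(xs, x -> x > 0) FROM t")

lam = ast.find(exp.Lambda)
body = lam.this                                # the x > 0 condition
assert isinstance(body, exp.GT)
assert isinstance(body.this, exp.Identifier)   # x, demoted by _replace_lambda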
self._parse_property_assignment(exp.AutoIncrementProperty), 686 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 687 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 688 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 689 "CHECKSUM": lambda self: self._parse_checksum(), 690 "CLUSTER BY": lambda self: self._parse_cluster(), 691 "CLUSTERED": lambda self: self._parse_clustered_by(), 692 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 693 exp.CollateProperty, **kwargs 694 ), 695 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 696 "CONTAINS": lambda self: self._parse_contains_property(), 697 "COPY": lambda self: self._parse_copy_property(), 698 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 699 "DEFINER": lambda self: self._parse_definer(), 700 "DETERMINISTIC": lambda self: self.expression( 701 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 702 ), 703 "DISTKEY": lambda self: self._parse_distkey(), 704 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 705 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 706 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 707 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 708 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 709 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 710 "FREESPACE": lambda self: self._parse_freespace(), 711 "HEAP": lambda self: self.expression(exp.HeapProperty), 712 "IMMUTABLE": lambda self: self.expression( 713 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 714 ), 715 "INHERITS": lambda self: self.expression( 716 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 717 ), 718 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 719 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 720 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 721 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 722 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 723 "LIKE": lambda self: self._parse_create_like(), 724 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 725 "LOCK": lambda self: self._parse_locking(), 726 "LOCKING": lambda self: self._parse_locking(), 727 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 728 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 729 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 730 "MODIFIES": lambda self: self._parse_modifies_property(), 731 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 732 "NO": lambda self: self._parse_no_property(), 733 "ON": lambda self: self._parse_on_property(), 734 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 735 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 736 "PARTITION": lambda self: self._parse_partitioned_of(), 737 "PARTITION BY": lambda self: self._parse_partitioned_by(), 738 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 739 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 740 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 741 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 742 
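Statements with no dedicated parser fall back to exp.Command (see _parse_command and _parse_as_command above), emitting the _warn_unsupported log message; assuming the default dialect, SHOW is one such case:

import sqlglot
from sqlglot import exp

cmd = sqlglot.parse_one("SHOW TABLES")
assert isinstance(cmd, exp.Command)
assert cmd.this == "SHOW"    # the rest of the statement is kept verbatim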
"READS": lambda self: self._parse_reads_property(), 743 "REMOTE": lambda self: self._parse_remote_with_connection(), 744 "RETURNS": lambda self: self._parse_returns(), 745 "ROW": lambda self: self._parse_row(), 746 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 747 "SAMPLE": lambda self: self.expression( 748 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 749 ), 750 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 751 "SETTINGS": lambda self: self.expression( 752 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 753 ), 754 "SORTKEY": lambda self: self._parse_sortkey(), 755 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 756 "STABLE": lambda self: self.expression( 757 exp.StabilityProperty, this=exp.Literal.string("STABLE") 758 ), 759 "STORED": lambda self: self._parse_stored(), 760 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 761 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 762 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 763 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 764 "TO": lambda self: self._parse_to_table(), 765 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 766 "TRANSFORM": lambda self: self.expression( 767 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 768 ), 769 "TTL": lambda self: self._parse_ttl(), 770 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 771 "VOLATILE": lambda self: self._parse_volatile_property(), 772 "WITH": lambda self: self._parse_with_property(), 773 } 774 775 CONSTRAINT_PARSERS = { 776 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 777 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 778 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 779 "CHARACTER SET": lambda self: self.expression( 780 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 781 ), 782 "CHECK": lambda self: self.expression( 783 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 784 ), 785 "COLLATE": lambda self: self.expression( 786 exp.CollateColumnConstraint, this=self._parse_var() 787 ), 788 "COMMENT": lambda self: self.expression( 789 exp.CommentColumnConstraint, this=self._parse_string() 790 ), 791 "COMPRESS": lambda self: self._parse_compress(), 792 "CLUSTERED": lambda self: self.expression( 793 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 794 ), 795 "NONCLUSTERED": lambda self: self.expression( 796 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 797 ), 798 "DEFAULT": lambda self: self.expression( 799 exp.DefaultColumnConstraint, this=self._parse_bitwise() 800 ), 801 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 802 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 803 "FORMAT": lambda self: self.expression( 804 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 805 ), 806 "GENERATED": lambda self: self._parse_generated_as_identity(), 807 "IDENTITY": lambda self: self._parse_auto_increment(), 808 "INLINE": lambda self: self._parse_inline(), 809 "LIKE": lambda self: self._parse_create_like(), 810 "NOT": lambda self: self._parse_not_constraint(), 811 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 812 
"ON": lambda self: ( 813 self._match(TokenType.UPDATE) 814 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 815 ) 816 or self.expression(exp.OnProperty, this=self._parse_id_var()), 817 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 818 "PERIOD": lambda self: self._parse_period_for_system_time(), 819 "PRIMARY KEY": lambda self: self._parse_primary_key(), 820 "REFERENCES": lambda self: self._parse_references(match=False), 821 "TITLE": lambda self: self.expression( 822 exp.TitleColumnConstraint, this=self._parse_var_or_string() 823 ), 824 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 825 "UNIQUE": lambda self: self._parse_unique(), 826 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 827 "WITH": lambda self: self.expression( 828 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 829 ), 830 } 831 832 ALTER_PARSERS = { 833 "ADD": lambda self: self._parse_alter_table_add(), 834 "ALTER": lambda self: self._parse_alter_table_alter(), 835 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 836 "DROP": lambda self: self._parse_alter_table_drop(), 837 "RENAME": lambda self: self._parse_alter_table_rename(), 838 } 839 840 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 841 842 NO_PAREN_FUNCTION_PARSERS = { 843 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 844 "CASE": lambda self: self._parse_case(), 845 "IF": lambda self: self._parse_if(), 846 "NEXT": lambda self: self._parse_next_value_for(), 847 } 848 849 INVALID_FUNC_NAME_TOKENS = { 850 TokenType.IDENTIFIER, 851 TokenType.STRING, 852 } 853 854 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 855 856 FUNCTION_PARSERS = { 857 "ANY_VALUE": lambda self: self._parse_any_value(), 858 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 859 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 860 "DECODE": lambda self: self._parse_decode(), 861 "EXTRACT": lambda self: self._parse_extract(), 862 "JSON_OBJECT": lambda self: self._parse_json_object(), 863 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 864 "JSON_TABLE": lambda self: self._parse_json_table(), 865 "MATCH": lambda self: self._parse_match_against(), 866 "OPENJSON": lambda self: self._parse_open_json(), 867 "POSITION": lambda self: self._parse_position(), 868 "PREDICT": lambda self: self._parse_predict(), 869 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 870 "STRING_AGG": lambda self: self._parse_string_agg(), 871 "SUBSTRING": lambda self: self._parse_substring(), 872 "TRIM": lambda self: self._parse_trim(), 873 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 874 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 875 } 876 877 QUERY_MODIFIER_PARSERS = { 878 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 879 TokenType.WHERE: lambda self: ("where", self._parse_where()), 880 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 881 TokenType.HAVING: lambda self: ("having", self._parse_having()), 882 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 883 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 884 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 885 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 886 TokenType.FETCH: lambda self: ("limit", 
self._parse_limit()), 887 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 888 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 889 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 890 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 891 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 892 TokenType.CLUSTER_BY: lambda self: ( 893 "cluster", 894 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 895 ), 896 TokenType.DISTRIBUTE_BY: lambda self: ( 897 "distribute", 898 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 899 ), 900 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 901 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 902 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 903 } 904 905 SET_PARSERS = { 906 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 907 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 908 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 909 "TRANSACTION": lambda self: self._parse_set_transaction(), 910 } 911 912 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 913 914 TYPE_LITERAL_PARSERS = { 915 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 916 } 917 918 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 919 920 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 921 922 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 923 924 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 925 TRANSACTION_CHARACTERISTICS = { 926 "ISOLATION LEVEL REPEATABLE READ", 927 "ISOLATION LEVEL READ COMMITTED", 928 "ISOLATION LEVEL READ UNCOMMITTED", 929 "ISOLATION LEVEL SERIALIZABLE", 930 "READ WRITE", 931 "READ ONLY", 932 } 933 934 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 935 936 CLONE_KEYWORDS = {"CLONE", "COPY"} 937 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 938 939 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 940 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 941 942 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 943 944 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 945 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 946 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 947 948 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 949 950 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 951 952 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 953 954 DISTINCT_TOKENS = {TokenType.DISTINCT} 955 956 NULL_TOKENS = {TokenType.NULL} 957 958 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 959 960 STRICT_CAST = True 961 962 PREFIXED_PIVOT_COLUMNS = False 963 IDENTIFY_PIVOT_STRINGS = False 964 965 LOG_DEFAULTS_TO_LN = False 966 967 # Whether or not ADD is present for each column added by ALTER TABLE 968 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 969 970 # Whether or not the table sample clause expects CSV syntax 971 TABLESAMPLE_CSV = False 972 973 # Whether or not the SET command needs a delimiter (e.g. 
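Finally, parse_one's into= argument drives Parser.parse_into, which looks up the entry point in EXPRESSION_PARSERS; exp.Condition, for instance, maps to _parse_conjunction:

import sqlglot
from sqlglot import exp

cond = sqlglot.parse_one("a = 1", into=exp.Condition)
assert isinstance(cond, exp.EQ)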
"=") for assignments 974 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 975 976 # Whether the TRIM function expects the characters to trim as its first argument 977 TRIM_PATTERN_FIRST = False 978 979 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 980 STRING_ALIASES = False 981 982 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 983 MODIFIERS_ATTACHED_TO_UNION = True 984 UNION_MODIFIERS = {"order", "limit", "offset"} 985 986 __slots__ = ( 987 "error_level", 988 "error_message_context", 989 "max_errors", 990 "dialect", 991 "sql", 992 "errors", 993 "_tokens", 994 "_index", 995 "_curr", 996 "_next", 997 "_prev", 998 "_prev_comments", 999 ) 1000 1001 # Autofilled 1002 SHOW_TRIE: t.Dict = {} 1003 SET_TRIE: t.Dict = {} 1004 1005 def __init__( 1006 self, 1007 error_level: t.Optional[ErrorLevel] = None, 1008 error_message_context: int = 100, 1009 max_errors: int = 3, 1010 dialect: DialectType = None, 1011 ): 1012 from sqlglot.dialects import Dialect 1013 1014 self.error_level = error_level or ErrorLevel.IMMEDIATE 1015 self.error_message_context = error_message_context 1016 self.max_errors = max_errors 1017 self.dialect = Dialect.get_or_raise(dialect) 1018 self.reset() 1019 1020 def reset(self): 1021 self.sql = "" 1022 self.errors = [] 1023 self._tokens = [] 1024 self._index = 0 1025 self._curr = None 1026 self._next = None 1027 self._prev = None 1028 self._prev_comments = None 1029 1030 def parse( 1031 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1032 ) -> t.List[t.Optional[exp.Expression]]: 1033 """ 1034 Parses a list of tokens and returns a list of syntax trees, one tree 1035 per parsed SQL statement. 1036 1037 Args: 1038 raw_tokens: The list of tokens. 1039 sql: The original SQL string, used to produce helpful debug messages. 1040 1041 Returns: 1042 The list of the produced syntax trees. 1043 """ 1044 return self._parse( 1045 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1046 ) 1047 1048 def parse_into( 1049 self, 1050 expression_types: exp.IntoType, 1051 raw_tokens: t.List[Token], 1052 sql: t.Optional[str] = None, 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 """ 1055 Parses a list of tokens into a given Expression type. If a collection of Expression 1056 types is given instead, this method will try to parse the token list into each one 1057 of them, stopping at the first for which the parsing succeeds. 1058 1059 Args: 1060 expression_types: The expression type(s) to try and parse the token list into. 1061 raw_tokens: The list of tokens. 1062 sql: The original SQL string, used to produce helpful debug messages. 1063 1064 Returns: 1065 The target Expression. 
1066 """ 1067 errors = [] 1068 for expression_type in ensure_list(expression_types): 1069 parser = self.EXPRESSION_PARSERS.get(expression_type) 1070 if not parser: 1071 raise TypeError(f"No parser registered for {expression_type}") 1072 1073 try: 1074 return self._parse(parser, raw_tokens, sql) 1075 except ParseError as e: 1076 e.errors[0]["into_expression"] = expression_type 1077 errors.append(e) 1078 1079 raise ParseError( 1080 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1081 errors=merge_errors(errors), 1082 ) from errors[-1] 1083 1084 def _parse( 1085 self, 1086 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1087 raw_tokens: t.List[Token], 1088 sql: t.Optional[str] = None, 1089 ) -> t.List[t.Optional[exp.Expression]]: 1090 self.reset() 1091 self.sql = sql or "" 1092 1093 total = len(raw_tokens) 1094 chunks: t.List[t.List[Token]] = [[]] 1095 1096 for i, token in enumerate(raw_tokens): 1097 if token.token_type == TokenType.SEMICOLON: 1098 if i < total - 1: 1099 chunks.append([]) 1100 else: 1101 chunks[-1].append(token) 1102 1103 expressions = [] 1104 1105 for tokens in chunks: 1106 self._index = -1 1107 self._tokens = tokens 1108 self._advance() 1109 1110 expressions.append(parse_method(self)) 1111 1112 if self._index < len(self._tokens): 1113 self.raise_error("Invalid expression / Unexpected token") 1114 1115 self.check_errors() 1116 1117 return expressions 1118 1119 def check_errors(self) -> None: 1120 """Logs or raises any found errors, depending on the chosen error level setting.""" 1121 if self.error_level == ErrorLevel.WARN: 1122 for error in self.errors: 1123 logger.error(str(error)) 1124 elif self.error_level == ErrorLevel.RAISE and self.errors: 1125 raise ParseError( 1126 concat_messages(self.errors, self.max_errors), 1127 errors=merge_errors(self.errors), 1128 ) 1129 1130 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1131 """ 1132 Appends an error in the list of recorded errors or raises it, depending on the chosen 1133 error level setting. 1134 """ 1135 token = token or self._curr or self._prev or Token.string("") 1136 start = token.start 1137 end = token.end + 1 1138 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1139 highlight = self.sql[start:end] 1140 end_context = self.sql[end : end + self.error_message_context] 1141 1142 error = ParseError.new( 1143 f"{message}. Line {token.line}, Col: {token.col}.\n" 1144 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1145 description=message, 1146 line=token.line, 1147 col=token.col, 1148 start_context=start_context, 1149 highlight=highlight, 1150 end_context=end_context, 1151 ) 1152 1153 if self.error_level == ErrorLevel.IMMEDIATE: 1154 raise error 1155 1156 self.errors.append(error) 1157 1158 def expression( 1159 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1160 ) -> E: 1161 """ 1162 Creates a new, validated Expression. 1163 1164 Args: 1165 exp_class: The expression class to instantiate. 1166 comments: An optional list of comments to attach to the expression. 1167 kwargs: The arguments to set for the expression along with their respective values. 1168 1169 Returns: 1170 The target expression. 
1171 """ 1172 instance = exp_class(**kwargs) 1173 instance.add_comments(comments) if comments else self._add_comments(instance) 1174 return self.validate_expression(instance) 1175 1176 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1177 if expression and self._prev_comments: 1178 expression.add_comments(self._prev_comments) 1179 self._prev_comments = None 1180 1181 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1182 """ 1183 Validates an Expression, making sure that all its mandatory arguments are set. 1184 1185 Args: 1186 expression: The expression to validate. 1187 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1188 1189 Returns: 1190 The validated expression. 1191 """ 1192 if self.error_level != ErrorLevel.IGNORE: 1193 for error_message in expression.error_messages(args): 1194 self.raise_error(error_message) 1195 1196 return expression 1197 1198 def _find_sql(self, start: Token, end: Token) -> str: 1199 return self.sql[start.start : end.end + 1] 1200 1201 def _is_connected(self) -> bool: 1202 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1203 1204 def _advance(self, times: int = 1) -> None: 1205 self._index += times 1206 self._curr = seq_get(self._tokens, self._index) 1207 self._next = seq_get(self._tokens, self._index + 1) 1208 1209 if self._index > 0: 1210 self._prev = self._tokens[self._index - 1] 1211 self._prev_comments = self._prev.comments 1212 else: 1213 self._prev = None 1214 self._prev_comments = None 1215 1216 def _retreat(self, index: int) -> None: 1217 if index != self._index: 1218 self._advance(index - self._index) 1219 1220 def _warn_unsupported(self) -> None: 1221 if len(self._tokens) <= 1: 1222 return 1223 1224 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1225 # interested in emitting a warning for the one being currently processed. 1226 sql = self._find_sql(self._tokens[0], self._tokens[-1]) 1227 1228 logger.warning( 1229 f"Input '{sql}' contains unsupported syntax, proceeding to parse it into the" 1230 " fallback 'Command' expression. Consider filing a GitHub issue to request support" 1231 " for this syntax, e.g. if transpilation or AST metadata extraction is required." 
1232 ) 1233 1234 def _parse_command(self) -> exp.Command: 1235 self._warn_unsupported() 1236 return self.expression( 1237 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1238 ) 1239 1240 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1241 start = self._prev 1242 exists = self._parse_exists() if allow_exists else None 1243 1244 self._match(TokenType.ON) 1245 1246 kind = self._match_set(self.CREATABLES) and self._prev 1247 if not kind: 1248 return self._parse_as_command(start) 1249 1250 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1251 this = self._parse_user_defined_function(kind=kind.token_type) 1252 elif kind.token_type == TokenType.TABLE: 1253 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1254 elif kind.token_type == TokenType.COLUMN: 1255 this = self._parse_column() 1256 else: 1257 this = self._parse_id_var() 1258 1259 self._match(TokenType.IS) 1260 1261 return self.expression( 1262 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1263 ) 1264 1265 def _parse_to_table( 1266 self, 1267 ) -> exp.ToTableProperty: 1268 table = self._parse_table_parts(schema=True) 1269 return self.expression(exp.ToTableProperty, this=table) 1270 1271 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1272 def _parse_ttl(self) -> exp.Expression: 1273 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1274 this = self._parse_bitwise() 1275 1276 if self._match_text_seq("DELETE"): 1277 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1278 if self._match_text_seq("RECOMPRESS"): 1279 return self.expression( 1280 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1281 ) 1282 if self._match_text_seq("TO", "DISK"): 1283 return self.expression( 1284 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1285 ) 1286 if self._match_text_seq("TO", "VOLUME"): 1287 return self.expression( 1288 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1289 ) 1290 1291 return this 1292 1293 expressions = self._parse_csv(_parse_ttl_action) 1294 where = self._parse_where() 1295 group = self._parse_group() 1296 1297 aggregates = None 1298 if group and self._match(TokenType.SET): 1299 aggregates = self._parse_csv(self._parse_set_item) 1300 1301 return self.expression( 1302 exp.MergeTreeTTL, 1303 expressions=expressions, 1304 where=where, 1305 group=group, 1306 aggregates=aggregates, 1307 ) 1308 1309 def _parse_statement(self) -> t.Optional[exp.Expression]: 1310 if self._curr is None: 1311 return None 1312 1313 if self._match_set(self.STATEMENT_PARSERS): 1314 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1315 1316 if self._match_set(Tokenizer.COMMANDS): 1317 return self._parse_command() 1318 1319 expression = self._parse_expression() 1320 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1321 return self._parse_query_modifiers(expression) 1322 1323 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1324 start = self._prev 1325 temporary = self._match(TokenType.TEMPORARY) 1326 materialized = self._match_text_seq("MATERIALIZED") 1327 1328 kind = self._match_set(self.CREATABLES) and self._prev.text 1329 if not kind: 1330 return self._parse_as_command(start) 1331 1332 return self.expression( 1333 exp.Drop, 1334 comments=start.comments, 1335 exists=exists or self._parse_exists(), 1336 this=self._parse_table(schema=True), 
1337 kind=kind, 1338 temporary=temporary, 1339 materialized=materialized, 1340 cascade=self._match_text_seq("CASCADE"), 1341 constraints=self._match_text_seq("CONSTRAINTS"), 1342 purge=self._match_text_seq("PURGE"), 1343 ) 1344 1345 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1346 return ( 1347 self._match_text_seq("IF") 1348 and (not not_ or self._match(TokenType.NOT)) 1349 and self._match(TokenType.EXISTS) 1350 ) 1351 1352 def _parse_create(self) -> exp.Create | exp.Command: 1353 # Note: this can't be None because we've matched a statement parser 1354 start = self._prev 1355 comments = self._prev_comments 1356 1357 replace = ( 1358 start.token_type == TokenType.REPLACE 1359 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1360 or self._match_pair(TokenType.OR, TokenType.ALTER) 1361 ) 1362 unique = self._match(TokenType.UNIQUE) 1363 1364 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1365 self._advance() 1366 1367 properties = None 1368 create_token = self._match_set(self.CREATABLES) and self._prev 1369 1370 if not create_token: 1371 # exp.Properties.Location.POST_CREATE 1372 properties = self._parse_properties() 1373 create_token = self._match_set(self.CREATABLES) and self._prev 1374 1375 if not properties or not create_token: 1376 return self._parse_as_command(start) 1377 1378 exists = self._parse_exists(not_=True) 1379 this = None 1380 expression: t.Optional[exp.Expression] = None 1381 indexes = None 1382 no_schema_binding = None 1383 begin = None 1384 end = None 1385 clone = None 1386 1387 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1388 nonlocal properties 1389 if properties and temp_props: 1390 properties.expressions.extend(temp_props.expressions) 1391 elif temp_props: 1392 properties = temp_props 1393 1394 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1395 this = self._parse_user_defined_function(kind=create_token.token_type) 1396 1397 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1398 extend_props(self._parse_properties()) 1399 1400 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1401 1402 if not expression: 1403 if self._match(TokenType.COMMAND): 1404 expression = self._parse_as_command(self._prev) 1405 else: 1406 begin = self._match(TokenType.BEGIN) 1407 return_ = self._match_text_seq("RETURN") 1408 1409 if self._match(TokenType.STRING, advance=False): 1410 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1411 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1412 expression = self._parse_string() 1413 extend_props(self._parse_properties()) 1414 else: 1415 expression = self._parse_statement() 1416 1417 end = self._match_text_seq("END") 1418 1419 if return_: 1420 expression = self.expression(exp.Return, this=expression) 1421 elif create_token.token_type == TokenType.INDEX: 1422 this = self._parse_index(index=self._parse_id_var()) 1423 elif create_token.token_type in self.DB_CREATABLES: 1424 table_parts = self._parse_table_parts(schema=True) 1425 1426 # exp.Properties.Location.POST_NAME 1427 self._match(TokenType.COMMA) 1428 extend_props(self._parse_properties(before=True)) 1429 1430 this = self._parse_schema(this=table_parts) 1431 1432 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1433 extend_props(self._parse_properties()) 1434 1435 self._match(TokenType.ALIAS) 1436 if not self._match_set(self.DDL_SELECT_TOKENS, 
advance=False): 1437 # exp.Properties.Location.POST_ALIAS 1438 extend_props(self._parse_properties()) 1439 1440 expression = self._parse_ddl_select() 1441 1442 if create_token.token_type == TokenType.TABLE: 1443 # exp.Properties.Location.POST_EXPRESSION 1444 extend_props(self._parse_properties()) 1445 1446 indexes = [] 1447 while True: 1448 index = self._parse_index() 1449 1450 # exp.Properties.Location.POST_INDEX 1451 extend_props(self._parse_properties()) 1452 1453 if not index: 1454 break 1455 else: 1456 self._match(TokenType.COMMA) 1457 indexes.append(index) 1458 elif create_token.token_type == TokenType.VIEW: 1459 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1460 no_schema_binding = True 1461 1462 shallow = self._match_text_seq("SHALLOW") 1463 1464 if self._match_texts(self.CLONE_KEYWORDS): 1465 copy = self._prev.text.lower() == "copy" 1466 clone = self.expression( 1467 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1468 ) 1469 1470 if self._curr: 1471 return self._parse_as_command(start) 1472 1473 return self.expression( 1474 exp.Create, 1475 comments=comments, 1476 this=this, 1477 kind=create_token.text.upper(), 1478 replace=replace, 1479 unique=unique, 1480 expression=expression, 1481 exists=exists, 1482 properties=properties, 1483 indexes=indexes, 1484 no_schema_binding=no_schema_binding, 1485 begin=begin, 1486 end=end, 1487 clone=clone, 1488 ) 1489 1490 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1491 # only used for teradata currently 1492 self._match(TokenType.COMMA) 1493 1494 kwargs = { 1495 "no": self._match_text_seq("NO"), 1496 "dual": self._match_text_seq("DUAL"), 1497 "before": self._match_text_seq("BEFORE"), 1498 "default": self._match_text_seq("DEFAULT"), 1499 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1500 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1501 "after": self._match_text_seq("AFTER"), 1502 "minimum": self._match_texts(("MIN", "MINIMUM")), 1503 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1504 } 1505 1506 if self._match_texts(self.PROPERTY_PARSERS): 1507 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1508 try: 1509 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1510 except TypeError: 1511 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1512 1513 return None 1514 1515 def _parse_property(self) -> t.Optional[exp.Expression]: 1516 if self._match_texts(self.PROPERTY_PARSERS): 1517 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1518 1519 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1520 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1521 1522 if self._match_text_seq("COMPOUND", "SORTKEY"): 1523 return self._parse_sortkey(compound=True) 1524 1525 if self._match_text_seq("SQL", "SECURITY"): 1526 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1527 1528 index = self._index 1529 key = self._parse_column() 1530 1531 if not self._match(TokenType.EQ): 1532 self._retreat(index) 1533 return None 1534 1535 return self.expression( 1536 exp.Property, 1537 this=key.to_dot() if isinstance(key, exp.Column) else key, 1538 value=self._parse_column() or self._parse_var(any_token=True), 1539 ) 1540 1541 def _parse_stored(self) -> exp.FileFormatProperty: 1542 self._match(TokenType.ALIAS) 1543 1544 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1545 output_format = self._parse_string() if 
self._match_text_seq("OUTPUTFORMAT") else None 1546 1547 return self.expression( 1548 exp.FileFormatProperty, 1549 this=self.expression( 1550 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1551 ) 1552 if input_format or output_format 1553 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1554 ) 1555 1556 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1557 self._match(TokenType.EQ) 1558 self._match(TokenType.ALIAS) 1559 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1560 1561 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1562 properties = [] 1563 while True: 1564 if before: 1565 prop = self._parse_property_before() 1566 else: 1567 prop = self._parse_property() 1568 1569 if not prop: 1570 break 1571 for p in ensure_list(prop): 1572 properties.append(p) 1573 1574 if properties: 1575 return self.expression(exp.Properties, expressions=properties) 1576 1577 return None 1578 1579 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1580 return self.expression( 1581 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1582 ) 1583 1584 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1585 if self._index >= 2: 1586 pre_volatile_token = self._tokens[self._index - 2] 1587 else: 1588 pre_volatile_token = None 1589 1590 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1591 return exp.VolatileProperty() 1592 1593 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1594 1595 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1596 self._match_pair(TokenType.EQ, TokenType.ON) 1597 1598 prop = self.expression(exp.WithSystemVersioningProperty) 1599 if self._match(TokenType.L_PAREN): 1600 self._match_text_seq("HISTORY_TABLE", "=") 1601 prop.set("this", self._parse_table_parts()) 1602 1603 if self._match(TokenType.COMMA): 1604 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1605 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1606 1607 self._match_r_paren() 1608 1609 return prop 1610 1611 def _parse_with_property( 1612 self, 1613 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1614 if self._match(TokenType.L_PAREN, advance=False): 1615 return self._parse_wrapped_csv(self._parse_property) 1616 1617 if self._match_text_seq("JOURNAL"): 1618 return self._parse_withjournaltable() 1619 1620 if self._match_text_seq("DATA"): 1621 return self._parse_withdata(no=False) 1622 elif self._match_text_seq("NO", "DATA"): 1623 return self._parse_withdata(no=True) 1624 1625 if not self._next: 1626 return None 1627 1628 return self._parse_withisolatedloading() 1629 1630 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1631 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1632 self._match(TokenType.EQ) 1633 1634 user = self._parse_id_var() 1635 self._match(TokenType.PARAMETER) 1636 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1637 1638 if not user or not host: 1639 return None 1640 1641 return exp.DefinerProperty(this=f"{user}@{host}") 1642 1643 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1644 self._match(TokenType.TABLE) 1645 self._match(TokenType.EQ) 1646 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1647 1648 def _parse_log(self, no: bool = 
False) -> exp.LogProperty: 1649 return self.expression(exp.LogProperty, no=no) 1650 1651 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1652 return self.expression(exp.JournalProperty, **kwargs) 1653 1654 def _parse_checksum(self) -> exp.ChecksumProperty: 1655 self._match(TokenType.EQ) 1656 1657 on = None 1658 if self._match(TokenType.ON): 1659 on = True 1660 elif self._match_text_seq("OFF"): 1661 on = False 1662 1663 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1664 1665 def _parse_cluster(self) -> exp.Cluster: 1666 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1667 1668 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1669 self._match_text_seq("BY") 1670 1671 self._match_l_paren() 1672 expressions = self._parse_csv(self._parse_column) 1673 self._match_r_paren() 1674 1675 if self._match_text_seq("SORTED", "BY"): 1676 self._match_l_paren() 1677 sorted_by = self._parse_csv(self._parse_ordered) 1678 self._match_r_paren() 1679 else: 1680 sorted_by = None 1681 1682 self._match(TokenType.INTO) 1683 buckets = self._parse_number() 1684 self._match_text_seq("BUCKETS") 1685 1686 return self.expression( 1687 exp.ClusteredByProperty, 1688 expressions=expressions, 1689 sorted_by=sorted_by, 1690 buckets=buckets, 1691 ) 1692 1693 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1694 if not self._match_text_seq("GRANTS"): 1695 self._retreat(self._index - 1) 1696 return None 1697 1698 return self.expression(exp.CopyGrantsProperty) 1699 1700 def _parse_freespace(self) -> exp.FreespaceProperty: 1701 self._match(TokenType.EQ) 1702 return self.expression( 1703 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1704 ) 1705 1706 def _parse_mergeblockratio( 1707 self, no: bool = False, default: bool = False 1708 ) -> exp.MergeBlockRatioProperty: 1709 if self._match(TokenType.EQ): 1710 return self.expression( 1711 exp.MergeBlockRatioProperty, 1712 this=self._parse_number(), 1713 percent=self._match(TokenType.PERCENT), 1714 ) 1715 1716 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1717 1718 def _parse_datablocksize( 1719 self, 1720 default: t.Optional[bool] = None, 1721 minimum: t.Optional[bool] = None, 1722 maximum: t.Optional[bool] = None, 1723 ) -> exp.DataBlocksizeProperty: 1724 self._match(TokenType.EQ) 1725 size = self._parse_number() 1726 1727 units = None 1728 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1729 units = self._prev.text 1730 1731 return self.expression( 1732 exp.DataBlocksizeProperty, 1733 size=size, 1734 units=units, 1735 default=default, 1736 minimum=minimum, 1737 maximum=maximum, 1738 ) 1739 1740 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1741 self._match(TokenType.EQ) 1742 always = self._match_text_seq("ALWAYS") 1743 manual = self._match_text_seq("MANUAL") 1744 never = self._match_text_seq("NEVER") 1745 default = self._match_text_seq("DEFAULT") 1746 1747 autotemp = None 1748 if self._match_text_seq("AUTOTEMP"): 1749 autotemp = self._parse_schema() 1750 1751 return self.expression( 1752 exp.BlockCompressionProperty, 1753 always=always, 1754 manual=manual, 1755 never=never, 1756 default=default, 1757 autotemp=autotemp, 1758 ) 1759 1760 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1761 no = self._match_text_seq("NO") 1762 concurrent = self._match_text_seq("CONCURRENT") 1763 self._match_text_seq("ISOLATED", "LOADING") 1764 for_all = 
self._match_text_seq("FOR", "ALL") 1765 for_insert = self._match_text_seq("FOR", "INSERT") 1766 for_none = self._match_text_seq("FOR", "NONE") 1767 return self.expression( 1768 exp.IsolatedLoadingProperty, 1769 no=no, 1770 concurrent=concurrent, 1771 for_all=for_all, 1772 for_insert=for_insert, 1773 for_none=for_none, 1774 ) 1775 1776 def _parse_locking(self) -> exp.LockingProperty: 1777 if self._match(TokenType.TABLE): 1778 kind = "TABLE" 1779 elif self._match(TokenType.VIEW): 1780 kind = "VIEW" 1781 elif self._match(TokenType.ROW): 1782 kind = "ROW" 1783 elif self._match_text_seq("DATABASE"): 1784 kind = "DATABASE" 1785 else: 1786 kind = None 1787 1788 if kind in ("DATABASE", "TABLE", "VIEW"): 1789 this = self._parse_table_parts() 1790 else: 1791 this = None 1792 1793 if self._match(TokenType.FOR): 1794 for_or_in = "FOR" 1795 elif self._match(TokenType.IN): 1796 for_or_in = "IN" 1797 else: 1798 for_or_in = None 1799 1800 if self._match_text_seq("ACCESS"): 1801 lock_type = "ACCESS" 1802 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1803 lock_type = "EXCLUSIVE" 1804 elif self._match_text_seq("SHARE"): 1805 lock_type = "SHARE" 1806 elif self._match_text_seq("READ"): 1807 lock_type = "READ" 1808 elif self._match_text_seq("WRITE"): 1809 lock_type = "WRITE" 1810 elif self._match_text_seq("CHECKSUM"): 1811 lock_type = "CHECKSUM" 1812 else: 1813 lock_type = None 1814 1815 override = self._match_text_seq("OVERRIDE") 1816 1817 return self.expression( 1818 exp.LockingProperty, 1819 this=this, 1820 kind=kind, 1821 for_or_in=for_or_in, 1822 lock_type=lock_type, 1823 override=override, 1824 ) 1825 1826 def _parse_partition_by(self) -> t.List[exp.Expression]: 1827 if self._match(TokenType.PARTITION_BY): 1828 return self._parse_csv(self._parse_conjunction) 1829 return [] 1830 1831 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1832 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1833 if self._match_text_seq("MINVALUE"): 1834 return exp.var("MINVALUE") 1835 if self._match_text_seq("MAXVALUE"): 1836 return exp.var("MAXVALUE") 1837 return self._parse_bitwise() 1838 1839 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1840 expression = None 1841 from_expressions = None 1842 to_expressions = None 1843 1844 if self._match(TokenType.IN): 1845 this = self._parse_wrapped_csv(self._parse_bitwise) 1846 elif self._match(TokenType.FROM): 1847 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1848 self._match_text_seq("TO") 1849 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1850 elif self._match_text_seq("WITH", "(", "MODULUS"): 1851 this = self._parse_number() 1852 self._match_text_seq(",", "REMAINDER") 1853 expression = self._parse_number() 1854 self._match_r_paren() 1855 else: 1856 self.raise_error("Failed to parse partition bound spec.") 1857 1858 return self.expression( 1859 exp.PartitionBoundSpec, 1860 this=this, 1861 expression=expression, 1862 from_expressions=from_expressions, 1863 to_expressions=to_expressions, 1864 ) 1865 1866 # https://www.postgresql.org/docs/current/sql-createtable.html 1867 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1868 if not self._match_text_seq("OF"): 1869 self._retreat(self._index - 1) 1870 return None 1871 1872 this = self._parse_table(schema=True) 1873 1874 if self._match(TokenType.DEFAULT): 1875 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1876 elif self._match_text_seq("FOR", "VALUES"): 1877 expression = 
self._parse_partition_bound_spec() 1878 else: 1879 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1880 1881 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1882 1883 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1884 self._match(TokenType.EQ) 1885 return self.expression( 1886 exp.PartitionedByProperty, 1887 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1888 ) 1889 1890 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1891 if self._match_text_seq("AND", "STATISTICS"): 1892 statistics = True 1893 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1894 statistics = False 1895 else: 1896 statistics = None 1897 1898 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1899 1900 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1901 if self._match_text_seq("SQL"): 1902 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1903 return None 1904 1905 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1906 if self._match_text_seq("SQL", "DATA"): 1907 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1908 return None 1909 1910 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1911 if self._match_text_seq("PRIMARY", "INDEX"): 1912 return exp.NoPrimaryIndexProperty() 1913 if self._match_text_seq("SQL"): 1914 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1915 return None 1916 1917 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1918 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1919 return exp.OnCommitProperty() 1920 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1921 return exp.OnCommitProperty(delete=True) 1922 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1923 1924 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1925 if self._match_text_seq("SQL", "DATA"): 1926 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1927 return None 1928 1929 def _parse_distkey(self) -> exp.DistKeyProperty: 1930 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1931 1932 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1933 table = self._parse_table(schema=True) 1934 1935 options = [] 1936 while self._match_texts(("INCLUDING", "EXCLUDING")): 1937 this = self._prev.text.upper() 1938 1939 id_var = self._parse_id_var() 1940 if not id_var: 1941 return None 1942 1943 options.append( 1944 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1945 ) 1946 1947 return self.expression(exp.LikeProperty, this=table, expressions=options) 1948 1949 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1950 return self.expression( 1951 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1952 ) 1953 1954 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1955 self._match(TokenType.EQ) 1956 return self.expression( 1957 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1958 ) 1959 1960 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1961 self._match_text_seq("WITH", "CONNECTION") 1962 return self.expression( 1963 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1964 ) 1965 1966 def _parse_returns(self) -> exp.ReturnsProperty: 1967 value: 
t.Optional[exp.Expression] 1968 is_table = self._match(TokenType.TABLE) 1969 1970 if is_table: 1971 if self._match(TokenType.LT): 1972 value = self.expression( 1973 exp.Schema, 1974 this="TABLE", 1975 expressions=self._parse_csv(self._parse_struct_types), 1976 ) 1977 if not self._match(TokenType.GT): 1978 self.raise_error("Expecting >") 1979 else: 1980 value = self._parse_schema(exp.var("TABLE")) 1981 else: 1982 value = self._parse_types() 1983 1984 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1985 1986 def _parse_describe(self) -> exp.Describe: 1987 kind = self._match_set(self.CREATABLES) and self._prev.text 1988 extended = self._match_text_seq("EXTENDED") 1989 this = self._parse_table(schema=True) 1990 properties = self._parse_properties() 1991 expressions = properties.expressions if properties else None 1992 return self.expression( 1993 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 1994 ) 1995 1996 def _parse_insert(self) -> exp.Insert: 1997 comments = ensure_list(self._prev_comments) 1998 overwrite = self._match(TokenType.OVERWRITE) 1999 ignore = self._match(TokenType.IGNORE) 2000 local = self._match_text_seq("LOCAL") 2001 alternative = None 2002 2003 if self._match_text_seq("DIRECTORY"): 2004 this: t.Optional[exp.Expression] = self.expression( 2005 exp.Directory, 2006 this=self._parse_var_or_string(), 2007 local=local, 2008 row_format=self._parse_row_format(match_row=True), 2009 ) 2010 else: 2011 if self._match(TokenType.OR): 2012 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2013 2014 self._match(TokenType.INTO) 2015 comments += ensure_list(self._prev_comments) 2016 self._match(TokenType.TABLE) 2017 this = self._parse_table(schema=True) 2018 2019 returning = self._parse_returning() 2020 2021 return self.expression( 2022 exp.Insert, 2023 comments=comments, 2024 this=this, 2025 by_name=self._match_text_seq("BY", "NAME"), 2026 exists=self._parse_exists(), 2027 partition=self._parse_partition(), 2028 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2029 and self._parse_conjunction(), 2030 expression=self._parse_ddl_select(), 2031 conflict=self._parse_on_conflict(), 2032 returning=returning or self._parse_returning(), 2033 overwrite=overwrite, 2034 alternative=alternative, 2035 ignore=ignore, 2036 ) 2037 2038 def _parse_kill(self) -> exp.Kill: 2039 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2040 2041 return self.expression( 2042 exp.Kill, 2043 this=self._parse_primary(), 2044 kind=kind, 2045 ) 2046 2047 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2048 conflict = self._match_text_seq("ON", "CONFLICT") 2049 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2050 2051 if not conflict and not duplicate: 2052 return None 2053 2054 nothing = None 2055 expressions = None 2056 key = None 2057 constraint = None 2058 2059 if conflict: 2060 if self._match_text_seq("ON", "CONSTRAINT"): 2061 constraint = self._parse_id_var() 2062 else: 2063 key = self._parse_csv(self._parse_value) 2064 2065 self._match_text_seq("DO") 2066 if self._match_text_seq("NOTHING"): 2067 nothing = True 2068 else: 2069 self._match(TokenType.UPDATE) 2070 self._match(TokenType.SET) 2071 expressions = self._parse_csv(self._parse_equality) 2072 2073 return self.expression( 2074 exp.OnConflict, 2075 duplicate=duplicate, 2076 expressions=expressions, 2077 nothing=nothing, 2078 key=key, 2079 constraint=constraint, 2080 ) 2081 2082 def _parse_returning(self) -> 
t.Optional[exp.Returning]: 2083 if not self._match(TokenType.RETURNING): 2084 return None 2085 return self.expression( 2086 exp.Returning, 2087 expressions=self._parse_csv(self._parse_expression), 2088 into=self._match(TokenType.INTO) and self._parse_table_part(), 2089 ) 2090 2091 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2092 if not self._match(TokenType.FORMAT): 2093 return None 2094 return self._parse_row_format() 2095 2096 def _parse_row_format( 2097 self, match_row: bool = False 2098 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2099 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2100 return None 2101 2102 if self._match_text_seq("SERDE"): 2103 this = self._parse_string() 2104 2105 serde_properties = None 2106 if self._match(TokenType.SERDE_PROPERTIES): 2107 serde_properties = self.expression( 2108 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2109 ) 2110 2111 return self.expression( 2112 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2113 ) 2114 2115 self._match_text_seq("DELIMITED") 2116 2117 kwargs = {} 2118 2119 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2120 kwargs["fields"] = self._parse_string() 2121 if self._match_text_seq("ESCAPED", "BY"): 2122 kwargs["escaped"] = self._parse_string() 2123 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2124 kwargs["collection_items"] = self._parse_string() 2125 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2126 kwargs["map_keys"] = self._parse_string() 2127 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2128 kwargs["lines"] = self._parse_string() 2129 if self._match_text_seq("NULL", "DEFINED", "AS"): 2130 kwargs["null"] = self._parse_string() 2131 2132 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2133 2134 def _parse_load(self) -> exp.LoadData | exp.Command: 2135 if self._match_text_seq("DATA"): 2136 local = self._match_text_seq("LOCAL") 2137 self._match_text_seq("INPATH") 2138 inpath = self._parse_string() 2139 overwrite = self._match(TokenType.OVERWRITE) 2140 self._match_pair(TokenType.INTO, TokenType.TABLE) 2141 2142 return self.expression( 2143 exp.LoadData, 2144 this=self._parse_table(schema=True), 2145 local=local, 2146 overwrite=overwrite, 2147 inpath=inpath, 2148 partition=self._parse_partition(), 2149 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2150 serde=self._match_text_seq("SERDE") and self._parse_string(), 2151 ) 2152 return self._parse_as_command(self._prev) 2153 2154 def _parse_delete(self) -> exp.Delete: 2155 # This handles MySQL's "Multiple-Table Syntax" 2156 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2157 tables = None 2158 comments = self._prev_comments 2159 if not self._match(TokenType.FROM, advance=False): 2160 tables = self._parse_csv(self._parse_table) or None 2161 2162 returning = self._parse_returning() 2163 2164 return self.expression( 2165 exp.Delete, 2166 comments=comments, 2167 tables=tables, 2168 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2169 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2170 where=self._parse_where(), 2171 returning=returning or self._parse_returning(), 2172 limit=self._parse_limit(), 2173 ) 2174 2175 def _parse_update(self) -> exp.Update: 2176 comments = self._prev_comments 2177 this = self._parse_table(joins=True, 
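# Editor's note (not in the original source): UPDATE_ALIAS_TOKENS is assumed to
# exclude SET from the usual table-alias tokens, so that in "UPDATE t SET ..."
# the SET keyword is not swallowed as an alias for t.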
alias_tokens=self.UPDATE_ALIAS_TOKENS) 2178 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2179 returning = self._parse_returning() 2180 return self.expression( 2181 exp.Update, 2182 comments=comments, 2183 **{ # type: ignore 2184 "this": this, 2185 "expressions": expressions, 2186 "from": self._parse_from(joins=True), 2187 "where": self._parse_where(), 2188 "returning": returning or self._parse_returning(), 2189 "order": self._parse_order(), 2190 "limit": self._parse_limit(), 2191 }, 2192 ) 2193 2194 def _parse_uncache(self) -> exp.Uncache: 2195 if not self._match(TokenType.TABLE): 2196 self.raise_error("Expecting TABLE after UNCACHE") 2197 2198 return self.expression( 2199 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2200 ) 2201 2202 def _parse_cache(self) -> exp.Cache: 2203 lazy = self._match_text_seq("LAZY") 2204 self._match(TokenType.TABLE) 2205 table = self._parse_table(schema=True) 2206 2207 options = [] 2208 if self._match_text_seq("OPTIONS"): 2209 self._match_l_paren() 2210 k = self._parse_string() 2211 self._match(TokenType.EQ) 2212 v = self._parse_string() 2213 options = [k, v] 2214 self._match_r_paren() 2215 2216 self._match(TokenType.ALIAS) 2217 return self.expression( 2218 exp.Cache, 2219 this=table, 2220 lazy=lazy, 2221 options=options, 2222 expression=self._parse_select(nested=True), 2223 ) 2224 2225 def _parse_partition(self) -> t.Optional[exp.Partition]: 2226 if not self._match(TokenType.PARTITION): 2227 return None 2228 2229 return self.expression( 2230 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2231 ) 2232 2233 def _parse_value(self) -> exp.Tuple: 2234 if self._match(TokenType.L_PAREN): 2235 expressions = self._parse_csv(self._parse_expression) 2236 self._match_r_paren() 2237 return self.expression(exp.Tuple, expressions=expressions) 2238 2239 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
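# (editor's illustrative example, not in the original source: in
# "SELECT * FROM (VALUES 1, 2)" each bare scalar becomes its own
# single-column row, so the bare expression is wrapped in a one-item Tuple)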
2240 # https://prestodb.io/docs/current/sql/values.html 2241 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2242 2243 def _parse_projections(self) -> t.List[exp.Expression]: 2244 return self._parse_expressions() 2245 2246 def _parse_select( 2247 self, 2248 nested: bool = False, 2249 table: bool = False, 2250 parse_subquery_alias: bool = True, 2251 parse_set_operation: bool = True, 2252 ) -> t.Optional[exp.Expression]: 2253 cte = self._parse_with() 2254 2255 if cte: 2256 this = self._parse_statement() 2257 2258 if not this: 2259 self.raise_error("Failed to parse any statement following CTE") 2260 return cte 2261 2262 if "with" in this.arg_types: 2263 this.set("with", cte) 2264 else: 2265 self.raise_error(f"{this.key} does not support CTE") 2266 this = cte 2267 2268 return this 2269 2270 # duckdb supports leading with FROM x 2271 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2272 2273 if self._match(TokenType.SELECT): 2274 comments = self._prev_comments 2275 2276 hint = self._parse_hint() 2277 all_ = self._match(TokenType.ALL) 2278 distinct = self._match_set(self.DISTINCT_TOKENS) 2279 2280 kind = ( 2281 self._match(TokenType.ALIAS) 2282 and self._match_texts(("STRUCT", "VALUE")) 2283 and self._prev.text.upper() 2284 ) 2285 2286 if distinct: 2287 distinct = self.expression( 2288 exp.Distinct, 2289 on=self._parse_value() if self._match(TokenType.ON) else None, 2290 ) 2291 2292 if all_ and distinct: 2293 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2294 2295 limit = self._parse_limit(top=True) 2296 projections = self._parse_projections() 2297 2298 this = self.expression( 2299 exp.Select, 2300 kind=kind, 2301 hint=hint, 2302 distinct=distinct, 2303 expressions=projections, 2304 limit=limit, 2305 ) 2306 this.comments = comments 2307 2308 into = self._parse_into() 2309 if into: 2310 this.set("into", into) 2311 2312 if not from_: 2313 from_ = self._parse_from() 2314 2315 if from_: 2316 this.set("from", from_) 2317 2318 this = self._parse_query_modifiers(this) 2319 elif (table or nested) and self._match(TokenType.L_PAREN): 2320 if self._match(TokenType.PIVOT): 2321 this = self._parse_simplified_pivot() 2322 elif self._match(TokenType.FROM): 2323 this = exp.select("*").from_( 2324 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2325 ) 2326 else: 2327 this = ( 2328 self._parse_table() 2329 if table 2330 else self._parse_select(nested=True, parse_set_operation=False) 2331 ) 2332 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2333 2334 self._match_r_paren() 2335 2336 # We return early here so that the UNION isn't attached to the subquery by the 2337 # following call to _parse_set_operations, but instead becomes the parent node 2338 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2339 elif self._match(TokenType.VALUES): 2340 this = self.expression( 2341 exp.Values, 2342 expressions=self._parse_csv(self._parse_value), 2343 alias=self._parse_table_alias(), 2344 ) 2345 elif from_: 2346 this = exp.select("*").from_(from_.this, copy=False) 2347 else: 2348 this = None 2349 2350 if parse_set_operation: 2351 return self._parse_set_operations(this) 2352 return this 2353 2354 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2355 if not skip_with_token and not self._match(TokenType.WITH): 2356 return None 2357 2358 comments = self._prev_comments 2359 recursive = self._match(TokenType.RECURSIVE) 2360 2361 expressions = [] 2362 while True: 2363 
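# Editor's note (not in the original source): CTEs are comma-separated, and a
# redundant WITH between CTEs (after, or instead of, the comma) is consumed
# leniently by the loop below.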
expressions.append(self._parse_cte()) 2364 2365 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2366 break 2367 else: 2368 self._match(TokenType.WITH) 2369 2370 return self.expression( 2371 exp.With, comments=comments, expressions=expressions, recursive=recursive 2372 ) 2373 2374 def _parse_cte(self) -> exp.CTE: 2375 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2376 if not alias or not alias.this: 2377 self.raise_error("Expected CTE to have alias") 2378 2379 self._match(TokenType.ALIAS) 2380 return self.expression( 2381 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2382 ) 2383 2384 def _parse_table_alias( 2385 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2386 ) -> t.Optional[exp.TableAlias]: 2387 any_token = self._match(TokenType.ALIAS) 2388 alias = ( 2389 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2390 or self._parse_string_as_identifier() 2391 ) 2392 2393 index = self._index 2394 if self._match(TokenType.L_PAREN): 2395 columns = self._parse_csv(self._parse_function_parameter) 2396 self._match_r_paren() if columns else self._retreat(index) 2397 else: 2398 columns = None 2399 2400 if not alias and not columns: 2401 return None 2402 2403 return self.expression(exp.TableAlias, this=alias, columns=columns) 2404 2405 def _parse_subquery( 2406 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2407 ) -> t.Optional[exp.Subquery]: 2408 if not this: 2409 return None 2410 2411 return self.expression( 2412 exp.Subquery, 2413 this=this, 2414 pivots=self._parse_pivots(), 2415 alias=self._parse_table_alias() if parse_alias else None, 2416 ) 2417 2418 def _parse_query_modifiers( 2419 self, this: t.Optional[exp.Expression] 2420 ) -> t.Optional[exp.Expression]: 2421 if isinstance(this, self.MODIFIABLES): 2422 for join in iter(self._parse_join, None): 2423 this.append("joins", join) 2424 for lateral in iter(self._parse_lateral, None): 2425 this.append("laterals", lateral) 2426 2427 while True: 2428 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2429 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2430 key, expression = parser(self) 2431 2432 if expression: 2433 this.set(key, expression) 2434 if key == "limit": 2435 offset = expression.args.pop("offset", None) 2436 if offset: 2437 this.set("offset", exp.Offset(expression=offset)) 2438 continue 2439 break 2440 return this 2441 2442 def _parse_hint(self) -> t.Optional[exp.Hint]: 2443 if self._match(TokenType.HINT): 2444 hints = [] 2445 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2446 hints.extend(hint) 2447 2448 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2449 self.raise_error("Expected */ after HINT") 2450 2451 return self.expression(exp.Hint, expressions=hints) 2452 2453 return None 2454 2455 def _parse_into(self) -> t.Optional[exp.Into]: 2456 if not self._match(TokenType.INTO): 2457 return None 2458 2459 temp = self._match(TokenType.TEMPORARY) 2460 unlogged = self._match_text_seq("UNLOGGED") 2461 self._match(TokenType.TABLE) 2462 2463 return self.expression( 2464 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2465 ) 2466 2467 def _parse_from( 2468 self, joins: bool = False, skip_from_token: bool = False 2469 ) -> t.Optional[exp.From]: 2470 if not skip_from_token and not self._match(TokenType.FROM): 2471 return None 2472 2473 return self.expression( 2474 exp.From, comments=self._prev_comments, 
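# Editor's note (not in the original source): any comments attached to the
# FROM token itself are preserved on the resulting From node here.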
this=self._parse_table(joins=joins) 2475 ) 2476 2477 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2478 if not self._match(TokenType.MATCH_RECOGNIZE): 2479 return None 2480 2481 self._match_l_paren() 2482 2483 partition = self._parse_partition_by() 2484 order = self._parse_order() 2485 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2486 2487 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2488 rows = exp.var("ONE ROW PER MATCH") 2489 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2490 text = "ALL ROWS PER MATCH" 2491 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2492 text += " SHOW EMPTY MATCHES" 2493 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2494 text += " OMIT EMPTY MATCHES" 2495 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2496 text += " WITH UNMATCHED ROWS" 2497 rows = exp.var(text) 2498 else: 2499 rows = None 2500 2501 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2502 text = "AFTER MATCH SKIP" 2503 if self._match_text_seq("PAST", "LAST", "ROW"): 2504 text += " PAST LAST ROW" 2505 elif self._match_text_seq("TO", "NEXT", "ROW"): 2506 text += " TO NEXT ROW" 2507 elif self._match_text_seq("TO", "FIRST"): 2508 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2509 elif self._match_text_seq("TO", "LAST"): 2510 text += f" TO LAST {self._advance_any().text}" # type: ignore 2511 after = exp.var(text) 2512 else: 2513 after = None 2514 2515 if self._match_text_seq("PATTERN"): 2516 self._match_l_paren() 2517 2518 if not self._curr: 2519 self.raise_error("Expecting )", self._curr) 2520 2521 paren = 1 2522 start = self._curr 2523 2524 while self._curr and paren > 0: 2525 if self._curr.token_type == TokenType.L_PAREN: 2526 paren += 1 2527 if self._curr.token_type == TokenType.R_PAREN: 2528 paren -= 1 2529 2530 end = self._prev 2531 self._advance() 2532 2533 if paren > 0: 2534 self.raise_error("Expecting )", self._curr) 2535 2536 pattern = exp.var(self._find_sql(start, end)) 2537 else: 2538 pattern = None 2539 2540 define = ( 2541 self._parse_csv(self._parse_name_as_expression) 2542 if self._match_text_seq("DEFINE") 2543 else None 2544 ) 2545 2546 self._match_r_paren() 2547 2548 return self.expression( 2549 exp.MatchRecognize, 2550 partition_by=partition, 2551 order=order, 2552 measures=measures, 2553 rows=rows, 2554 after=after, 2555 pattern=pattern, 2556 define=define, 2557 alias=self._parse_table_alias(), 2558 ) 2559 2560 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2561 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2562 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2563 cross_apply = False 2564 2565 if cross_apply is not None: 2566 this = self._parse_select(table=True) 2567 view = None 2568 outer = None 2569 elif self._match(TokenType.LATERAL): 2570 this = self._parse_select(table=True) 2571 view = self._match(TokenType.VIEW) 2572 outer = self._match(TokenType.OUTER) 2573 else: 2574 return None 2575 2576 if not this: 2577 this = ( 2578 self._parse_unnest() 2579 or self._parse_function() 2580 or self._parse_id_var(any_token=False) 2581 ) 2582 2583 while self._match(TokenType.DOT): 2584 this = exp.Dot( 2585 this=this, 2586 expression=self._parse_function() or self._parse_id_var(any_token=False), 2587 ) 2588 2589 if view: 2590 table = self._parse_id_var(any_token=False) 2591 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2592 table_alias: t.Optional[exp.TableAlias] =
self.expression( 2593 exp.TableAlias, this=table, columns=columns 2594 ) 2595 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2596 # We move the alias from the lateral's child node to the lateral itself 2597 table_alias = this.args["alias"].pop() 2598 else: 2599 table_alias = self._parse_table_alias() 2600 2601 return self.expression( 2602 exp.Lateral, 2603 this=this, 2604 view=view, 2605 outer=outer, 2606 alias=table_alias, 2607 cross_apply=cross_apply, 2608 ) 2609 2610 def _parse_join_parts( 2611 self, 2612 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2613 return ( 2614 self._match_set(self.JOIN_METHODS) and self._prev, 2615 self._match_set(self.JOIN_SIDES) and self._prev, 2616 self._match_set(self.JOIN_KINDS) and self._prev, 2617 ) 2618 2619 def _parse_join( 2620 self, skip_join_token: bool = False, parse_bracket: bool = False 2621 ) -> t.Optional[exp.Join]: 2622 if self._match(TokenType.COMMA): 2623 return self.expression(exp.Join, this=self._parse_table()) 2624 2625 index = self._index 2626 method, side, kind = self._parse_join_parts() 2627 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2628 join = self._match(TokenType.JOIN) 2629 2630 if not skip_join_token and not join: 2631 self._retreat(index) 2632 kind = None 2633 method = None 2634 side = None 2635 2636 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2637 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2638 2639 if not skip_join_token and not join and not outer_apply and not cross_apply: 2640 return None 2641 2642 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2643 2644 if method: 2645 kwargs["method"] = method.text 2646 if side: 2647 kwargs["side"] = side.text 2648 if kind: 2649 kwargs["kind"] = kind.text 2650 if hint: 2651 kwargs["hint"] = hint 2652 2653 if self._match(TokenType.ON): 2654 kwargs["on"] = self._parse_conjunction() 2655 elif self._match(TokenType.USING): 2656 kwargs["using"] = self._parse_wrapped_id_vars() 2657 elif not (kind and kind.token_type == TokenType.CROSS): 2658 index = self._index 2659 join = self._parse_join() 2660 2661 if join and self._match(TokenType.ON): 2662 kwargs["on"] = self._parse_conjunction() 2663 elif join and self._match(TokenType.USING): 2664 kwargs["using"] = self._parse_wrapped_id_vars() 2665 else: 2666 join = None 2667 self._retreat(index) 2668 2669 kwargs["this"].set("joins", [join] if join else None) 2670 2671 comments = [c for token in (method, side, kind) if token for c in token.comments] 2672 return self.expression(exp.Join, comments=comments, **kwargs) 2673 2674 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2675 this = self._parse_conjunction() 2676 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2677 return this 2678 2679 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2680 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2681 2682 return this 2683 2684 def _parse_index( 2685 self, 2686 index: t.Optional[exp.Expression] = None, 2687 ) -> t.Optional[exp.Index]: 2688 if index: 2689 unique = None 2690 primary = None 2691 amp = None 2692 2693 self._match(TokenType.ON) 2694 self._match(TokenType.TABLE) # hive 2695 table = self._parse_table_parts(schema=True) 2696 else: 2697 unique = self._match(TokenType.UNIQUE) 2698 primary = self._match_text_seq("PRIMARY") 2699 amp = self._match_text_seq("AMP") 2700 2701 if not self._match(TokenType.INDEX): 2702 
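# Editor's note (not in the original source): without the INDEX keyword this is
# not a standalone "[UNIQUE | PRIMARY | AMP] INDEX name" definition, so give up
# rather than guessing.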
return None 2703 2704 index = self._parse_id_var() 2705 table = None 2706 2707 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2708 2709 if self._match(TokenType.L_PAREN, advance=False): 2710 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2711 else: 2712 columns = None 2713 2714 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2715 2716 return self.expression( 2717 exp.Index, 2718 this=index, 2719 table=table, 2720 using=using, 2721 columns=columns, 2722 unique=unique, 2723 primary=primary, 2724 amp=amp, 2725 include=include, 2726 partition_by=self._parse_partition_by(), 2727 where=self._parse_where(), 2728 ) 2729 2730 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2731 hints: t.List[exp.Expression] = [] 2732 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2733 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2734 hints.append( 2735 self.expression( 2736 exp.WithTableHint, 2737 expressions=self._parse_csv( 2738 lambda: self._parse_function() or self._parse_var(any_token=True) 2739 ), 2740 ) 2741 ) 2742 self._match_r_paren() 2743 else: 2744 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2745 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2746 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2747 2748 self._match_texts(("INDEX", "KEY")) 2749 if self._match(TokenType.FOR): 2750 hint.set("target", self._advance_any() and self._prev.text.upper()) 2751 2752 hint.set("expressions", self._parse_wrapped_id_vars()) 2753 hints.append(hint) 2754 2755 return hints or None 2756 2757 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2758 return ( 2759 (not schema and self._parse_function(optional_parens=False)) 2760 or self._parse_id_var(any_token=False) 2761 or self._parse_string_as_identifier() 2762 or self._parse_placeholder() 2763 ) 2764 2765 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2766 catalog = None 2767 db = None 2768 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2769 2770 while self._match(TokenType.DOT): 2771 if catalog: 2772 # This allows nesting the table in arbitrarily many dot expressions if needed 2773 table = self.expression( 2774 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2775 ) 2776 else: 2777 catalog = db 2778 db = table 2779 table = self._parse_table_part(schema=schema) or "" 2780 2781 if not table: 2782 self.raise_error(f"Expected table name but got {self._curr}") 2783 2784 return self.expression( 2785 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2786 ) 2787 2788 def _parse_table( 2789 self, 2790 schema: bool = False, 2791 joins: bool = False, 2792 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2793 parse_bracket: bool = False, 2794 ) -> t.Optional[exp.Expression]: 2795 lateral = self._parse_lateral() 2796 if lateral: 2797 return lateral 2798 2799 unnest = self._parse_unnest() 2800 if unnest: 2801 return unnest 2802 2803 values = self._parse_derived_table_values() 2804 if values: 2805 return values 2806 2807 subquery = self._parse_select(table=True) 2808 if subquery: 2809 if not subquery.args.get("pivots"): 2810 subquery.set("pivots", self._parse_pivots()) 2811 return subquery 2812 2813 bracket = parse_bracket and self._parse_bracket(None) 2814 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2815 
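# Editor's note (not in the original source): if no leading bracket produced a
# table above, parse a (possibly dotted) table name such as catalog.db.table and
# let _parse_bracket wrap any trailing subscript around the result.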
this = t.cast( 2816 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2817 ) 2818 2819 if schema: 2820 return self._parse_schema(this=this) 2821 2822 version = self._parse_version() 2823 2824 if version: 2825 this.set("version", version) 2826 2827 if self.dialect.ALIAS_POST_TABLESAMPLE: 2828 table_sample = self._parse_table_sample() 2829 2830 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2831 if alias: 2832 this.set("alias", alias) 2833 2834 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2835 return self.expression( 2836 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2837 ) 2838 2839 this.set("hints", self._parse_table_hints()) 2840 2841 if not this.args.get("pivots"): 2842 this.set("pivots", self._parse_pivots()) 2843 2844 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2845 table_sample = self._parse_table_sample() 2846 2847 if table_sample: 2848 table_sample.set("this", this) 2849 this = table_sample 2850 2851 if joins: 2852 for join in iter(self._parse_join, None): 2853 this.append("joins", join) 2854 2855 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2856 this.set("ordinality", True) 2857 this.set("alias", self._parse_table_alias()) 2858 2859 return this 2860 2861 def _parse_version(self) -> t.Optional[exp.Version]: 2862 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2863 this = "TIMESTAMP" 2864 elif self._match(TokenType.VERSION_SNAPSHOT): 2865 this = "VERSION" 2866 else: 2867 return None 2868 2869 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2870 kind = self._prev.text.upper() 2871 start = self._parse_bitwise() 2872 self._match_texts(("TO", "AND")) 2873 end = self._parse_bitwise() 2874 expression: t.Optional[exp.Expression] = self.expression( 2875 exp.Tuple, expressions=[start, end] 2876 ) 2877 elif self._match_text_seq("CONTAINED", "IN"): 2878 kind = "CONTAINED IN" 2879 expression = self.expression( 2880 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2881 ) 2882 elif self._match(TokenType.ALL): 2883 kind = "ALL" 2884 expression = None 2885 else: 2886 self._match_text_seq("AS", "OF") 2887 kind = "AS OF" 2888 expression = self._parse_type() 2889 2890 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2891 2892 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2893 if not self._match(TokenType.UNNEST): 2894 return None 2895 2896 expressions = self._parse_wrapped_csv(self._parse_equality) 2897 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2898 2899 alias = self._parse_table_alias() if with_alias else None 2900 2901 if alias: 2902 if self.dialect.UNNEST_COLUMN_ONLY: 2903 if alias.args.get("columns"): 2904 self.raise_error("Unexpected extra column alias in unnest.") 2905 2906 alias.set("columns", [alias.this]) 2907 alias.set("this", None) 2908 2909 columns = alias.args.get("columns") or [] 2910 if offset and len(expressions) < len(columns): 2911 offset = columns.pop() 2912 2913 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2914 self._match(TokenType.ALIAS) 2915 offset = self._parse_id_var( 2916 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2917 ) or exp.to_identifier("offset") 2918 2919 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2920 2921 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2922 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 
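# Editor's note (not in the original source): "(VALUES ...) AS t(a, b)" is a
# derived table; a bare VALUES list is also accepted where a table is expected.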
2923 if not is_derived and not self._match(TokenType.VALUES): 2924 return None 2925 2926 expressions = self._parse_csv(self._parse_value) 2927 alias = self._parse_table_alias() 2928 2929 if is_derived: 2930 self._match_r_paren() 2931 2932 return self.expression( 2933 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2934 ) 2935 2936 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2937 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2938 as_modifier and self._match_text_seq("USING", "SAMPLE") 2939 ): 2940 return None 2941 2942 bucket_numerator = None 2943 bucket_denominator = None 2944 bucket_field = None 2945 percent = None 2946 size = None 2947 seed = None 2948 2949 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2950 matched_l_paren = self._match(TokenType.L_PAREN) 2951 2952 if self.TABLESAMPLE_CSV: 2953 num = None 2954 expressions = self._parse_csv(self._parse_primary) 2955 else: 2956 expressions = None 2957 num = ( 2958 self._parse_factor() 2959 if self._match(TokenType.NUMBER, advance=False) 2960 else self._parse_primary() or self._parse_placeholder() 2961 ) 2962 2963 if self._match_text_seq("BUCKET"): 2964 bucket_numerator = self._parse_number() 2965 self._match_text_seq("OUT", "OF") 2966 bucket_denominator = self._parse_number() 2967 self._match(TokenType.ON) 2968 bucket_field = self._parse_field() 2969 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2970 percent = num 2971 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2972 size = num 2973 else: 2974 percent = num 2975 2976 if matched_l_paren: 2977 self._match_r_paren() 2978 2979 if self._match(TokenType.L_PAREN): 2980 method = self._parse_var(upper=True) 2981 seed = self._match(TokenType.COMMA) and self._parse_number() 2982 self._match_r_paren() 2983 elif self._match_texts(("SEED", "REPEATABLE")): 2984 seed = self._parse_wrapped(self._parse_number) 2985 2986 return self.expression( 2987 exp.TableSample, 2988 expressions=expressions, 2989 method=method, 2990 bucket_numerator=bucket_numerator, 2991 bucket_denominator=bucket_denominator, 2992 bucket_field=bucket_field, 2993 percent=percent, 2994 size=size, 2995 seed=seed, 2996 ) 2997 2998 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2999 return list(iter(self._parse_pivot, None)) or None 3000 3001 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3002 return list(iter(self._parse_join, None)) or None 3003 3004 # https://duckdb.org/docs/sql/statements/pivot 3005 def _parse_simplified_pivot(self) -> exp.Pivot: 3006 def _parse_on() -> t.Optional[exp.Expression]: 3007 this = self._parse_bitwise() 3008 return self._parse_in(this) if self._match(TokenType.IN) else this 3009 3010 this = self._parse_table() 3011 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3012 using = self._match(TokenType.USING) and self._parse_csv( 3013 lambda: self._parse_alias(self._parse_function()) 3014 ) 3015 group = self._parse_group() 3016 return self.expression( 3017 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3018 ) 3019 3020 def _parse_pivot_in(self) -> exp.In: 3021 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3022 this = self._parse_conjunction() 3023 3024 self._match(TokenType.ALIAS) 3025 alias = self._parse_field() 3026 if alias: 3027 return self.expression(exp.PivotAlias, this=this, alias=alias) 3028 3029 return this 3030 3031 value = self._parse_column() 3032 3033 if
not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3034 self.raise_error("Expecting IN (") 3035 3036 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3037 3038 self._match_r_paren() 3039 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3040 3041 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3042 index = self._index 3043 include_nulls = None 3044 3045 if self._match(TokenType.PIVOT): 3046 unpivot = False 3047 elif self._match(TokenType.UNPIVOT): 3048 unpivot = True 3049 3050 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3051 if self._match_text_seq("INCLUDE", "NULLS"): 3052 include_nulls = True 3053 elif self._match_text_seq("EXCLUDE", "NULLS"): 3054 include_nulls = False 3055 else: 3056 return None 3057 3058 expressions = [] 3059 3060 if not self._match(TokenType.L_PAREN): 3061 self._retreat(index) 3062 return None 3063 3064 if unpivot: 3065 expressions = self._parse_csv(self._parse_column) 3066 else: 3067 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3068 3069 if not expressions: 3070 self.raise_error("Failed to parse PIVOT's aggregation list") 3071 3072 if not self._match(TokenType.FOR): 3073 self.raise_error("Expecting FOR") 3074 3075 field = self._parse_pivot_in() 3076 3077 self._match_r_paren() 3078 3079 pivot = self.expression( 3080 exp.Pivot, 3081 expressions=expressions, 3082 field=field, 3083 unpivot=unpivot, 3084 include_nulls=include_nulls, 3085 ) 3086 3087 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3088 pivot.set("alias", self._parse_table_alias()) 3089 3090 if not unpivot: 3091 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3092 3093 columns: t.List[exp.Expression] = [] 3094 for fld in pivot.args["field"].expressions: 3095 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3096 for name in names: 3097 if self.PREFIXED_PIVOT_COLUMNS: 3098 name = f"{name}_{field_name}" if name else field_name 3099 else: 3100 name = f"{field_name}_{name}" if name else field_name 3101 3102 columns.append(exp.to_identifier(name)) 3103 3104 pivot.set("columns", columns) 3105 3106 return pivot 3107 3108 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3109 return [agg.alias for agg in aggregations] 3110 3111 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3112 if not skip_where_token and not self._match(TokenType.WHERE): 3113 return None 3114 3115 return self.expression( 3116 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3117 ) 3118 3119 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3120 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3121 return None 3122 3123 elements = defaultdict(list) 3124 3125 if self._match(TokenType.ALL): 3126 return self.expression(exp.Group, all=True) 3127 3128 while True: 3129 expressions = self._parse_csv(self._parse_conjunction) 3130 if expressions: 3131 elements["expressions"].extend(expressions) 3132 3133 grouping_sets = self._parse_grouping_sets() 3134 if grouping_sets: 3135 elements["grouping_sets"].extend(grouping_sets) 3136 3137 rollup = None 3138 cube = None 3139 totals = None 3140 3141 index = self._index 3142 with_ = self._match(TokenType.WITH) 3143 if self._match(TokenType.ROLLUP): 3144 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3145 
elements["rollup"].extend(ensure_list(rollup)) 3146 3147 if self._match(TokenType.CUBE): 3148 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3149 elements["cube"].extend(ensure_list(cube)) 3150 3151 if self._match_text_seq("TOTALS"): 3152 totals = True 3153 elements["totals"] = True # type: ignore 3154 3155 if not (grouping_sets or rollup or cube or totals): 3156 if with_: 3157 self._retreat(index) 3158 break 3159 3160 return self.expression(exp.Group, **elements) # type: ignore 3161 3162 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3163 if not self._match(TokenType.GROUPING_SETS): 3164 return None 3165 3166 return self._parse_wrapped_csv(self._parse_grouping_set) 3167 3168 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3169 if self._match(TokenType.L_PAREN): 3170 grouping_set = self._parse_csv(self._parse_column) 3171 self._match_r_paren() 3172 return self.expression(exp.Tuple, expressions=grouping_set) 3173 3174 return self._parse_column() 3175 3176 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3177 if not skip_having_token and not self._match(TokenType.HAVING): 3178 return None 3179 return self.expression(exp.Having, this=self._parse_conjunction()) 3180 3181 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3182 if not self._match(TokenType.QUALIFY): 3183 return None 3184 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3185 3186 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3187 if skip_start_token: 3188 start = None 3189 elif self._match(TokenType.START_WITH): 3190 start = self._parse_conjunction() 3191 else: 3192 return None 3193 3194 self._match(TokenType.CONNECT_BY) 3195 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3196 exp.Prior, this=self._parse_bitwise() 3197 ) 3198 connect = self._parse_conjunction() 3199 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3200 3201 if not start and self._match(TokenType.START_WITH): 3202 start = self._parse_conjunction() 3203 3204 return self.expression(exp.Connect, start=start, connect=connect) 3205 3206 def _parse_name_as_expression(self) -> exp.Alias: 3207 return self.expression( 3208 exp.Alias, 3209 alias=self._parse_id_var(any_token=True), 3210 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3211 ) 3212 3213 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3214 if self._match_text_seq("INTERPOLATE"): 3215 return self._parse_wrapped_csv(self._parse_name_as_expression) 3216 return None 3217 3218 def _parse_order( 3219 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3220 ) -> t.Optional[exp.Expression]: 3221 siblings = None 3222 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3223 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3224 return this 3225 3226 siblings = True 3227 3228 return self.expression( 3229 exp.Order, 3230 this=this, 3231 expressions=self._parse_csv(self._parse_ordered), 3232 interpolate=self._parse_interpolate(), 3233 siblings=siblings, 3234 ) 3235 3236 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3237 if not self._match(token): 3238 return None 3239 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3240 3241 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3242 this = parse_method() if parse_method else self._parse_conjunction() 3243 3244 asc = 
self._match(TokenType.ASC) 3245 desc = self._match(TokenType.DESC) or (asc and False) 3246 3247 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3248 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3249 3250 nulls_first = is_nulls_first or False 3251 explicitly_null_ordered = is_nulls_first or is_nulls_last 3252 3253 if ( 3254 not explicitly_null_ordered 3255 and ( 3256 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3257 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3258 ) 3259 and self.dialect.NULL_ORDERING != "nulls_are_last" 3260 ): 3261 nulls_first = True 3262 3263 if self._match_text_seq("WITH", "FILL"): 3264 with_fill = self.expression( 3265 exp.WithFill, 3266 **{ # type: ignore 3267 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3268 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3269 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3270 }, 3271 ) 3272 else: 3273 with_fill = None 3274 3275 return self.expression( 3276 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3277 ) 3278 3279 def _parse_limit( 3280 self, this: t.Optional[exp.Expression] = None, top: bool = False 3281 ) -> t.Optional[exp.Expression]: 3282 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3283 comments = self._prev_comments 3284 if top: 3285 limit_paren = self._match(TokenType.L_PAREN) 3286 expression = self._parse_term() if limit_paren else self._parse_number() 3287 3288 if limit_paren: 3289 self._match_r_paren() 3290 else: 3291 expression = self._parse_term() 3292 3293 if self._match(TokenType.COMMA): 3294 offset = expression 3295 expression = self._parse_term() 3296 else: 3297 offset = None 3298 3299 limit_exp = self.expression( 3300 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3301 ) 3302 3303 return limit_exp 3304 3305 if self._match(TokenType.FETCH): 3306 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3307 direction = self._prev.text.upper() if direction else "FIRST" 3308 3309 count = self._parse_field(tokens=self.FETCH_TOKENS) 3310 percent = self._match(TokenType.PERCENT) 3311 3312 self._match_set((TokenType.ROW, TokenType.ROWS)) 3313 3314 only = self._match_text_seq("ONLY") 3315 with_ties = self._match_text_seq("WITH", "TIES") 3316 3317 if only and with_ties: 3318 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3319 3320 return self.expression( 3321 exp.Fetch, 3322 direction=direction, 3323 count=count, 3324 percent=percent, 3325 with_ties=with_ties, 3326 ) 3327 3328 return this 3329 3330 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3331 if not self._match(TokenType.OFFSET): 3332 return this 3333 3334 count = self._parse_term() 3335 self._match_set((TokenType.ROW, TokenType.ROWS)) 3336 return self.expression(exp.Offset, this=this, expression=count) 3337 3338 def _parse_locks(self) -> t.List[exp.Lock]: 3339 locks = [] 3340 while True: 3341 if self._match_text_seq("FOR", "UPDATE"): 3342 update = True 3343 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3344 "LOCK", "IN", "SHARE", "MODE" 3345 ): 3346 update = False 3347 else: 3348 break 3349 3350 expressions = None 3351 if self._match_text_seq("OF"): 3352 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3353 3354 wait: t.Optional[bool | exp.Expression] = None 3355 if self._match_text_seq("NOWAIT"): 3356 wait = True 3357 elif self._match_text_seq("WAIT"): 3358 wait = 
self._parse_primary() 3359 elif self._match_text_seq("SKIP", "LOCKED"): 3360 wait = False 3361 3362 locks.append( 3363 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3364 ) 3365 3366 return locks 3367 3368 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3369 while this and self._match_set(self.SET_OPERATIONS): 3370 token_type = self._prev.token_type 3371 3372 if token_type == TokenType.UNION: 3373 operation = exp.Union 3374 elif token_type == TokenType.EXCEPT: 3375 operation = exp.Except 3376 else: 3377 operation = exp.Intersect 3378 3379 comments = self._prev.comments 3380 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3381 by_name = self._match_text_seq("BY", "NAME") 3382 expression = self._parse_select(nested=True, parse_set_operation=False) 3383 3384 this = self.expression( 3385 operation, 3386 comments=comments, 3387 this=this, 3388 distinct=distinct, 3389 by_name=by_name, 3390 expression=expression, 3391 ) 3392 3393 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3394 expression = this.expression 3395 3396 if expression: 3397 for arg in self.UNION_MODIFIERS: 3398 expr = expression.args.get(arg) 3399 if expr: 3400 this.set(arg, expr.pop()) 3401 3402 return this 3403 3404 def _parse_expression(self) -> t.Optional[exp.Expression]: 3405 return self._parse_alias(self._parse_conjunction()) 3406 3407 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3408 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3409 3410 def _parse_equality(self) -> t.Optional[exp.Expression]: 3411 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3412 3413 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3414 return self._parse_tokens(self._parse_range, self.COMPARISON) 3415 3416 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3417 this = this or self._parse_bitwise() 3418 negate = self._match(TokenType.NOT) 3419 3420 if self._match_set(self.RANGE_PARSERS): 3421 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3422 if not expression: 3423 return this 3424 3425 this = expression 3426 elif self._match(TokenType.ISNULL): 3427 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3428 3429 # Postgres supports ISNULL and NOTNULL for conditions. 
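# An illustrative sketch (not from the source): `a ISNULL` is Postgres shorthand for
# `a IS NULL`, and `a NOTNULL` for `a IS NOT NULL`; both forms are normalized into
# exp.Is here (ISNULL just above, NOTNULL just below with an extra exp.Not wrapper), e.g.
#     parse_one("SELECT a FROM t WHERE a NOTNULL", read="postgres")
#     # roughly equivalent to: SELECT a FROM t WHERE NOT a IS NULL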
3430 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3431 if self._match(TokenType.NOTNULL): 3432 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3433 this = self.expression(exp.Not, this=this) 3434 3435 if negate: 3436 this = self.expression(exp.Not, this=this) 3437 3438 if self._match(TokenType.IS): 3439 this = self._parse_is(this) 3440 3441 return this 3442 3443 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3444 index = self._index - 1 3445 negate = self._match(TokenType.NOT) 3446 3447 if self._match_text_seq("DISTINCT", "FROM"): 3448 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3449 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3450 3451 expression = self._parse_null() or self._parse_boolean() 3452 if not expression: 3453 self._retreat(index) 3454 return None 3455 3456 this = self.expression(exp.Is, this=this, expression=expression) 3457 return self.expression(exp.Not, this=this) if negate else this 3458 3459 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3460 unnest = self._parse_unnest(with_alias=False) 3461 if unnest: 3462 this = self.expression(exp.In, this=this, unnest=unnest) 3463 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3464 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3465 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3466 3467 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3468 this = self.expression(exp.In, this=this, query=expressions[0]) 3469 else: 3470 this = self.expression(exp.In, this=this, expressions=expressions) 3471 3472 if matched_l_paren: 3473 self._match_r_paren(this) 3474 elif not self._match(TokenType.R_BRACKET, expression=this): 3475 self.raise_error("Expecting ]") 3476 else: 3477 this = self.expression(exp.In, this=this, field=self._parse_field()) 3478 3479 return this 3480 3481 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3482 low = self._parse_bitwise() 3483 self._match(TokenType.AND) 3484 high = self._parse_bitwise() 3485 return self.expression(exp.Between, this=this, low=low, high=high) 3486 3487 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3488 if not self._match(TokenType.ESCAPE): 3489 return this 3490 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3491 3492 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3493 index = self._index 3494 3495 if not self._match(TokenType.INTERVAL) and match_interval: 3496 return None 3497 3498 if self._match(TokenType.STRING, advance=False): 3499 this = self._parse_primary() 3500 else: 3501 this = self._parse_term() 3502 3503 if not this or ( 3504 isinstance(this, exp.Column) 3505 and not this.table 3506 and not this.this.quoted 3507 and this.name.upper() == "IS" 3508 ): 3509 self._retreat(index) 3510 return None 3511 3512 unit = self._parse_function() or ( 3513 not self._match(TokenType.ALIAS, advance=False) 3514 and self._parse_var(any_token=True, upper=True) 3515 ) 3516 3517 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3518 # each INTERVAL expression into this canonical form so it's easy to transpile 3519 if this and this.is_number: 3520 this = exp.Literal.string(this.name) 3521 elif this and this.is_string: 3522 parts = this.name.split() 3523 3524 if len(parts) == 2: 3525 if unit: 3526 # This 
is not actually a unit, it's something else (e.g. a "window side") 3527 unit = None 3528 self._retreat(self._index - 1) 3529 3530 this = exp.Literal.string(parts[0]) 3531 unit = self.expression(exp.Var, this=parts[1].upper()) 3532 3533 return self.expression(exp.Interval, this=this, unit=unit) 3534 3535 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3536 this = self._parse_term() 3537 3538 while True: 3539 if self._match_set(self.BITWISE): 3540 this = self.expression( 3541 self.BITWISE[self._prev.token_type], 3542 this=this, 3543 expression=self._parse_term(), 3544 ) 3545 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3546 this = self.expression( 3547 exp.DPipe, 3548 this=this, 3549 expression=self._parse_term(), 3550 safe=not self.dialect.STRICT_STRING_CONCAT, 3551 ) 3552 elif self._match(TokenType.DQMARK): 3553 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3554 elif self._match_pair(TokenType.LT, TokenType.LT): 3555 this = self.expression( 3556 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3557 ) 3558 elif self._match_pair(TokenType.GT, TokenType.GT): 3559 this = self.expression( 3560 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3561 ) 3562 else: 3563 break 3564 3565 return this 3566 3567 def _parse_term(self) -> t.Optional[exp.Expression]: 3568 return self._parse_tokens(self._parse_factor, self.TERM) 3569 3570 def _parse_factor(self) -> t.Optional[exp.Expression]: 3571 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3572 this = parse_method() 3573 3574 while self._match_set(self.FACTOR): 3575 this = self.expression( 3576 self.FACTOR[self._prev.token_type], 3577 this=this, 3578 comments=self._prev_comments, 3579 expression=parse_method(), 3580 ) 3581 if isinstance(this, exp.Div): 3582 this.args["typed"] = self.dialect.TYPED_DIVISION 3583 this.args["safe"] = self.dialect.SAFE_DIVISION 3584 3585 return this 3586 3587 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3588 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3589 3590 def _parse_unary(self) -> t.Optional[exp.Expression]: 3591 if self._match_set(self.UNARY_PARSERS): 3592 return self.UNARY_PARSERS[self._prev.token_type](self) 3593 return self._parse_at_time_zone(self._parse_type()) 3594 3595 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3596 interval = parse_interval and self._parse_interval() 3597 if interval: 3598 # Convert INTERVAL 'val_1' unit_1 ... 
'val_n' unit_n into a sum of intervals 3599 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3600 interval = self.expression( # type: ignore 3601 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3602 ) 3603 3604 return interval 3605 3606 index = self._index 3607 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3608 this = self._parse_column() 3609 3610 if data_type: 3611 if isinstance(this, exp.Literal): 3612 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3613 if parser: 3614 return parser(self, this, data_type) 3615 return self.expression(exp.Cast, this=this, to=data_type) 3616 if not data_type.expressions: 3617 self._retreat(index) 3618 return self._parse_column() 3619 return self._parse_column_ops(data_type) 3620 3621 return this and self._parse_column_ops(this) 3622 3623 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3624 this = self._parse_type() 3625 if not this: 3626 return None 3627 3628 return self.expression( 3629 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3630 ) 3631 3632 def _parse_types( 3633 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3634 ) -> t.Optional[exp.Expression]: 3635 index = self._index 3636 3637 prefix = self._match_text_seq("SYSUDTLIB", ".") 3638 3639 if not self._match_set(self.TYPE_TOKENS): 3640 identifier = allow_identifiers and self._parse_id_var( 3641 any_token=False, tokens=(TokenType.VAR,) 3642 ) 3643 3644 if identifier: 3645 tokens = self.dialect.tokenize(identifier.name) 3646 3647 if len(tokens) != 1: 3648 self.raise_error("Unexpected identifier", self._prev) 3649 3650 if tokens[0].token_type in self.TYPE_TOKENS: 3651 self._prev = tokens[0] 3652 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3653 type_name = identifier.name 3654 3655 while self._match(TokenType.DOT): 3656 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3657 3658 return exp.DataType.build(type_name, udt=True) 3659 else: 3660 return None 3661 else: 3662 return None 3663 3664 type_token = self._prev.token_type 3665 3666 if type_token == TokenType.PSEUDO_TYPE: 3667 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3668 3669 if type_token == TokenType.OBJECT_IDENTIFIER: 3670 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3671 3672 nested = type_token in self.NESTED_TYPE_TOKENS 3673 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3674 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3675 expressions = None 3676 maybe_func = False 3677 3678 if self._match(TokenType.L_PAREN): 3679 if is_struct: 3680 expressions = self._parse_csv(self._parse_struct_types) 3681 elif nested: 3682 expressions = self._parse_csv( 3683 lambda: self._parse_types( 3684 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3685 ) 3686 ) 3687 elif type_token in self.ENUM_TYPE_TOKENS: 3688 expressions = self._parse_csv(self._parse_equality) 3689 elif is_aggregate: 3690 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3691 any_token=False, tokens=(TokenType.VAR,) 3692 ) 3693 if not func_or_ident or not self._match(TokenType.COMMA): 3694 return None 3695 expressions = self._parse_csv( 3696 lambda: self._parse_types( 3697 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3698 ) 3699 ) 3700 expressions.insert(0, func_or_ident) 3701 else: 3702 expressions = self._parse_csv(self._parse_type_size) 3703 3704 if not 
expressions or not self._match(TokenType.R_PAREN): 3705 self._retreat(index) 3706 return None 3707 3708 maybe_func = True 3709 3710 this: t.Optional[exp.Expression] = None 3711 values: t.Optional[t.List[exp.Expression]] = None 3712 3713 if nested and self._match(TokenType.LT): 3714 if is_struct: 3715 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3716 else: 3717 expressions = self._parse_csv( 3718 lambda: self._parse_types( 3719 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3720 ) 3721 ) 3722 3723 if not self._match(TokenType.GT): 3724 self.raise_error("Expecting >") 3725 3726 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3727 values = self._parse_csv(self._parse_conjunction) 3728 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3729 3730 if type_token in self.TIMESTAMPS: 3731 if self._match_text_seq("WITH", "TIME", "ZONE"): 3732 maybe_func = False 3733 tz_type = ( 3734 exp.DataType.Type.TIMETZ 3735 if type_token in self.TIMES 3736 else exp.DataType.Type.TIMESTAMPTZ 3737 ) 3738 this = exp.DataType(this=tz_type, expressions=expressions) 3739 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3740 maybe_func = False 3741 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3742 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3743 maybe_func = False 3744 elif type_token == TokenType.INTERVAL: 3745 unit = self._parse_var() 3746 3747 if self._match_text_seq("TO"): 3748 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3749 else: 3750 span = None 3751 3752 if span or not unit: 3753 this = self.expression( 3754 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3755 ) 3756 else: 3757 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3758 3759 if maybe_func and check_func: 3760 index2 = self._index 3761 peek = self._parse_string() 3762 3763 if not peek: 3764 self._retreat(index) 3765 return None 3766 3767 self._retreat(index2) 3768 3769 if not this: 3770 if self._match_text_seq("UNSIGNED"): 3771 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3772 if not unsigned_type_token: 3773 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3774 3775 type_token = unsigned_type_token or type_token 3776 3777 this = exp.DataType( 3778 this=exp.DataType.Type[type_token.value], 3779 expressions=expressions, 3780 nested=nested, 3781 values=values, 3782 prefix=prefix, 3783 ) 3784 3785 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3786 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3787 3788 return this 3789 3790 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3791 index = self._index 3792 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3793 self._match(TokenType.COLON) 3794 column_def = self._parse_column_def(this) 3795 3796 if type_required and ( 3797 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3798 ): 3799 self._retreat(index) 3800 return self._parse_types() 3801 3802 return column_def 3803 3804 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3805 if not self._match_text_seq("AT", "TIME", "ZONE"): 3806 return this 3807 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3808 3809 def _parse_column(self) -> t.Optional[exp.Expression]: 3810 this = 
self._parse_field() 3811 if isinstance(this, exp.Identifier): 3812 this = self.expression(exp.Column, this=this) 3813 elif not this: 3814 return self._parse_bracket(this) 3815 return self._parse_column_ops(this) 3816 3817 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3818 this = self._parse_bracket(this) 3819 3820 while self._match_set(self.COLUMN_OPERATORS): 3821 op_token = self._prev.token_type 3822 op = self.COLUMN_OPERATORS.get(op_token) 3823 3824 if op_token == TokenType.DCOLON: 3825 field = self._parse_types() 3826 if not field: 3827 self.raise_error("Expected type") 3828 elif op and self._curr: 3829 self._advance() 3830 value = self._prev.text 3831 field = ( 3832 exp.Literal.number(value) 3833 if self._prev.token_type == TokenType.NUMBER 3834 else exp.Literal.string(value) 3835 ) 3836 else: 3837 field = self._parse_field(anonymous_func=True, any_token=True) 3838 3839 if isinstance(field, exp.Func): 3840 # bigquery allows function calls like x.y.count(...) 3841 # SAFE.SUBSTR(...) 3842 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3843 this = self._replace_columns_with_dots(this) 3844 3845 if op: 3846 this = op(self, this, field) 3847 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3848 this = self.expression( 3849 exp.Column, 3850 this=field, 3851 table=this.this, 3852 db=this.args.get("table"), 3853 catalog=this.args.get("db"), 3854 ) 3855 else: 3856 this = self.expression(exp.Dot, this=this, expression=field) 3857 this = self._parse_bracket(this) 3858 return this 3859 3860 def _parse_primary(self) -> t.Optional[exp.Expression]: 3861 if self._match_set(self.PRIMARY_PARSERS): 3862 token_type = self._prev.token_type 3863 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3864 3865 if token_type == TokenType.STRING: 3866 expressions = [primary] 3867 while self._match(TokenType.STRING): 3868 expressions.append(exp.Literal.string(self._prev.text)) 3869 3870 if len(expressions) > 1: 3871 return self.expression(exp.Concat, expressions=expressions) 3872 3873 return primary 3874 3875 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3876 return exp.Literal.number(f"0.{self._prev.text}") 3877 3878 if self._match(TokenType.L_PAREN): 3879 comments = self._prev_comments 3880 query = self._parse_select() 3881 3882 if query: 3883 expressions = [query] 3884 else: 3885 expressions = self._parse_expressions() 3886 3887 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3888 3889 if isinstance(this, exp.Subqueryable): 3890 this = self._parse_set_operations( 3891 self._parse_subquery(this=this, parse_alias=False) 3892 ) 3893 elif len(expressions) > 1: 3894 this = self.expression(exp.Tuple, expressions=expressions) 3895 else: 3896 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3897 3898 if this: 3899 this.add_comments(comments) 3900 3901 self._match_r_paren(expression=this) 3902 return this 3903 3904 return None 3905 3906 def _parse_field( 3907 self, 3908 any_token: bool = False, 3909 tokens: t.Optional[t.Collection[TokenType]] = None, 3910 anonymous_func: bool = False, 3911 ) -> t.Optional[exp.Expression]: 3912 return ( 3913 self._parse_primary() 3914 or self._parse_function(anonymous=anonymous_func) 3915 or self._parse_id_var(any_token=any_token, tokens=tokens) 3916 ) 3917 3918 def _parse_function( 3919 self, 3920 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3921 anonymous: bool = False, 3922 optional_parens: bool = 
True, 3923 ) -> t.Optional[exp.Expression]: 3924 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3925 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3926 fn_syntax = False 3927 if ( 3928 self._match(TokenType.L_BRACE, advance=False) 3929 and self._next 3930 and self._next.text.upper() == "FN" 3931 ): 3932 self._advance(2) 3933 fn_syntax = True 3934 3935 func = self._parse_function_call( 3936 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3937 ) 3938 3939 if fn_syntax: 3940 self._match(TokenType.R_BRACE) 3941 3942 return func 3943 3944 def _parse_function_call( 3945 self, 3946 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3947 anonymous: bool = False, 3948 optional_parens: bool = True, 3949 ) -> t.Optional[exp.Expression]: 3950 if not self._curr: 3951 return None 3952 3953 comments = self._curr.comments 3954 token_type = self._curr.token_type 3955 this = self._curr.text 3956 upper = this.upper() 3957 3958 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3959 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3960 self._advance() 3961 return parser(self) 3962 3963 if not self._next or self._next.token_type != TokenType.L_PAREN: 3964 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3965 self._advance() 3966 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3967 3968 return None 3969 3970 if token_type not in self.FUNC_TOKENS: 3971 return None 3972 3973 self._advance(2) 3974 3975 parser = self.FUNCTION_PARSERS.get(upper) 3976 if parser and not anonymous: 3977 this = parser(self) 3978 else: 3979 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3980 3981 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3982 this = self.expression(subquery_predicate, this=self._parse_select()) 3983 self._match_r_paren() 3984 return this 3985 3986 if functions is None: 3987 functions = self.FUNCTIONS 3988 3989 function = functions.get(upper) 3990 3991 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3992 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3993 3994 if function and not anonymous: 3995 if "dialect" in function.__code__.co_varnames: 3996 func = function(args, dialect=self.dialect) 3997 else: 3998 func = function(args) 3999 4000 func = self.validate_expression(func, args) 4001 if not self.dialect.NORMALIZE_FUNCTIONS: 4002 func.meta["name"] = this 4003 4004 this = func 4005 else: 4006 this = self.expression(exp.Anonymous, this=this, expressions=args) 4007 4008 if isinstance(this, exp.Expression): 4009 this.add_comments(comments) 4010 4011 self._match_r_paren(this) 4012 return self._parse_window(this) 4013 4014 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4015 return self._parse_column_def(self._parse_id_var()) 4016 4017 def _parse_user_defined_function( 4018 self, kind: t.Optional[TokenType] = None 4019 ) -> t.Optional[exp.Expression]: 4020 this = self._parse_id_var() 4021 4022 while self._match(TokenType.DOT): 4023 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4024 4025 if not self._match(TokenType.L_PAREN): 4026 return this 4027 4028 expressions = self._parse_csv(self._parse_function_parameter) 4029 self._match_r_paren() 4030 return self.expression( 4031 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4032 ) 4033 4034 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4035 literal = 
self._parse_primary() 4036 if literal: 4037 return self.expression(exp.Introducer, this=token.text, expression=literal) 4038 4039 return self.expression(exp.Identifier, this=token.text) 4040 4041 def _parse_session_parameter(self) -> exp.SessionParameter: 4042 kind = None 4043 this = self._parse_id_var() or self._parse_primary() 4044 4045 if this and self._match(TokenType.DOT): 4046 kind = this.name 4047 this = self._parse_var() or self._parse_primary() 4048 4049 return self.expression(exp.SessionParameter, this=this, kind=kind) 4050 4051 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4052 index = self._index 4053 4054 if self._match(TokenType.L_PAREN): 4055 expressions = t.cast( 4056 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4057 ) 4058 4059 if not self._match(TokenType.R_PAREN): 4060 self._retreat(index) 4061 else: 4062 expressions = [self._parse_id_var()] 4063 4064 if self._match_set(self.LAMBDAS): 4065 return self.LAMBDAS[self._prev.token_type](self, expressions) 4066 4067 self._retreat(index) 4068 4069 this: t.Optional[exp.Expression] 4070 4071 if self._match(TokenType.DISTINCT): 4072 this = self.expression( 4073 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4074 ) 4075 else: 4076 this = self._parse_select_or_expression(alias=alias) 4077 4078 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4079 4080 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4081 index = self._index 4082 4083 if not self.errors: 4084 try: 4085 if self._parse_select(nested=True): 4086 return this 4087 except ParseError: 4088 pass 4089 finally: 4090 self.errors.clear() 4091 self._retreat(index) 4092 4093 if not self._match(TokenType.L_PAREN): 4094 return this 4095 4096 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4097 4098 self._match_r_paren() 4099 return self.expression(exp.Schema, this=this, expressions=args) 4100 4101 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4102 return self._parse_column_def(self._parse_field(any_token=True)) 4103 4104 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4105 # column defs are not really columns, they're identifiers 4106 if isinstance(this, exp.Column): 4107 this = this.this 4108 4109 kind = self._parse_types(schema=True) 4110 4111 if self._match_text_seq("FOR", "ORDINALITY"): 4112 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4113 4114 constraints: t.List[exp.Expression] = [] 4115 4116 if not kind and self._match(TokenType.ALIAS): 4117 constraints.append( 4118 self.expression( 4119 exp.ComputedColumnConstraint, 4120 this=self._parse_conjunction(), 4121 persisted=self._match_text_seq("PERSISTED"), 4122 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4123 ) 4124 ) 4125 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4126 self._match(TokenType.ALIAS) 4127 constraints.append( 4128 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4129 ) 4130 4131 while True: 4132 constraint = self._parse_column_constraint() 4133 if not constraint: 4134 break 4135 constraints.append(constraint) 4136 4137 if not kind and not constraints: 4138 return this 4139 4140 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4141 4142 def _parse_auto_increment( 4143 self, 4144 ) -> exp.GeneratedAsIdentityColumnConstraint | 
exp.AutoIncrementColumnConstraint: 4145 start = None 4146 increment = None 4147 4148 if self._match(TokenType.L_PAREN, advance=False): 4149 args = self._parse_wrapped_csv(self._parse_bitwise) 4150 start = seq_get(args, 0) 4151 increment = seq_get(args, 1) 4152 elif self._match_text_seq("START"): 4153 start = self._parse_bitwise() 4154 self._match_text_seq("INCREMENT") 4155 increment = self._parse_bitwise() 4156 4157 if start and increment: 4158 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4159 4160 return exp.AutoIncrementColumnConstraint() 4161 4162 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4163 if not self._match_text_seq("REFRESH"): 4164 self._retreat(self._index - 1) 4165 return None 4166 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4167 4168 def _parse_compress(self) -> exp.CompressColumnConstraint: 4169 if self._match(TokenType.L_PAREN, advance=False): 4170 return self.expression( 4171 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4172 ) 4173 4174 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4175 4176 def _parse_generated_as_identity( 4177 self, 4178 ) -> ( 4179 exp.GeneratedAsIdentityColumnConstraint 4180 | exp.ComputedColumnConstraint 4181 | exp.GeneratedAsRowColumnConstraint 4182 ): 4183 if self._match_text_seq("BY", "DEFAULT"): 4184 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4185 this = self.expression( 4186 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4187 ) 4188 else: 4189 self._match_text_seq("ALWAYS") 4190 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4191 4192 self._match(TokenType.ALIAS) 4193 4194 if self._match_text_seq("ROW"): 4195 start = self._match_text_seq("START") 4196 if not start: 4197 self._match(TokenType.END) 4198 hidden = self._match_text_seq("HIDDEN") 4199 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4200 4201 identity = self._match_text_seq("IDENTITY") 4202 4203 if self._match(TokenType.L_PAREN): 4204 if self._match(TokenType.START_WITH): 4205 this.set("start", self._parse_bitwise()) 4206 if self._match_text_seq("INCREMENT", "BY"): 4207 this.set("increment", self._parse_bitwise()) 4208 if self._match_text_seq("MINVALUE"): 4209 this.set("minvalue", self._parse_bitwise()) 4210 if self._match_text_seq("MAXVALUE"): 4211 this.set("maxvalue", self._parse_bitwise()) 4212 4213 if self._match_text_seq("CYCLE"): 4214 this.set("cycle", True) 4215 elif self._match_text_seq("NO", "CYCLE"): 4216 this.set("cycle", False) 4217 4218 if not identity: 4219 this.set("expression", self._parse_bitwise()) 4220 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4221 args = self._parse_csv(self._parse_bitwise) 4222 this.set("start", seq_get(args, 0)) 4223 this.set("increment", seq_get(args, 1)) 4224 4225 self._match_r_paren() 4226 4227 return this 4228 4229 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4230 self._match_text_seq("LENGTH") 4231 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4232 4233 def _parse_not_constraint( 4234 self, 4235 ) -> t.Optional[exp.Expression]: 4236 if self._match_text_seq("NULL"): 4237 return self.expression(exp.NotNullColumnConstraint) 4238 if self._match_text_seq("CASESPECIFIC"): 4239 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4240 if self._match_text_seq("FOR", 
"REPLICATION"): 4241 return self.expression(exp.NotForReplicationColumnConstraint) 4242 return None 4243 4244 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4245 if self._match(TokenType.CONSTRAINT): 4246 this = self._parse_id_var() 4247 else: 4248 this = None 4249 4250 if self._match_texts(self.CONSTRAINT_PARSERS): 4251 return self.expression( 4252 exp.ColumnConstraint, 4253 this=this, 4254 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4255 ) 4256 4257 return this 4258 4259 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4260 if not self._match(TokenType.CONSTRAINT): 4261 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4262 4263 this = self._parse_id_var() 4264 expressions = [] 4265 4266 while True: 4267 constraint = self._parse_unnamed_constraint() or self._parse_function() 4268 if not constraint: 4269 break 4270 expressions.append(constraint) 4271 4272 return self.expression(exp.Constraint, this=this, expressions=expressions) 4273 4274 def _parse_unnamed_constraint( 4275 self, constraints: t.Optional[t.Collection[str]] = None 4276 ) -> t.Optional[exp.Expression]: 4277 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4278 constraints or self.CONSTRAINT_PARSERS 4279 ): 4280 return None 4281 4282 constraint = self._prev.text.upper() 4283 if constraint not in self.CONSTRAINT_PARSERS: 4284 self.raise_error(f"No parser found for schema constraint {constraint}.") 4285 4286 return self.CONSTRAINT_PARSERS[constraint](self) 4287 4288 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4289 self._match_text_seq("KEY") 4290 return self.expression( 4291 exp.UniqueColumnConstraint, 4292 this=self._parse_schema(self._parse_id_var(any_token=False)), 4293 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4294 ) 4295 4296 def _parse_key_constraint_options(self) -> t.List[str]: 4297 options = [] 4298 while True: 4299 if not self._curr: 4300 break 4301 4302 if self._match(TokenType.ON): 4303 action = None 4304 on = self._advance_any() and self._prev.text 4305 4306 if self._match_text_seq("NO", "ACTION"): 4307 action = "NO ACTION" 4308 elif self._match_text_seq("CASCADE"): 4309 action = "CASCADE" 4310 elif self._match_text_seq("RESTRICT"): 4311 action = "RESTRICT" 4312 elif self._match_pair(TokenType.SET, TokenType.NULL): 4313 action = "SET NULL" 4314 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4315 action = "SET DEFAULT" 4316 else: 4317 self.raise_error("Invalid key constraint") 4318 4319 options.append(f"ON {on} {action}") 4320 elif self._match_text_seq("NOT", "ENFORCED"): 4321 options.append("NOT ENFORCED") 4322 elif self._match_text_seq("DEFERRABLE"): 4323 options.append("DEFERRABLE") 4324 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4325 options.append("INITIALLY DEFERRED") 4326 elif self._match_text_seq("NORELY"): 4327 options.append("NORELY") 4328 elif self._match_text_seq("MATCH", "FULL"): 4329 options.append("MATCH FULL") 4330 else: 4331 break 4332 4333 return options 4334 4335 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4336 if match and not self._match(TokenType.REFERENCES): 4337 return None 4338 4339 expressions = None 4340 this = self._parse_table(schema=True) 4341 options = self._parse_key_constraint_options() 4342 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4343 4344 def _parse_foreign_key(self) -> exp.ForeignKey: 4345 expressions = 
self._parse_wrapped_id_vars() 4346 reference = self._parse_references() 4347 options = {} 4348 4349 while self._match(TokenType.ON): 4350 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4351 self.raise_error("Expected DELETE or UPDATE") 4352 4353 kind = self._prev.text.lower() 4354 4355 if self._match_text_seq("NO", "ACTION"): 4356 action = "NO ACTION" 4357 elif self._match(TokenType.SET): 4358 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4359 action = "SET " + self._prev.text.upper() 4360 else: 4361 self._advance() 4362 action = self._prev.text.upper() 4363 4364 options[kind] = action 4365 4366 return self.expression( 4367 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4368 ) 4369 4370 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4371 return self._parse_field() 4372 4373 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4374 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4375 self._retreat(self._index - 1) 4376 return None 4377 4378 id_vars = self._parse_wrapped_id_vars() 4379 return self.expression( 4380 exp.PeriodForSystemTimeConstraint, 4381 this=seq_get(id_vars, 0), 4382 expression=seq_get(id_vars, 1), 4383 ) 4384 4385 def _parse_primary_key( 4386 self, wrapped_optional: bool = False, in_props: bool = False 4387 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4388 desc = ( 4389 self._match_set((TokenType.ASC, TokenType.DESC)) 4390 and self._prev.token_type == TokenType.DESC 4391 ) 4392 4393 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4394 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4395 4396 expressions = self._parse_wrapped_csv( 4397 self._parse_primary_key_part, optional=wrapped_optional 4398 ) 4399 options = self._parse_key_constraint_options() 4400 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4401 4402 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4403 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4404 4405 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4406 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4407 return this 4408 4409 bracket_kind = self._prev.token_type 4410 expressions = self._parse_csv( 4411 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4412 ) 4413 4414 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4415 self.raise_error("Expected ]") 4416 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4417 self.raise_error("Expected }") 4418 4419 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4420 if bracket_kind == TokenType.L_BRACE: 4421 this = self.expression(exp.Struct, expressions=expressions) 4422 elif not this or this.name.upper() == "ARRAY": 4423 this = self.expression(exp.Array, expressions=expressions) 4424 else: 4425 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4426 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4427 4428 self._add_comments(this) 4429 return self._parse_bracket(this) 4430 4431 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4432 if self._match(TokenType.COLON): 4433 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4434 return this 4435 4436 def 
_parse_case(self) -> t.Optional[exp.Expression]: 4437 ifs = [] 4438 default = None 4439 4440 comments = self._prev_comments 4441 expression = self._parse_conjunction() 4442 4443 while self._match(TokenType.WHEN): 4444 this = self._parse_conjunction() 4445 self._match(TokenType.THEN) 4446 then = self._parse_conjunction() 4447 ifs.append(self.expression(exp.If, this=this, true=then)) 4448 4449 if self._match(TokenType.ELSE): 4450 default = self._parse_conjunction() 4451 4452 if not self._match(TokenType.END): 4453 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4454 default = exp.column("interval") 4455 else: 4456 self.raise_error("Expected END after CASE", self._prev) 4457 4458 return self._parse_window( 4459 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4460 ) 4461 4462 def _parse_if(self) -> t.Optional[exp.Expression]: 4463 if self._match(TokenType.L_PAREN): 4464 args = self._parse_csv(self._parse_conjunction) 4465 this = self.validate_expression(exp.If.from_arg_list(args), args) 4466 self._match_r_paren() 4467 else: 4468 index = self._index - 1 4469 condition = self._parse_conjunction() 4470 4471 if not condition: 4472 self._retreat(index) 4473 return None 4474 4475 self._match(TokenType.THEN) 4476 true = self._parse_conjunction() 4477 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4478 self._match(TokenType.END) 4479 this = self.expression(exp.If, this=condition, true=true, false=false) 4480 4481 return self._parse_window(this) 4482 4483 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4484 if not self._match_text_seq("VALUE", "FOR"): 4485 self._retreat(self._index - 1) 4486 return None 4487 4488 return self.expression( 4489 exp.NextValueFor, 4490 this=self._parse_column(), 4491 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4492 ) 4493 4494 def _parse_extract(self) -> exp.Extract: 4495 this = self._parse_function() or self._parse_var() or self._parse_type() 4496 4497 if self._match(TokenType.FROM): 4498 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4499 4500 if not self._match(TokenType.COMMA): 4501 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4502 4503 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4504 4505 def _parse_any_value(self) -> exp.AnyValue: 4506 this = self._parse_lambda() 4507 is_max = None 4508 having = None 4509 4510 if self._match(TokenType.HAVING): 4511 self._match_texts(("MAX", "MIN")) 4512 is_max = self._prev.text == "MAX" 4513 having = self._parse_column() 4514 4515 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4516 4517 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4518 this = self._parse_conjunction() 4519 4520 if not self._match(TokenType.ALIAS): 4521 if self._match(TokenType.COMMA): 4522 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4523 4524 self.raise_error("Expected AS after CAST") 4525 4526 fmt = None 4527 to = self._parse_types() 4528 4529 if self._match(TokenType.FORMAT): 4530 fmt_string = self._parse_string() 4531 fmt = self._parse_at_time_zone(fmt_string) 4532 4533 if not to: 4534 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4535 if to.this in exp.DataType.TEMPORAL_TYPES: 4536 this = self.expression( 4537 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4538 this=this, 4539 
format=exp.Literal.string( 4540 format_time( 4541 fmt_string.this if fmt_string else "", 4542 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4543 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4544 ) 4545 ), 4546 ) 4547 4548 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4549 this.set("zone", fmt.args["zone"]) 4550 return this 4551 elif not to: 4552 self.raise_error("Expected TYPE after CAST") 4553 elif isinstance(to, exp.Identifier): 4554 to = exp.DataType.build(to.name, udt=True) 4555 elif to.this == exp.DataType.Type.CHAR: 4556 if self._match(TokenType.CHARACTER_SET): 4557 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4558 4559 return self.expression( 4560 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4561 ) 4562 4563 def _parse_string_agg(self) -> exp.Expression: 4564 if self._match(TokenType.DISTINCT): 4565 args: t.List[t.Optional[exp.Expression]] = [ 4566 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4567 ] 4568 if self._match(TokenType.COMMA): 4569 args.extend(self._parse_csv(self._parse_conjunction)) 4570 else: 4571 args = self._parse_csv(self._parse_conjunction) # type: ignore 4572 4573 index = self._index 4574 if not self._match(TokenType.R_PAREN) and args: 4575 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4576 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4577 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4578 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4579 4580 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4581 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4582 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4583 if not self._match_text_seq("WITHIN", "GROUP"): 4584 self._retreat(index) 4585 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4586 4587 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4588 order = self._parse_order(this=seq_get(args, 0)) 4589 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4590 4591 def _parse_convert( 4592 self, strict: bool, safe: t.Optional[bool] = None 4593 ) -> t.Optional[exp.Expression]: 4594 this = self._parse_bitwise() 4595 4596 if self._match(TokenType.USING): 4597 to: t.Optional[exp.Expression] = self.expression( 4598 exp.CharacterSet, this=self._parse_var() 4599 ) 4600 elif self._match(TokenType.COMMA): 4601 to = self._parse_types() 4602 else: 4603 to = None 4604 4605 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4606 4607 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4608 """ 4609 There are generally two variants of the DECODE function: 4610 4611 - DECODE(bin, charset) 4612 - DECODE(expression, search, result [, search, result] ... [, default]) 4613 4614 The second variant will always be parsed into a CASE expression. Note that NULL 4615 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4616 instead of relying on pattern matching. 
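As an illustrative sketch (the exact tree can vary with dialect settings), an input such as DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed into roughly CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END; note how the NULL search value becomes an explicit IS NULL check rather than an equality comparison.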
4617 """ 4618 args = self._parse_csv(self._parse_conjunction) 4619 4620 if len(args) < 3: 4621 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4622 4623 expression, *expressions = args 4624 if not expression: 4625 return None 4626 4627 ifs = [] 4628 for search, result in zip(expressions[::2], expressions[1::2]): 4629 if not search or not result: 4630 return None 4631 4632 if isinstance(search, exp.Literal): 4633 ifs.append( 4634 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4635 ) 4636 elif isinstance(search, exp.Null): 4637 ifs.append( 4638 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4639 ) 4640 else: 4641 cond = exp.or_( 4642 exp.EQ(this=expression.copy(), expression=search), 4643 exp.and_( 4644 exp.Is(this=expression.copy(), expression=exp.Null()), 4645 exp.Is(this=search.copy(), expression=exp.Null()), 4646 copy=False, 4647 ), 4648 copy=False, 4649 ) 4650 ifs.append(exp.If(this=cond, true=result)) 4651 4652 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4653 4654 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4655 self._match_text_seq("KEY") 4656 key = self._parse_column() 4657 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4658 self._match_text_seq("VALUE") 4659 value = self._parse_bitwise() 4660 4661 if not key and not value: 4662 return None 4663 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4664 4665 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4666 if not this or not self._match_text_seq("FORMAT", "JSON"): 4667 return this 4668 4669 return self.expression(exp.FormatJson, this=this) 4670 4671 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4672 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4673 for value in values: 4674 if self._match_text_seq(value, "ON", on): 4675 return f"{value} ON {on}" 4676 4677 return None 4678 4679 @t.overload 4680 def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject: 4681 ... 4682 4683 @t.overload 4684 def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg: 4685 ... 
4686 4687 def _parse_json_object(self, agg=False): 4688 star = self._parse_star() 4689 expressions = ( 4690 [star] 4691 if star 4692 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4693 ) 4694 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4695 4696 unique_keys = None 4697 if self._match_text_seq("WITH", "UNIQUE"): 4698 unique_keys = True 4699 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4700 unique_keys = False 4701 4702 self._match_text_seq("KEYS") 4703 4704 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4705 self._parse_type() 4706 ) 4707 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4708 4709 return self.expression( 4710 exp.JSONObjectAgg if agg else exp.JSONObject, 4711 expressions=expressions, 4712 null_handling=null_handling, 4713 unique_keys=unique_keys, 4714 return_type=return_type, 4715 encoding=encoding, 4716 ) 4717 4718 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4719 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4720 if not self._match_text_seq("NESTED"): 4721 this = self._parse_id_var() 4722 kind = self._parse_types(allow_identifiers=False) 4723 nested = None 4724 else: 4725 this = None 4726 kind = None 4727 nested = True 4728 4729 path = self._match_text_seq("PATH") and self._parse_string() 4730 nested_schema = nested and self._parse_json_schema() 4731 4732 return self.expression( 4733 exp.JSONColumnDef, 4734 this=this, 4735 kind=kind, 4736 path=path, 4737 nested_schema=nested_schema, 4738 ) 4739 4740 def _parse_json_schema(self) -> exp.JSONSchema: 4741 self._match_text_seq("COLUMNS") 4742 return self.expression( 4743 exp.JSONSchema, 4744 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4745 ) 4746 4747 def _parse_json_table(self) -> exp.JSONTable: 4748 this = self._parse_format_json(self._parse_bitwise()) 4749 path = self._match(TokenType.COMMA) and self._parse_string() 4750 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4751 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4752 schema = self._parse_json_schema() 4753 4754 return exp.JSONTable( 4755 this=this, 4756 schema=schema, 4757 path=path, 4758 error_handling=error_handling, 4759 empty_handling=empty_handling, 4760 ) 4761 4762 def _parse_match_against(self) -> exp.MatchAgainst: 4763 expressions = self._parse_csv(self._parse_column) 4764 4765 self._match_text_seq(")", "AGAINST", "(") 4766 4767 this = self._parse_string() 4768 4769 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4770 modifier = "IN NATURAL LANGUAGE MODE" 4771 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4772 modifier = f"{modifier} WITH QUERY EXPANSION" 4773 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4774 modifier = "IN BOOLEAN MODE" 4775 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4776 modifier = "WITH QUERY EXPANSION" 4777 else: 4778 modifier = None 4779 4780 return self.expression( 4781 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4782 ) 4783 4784 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4785 def _parse_open_json(self) -> exp.OpenJSON: 4786 this = self._parse_bitwise() 4787 path = self._match(TokenType.COMMA) and self._parse_string() 4788 4789 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4790 this = self._parse_field(any_token=True) 4791 kind = self._parse_types() 4792 path = 
self._parse_string() 4793 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4794 4795 return self.expression( 4796 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4797 ) 4798 4799 expressions = None 4800 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4801 self._match_l_paren() 4802 expressions = self._parse_csv(_parse_open_json_column_def) 4803 4804 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4805 4806 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4807 args = self._parse_csv(self._parse_bitwise) 4808 4809 if self._match(TokenType.IN): 4810 return self.expression( 4811 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4812 ) 4813 4814 if haystack_first: 4815 haystack = seq_get(args, 0) 4816 needle = seq_get(args, 1) 4817 else: 4818 needle = seq_get(args, 0) 4819 haystack = seq_get(args, 1) 4820 4821 return self.expression( 4822 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4823 ) 4824 4825 def _parse_predict(self) -> exp.Predict: 4826 self._match_text_seq("MODEL") 4827 this = self._parse_table() 4828 4829 self._match(TokenType.COMMA) 4830 self._match_text_seq("TABLE") 4831 4832 return self.expression( 4833 exp.Predict, 4834 this=this, 4835 expression=self._parse_table(), 4836 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4837 ) 4838 4839 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4840 args = self._parse_csv(self._parse_table) 4841 return exp.JoinHint(this=func_name.upper(), expressions=args) 4842 4843 def _parse_substring(self) -> exp.Substring: 4844 # Postgres supports the form: substring(string [from int] [for int]) 4845 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4846 4847 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4848 4849 if self._match(TokenType.FROM): 4850 args.append(self._parse_bitwise()) 4851 if self._match(TokenType.FOR): 4852 args.append(self._parse_bitwise()) 4853 4854 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4855 4856 def _parse_trim(self) -> exp.Trim: 4857 # https://www.w3resource.com/sql/character-functions/trim.php 4858 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4859 4860 position = None 4861 collation = None 4862 expression = None 4863 4864 if self._match_texts(self.TRIM_TYPES): 4865 position = self._prev.text.upper() 4866 4867 this = self._parse_bitwise() 4868 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4869 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4870 expression = self._parse_bitwise() 4871 4872 if invert_order: 4873 this, expression = expression, this 4874 4875 if self._match(TokenType.COLLATE): 4876 collation = self._parse_bitwise() 4877 4878 return self.expression( 4879 exp.Trim, this=this, position=position, expression=expression, collation=collation 4880 ) 4881 4882 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4883 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4884 4885 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4886 return self._parse_window(self._parse_id_var(), alias=True) 4887 4888 def _parse_respect_or_ignore_nulls( 4889 self, this: t.Optional[exp.Expression] 4890 ) -> t.Optional[exp.Expression]: 4891 if self._match_text_seq("IGNORE", "NULLS"): 4892 return self.expression(exp.IgnoreNulls, this=this) 4893 if 
self._match_text_seq("RESPECT", "NULLS"): 4894 return self.expression(exp.RespectNulls, this=this) 4895 return this 4896 4897 def _parse_window( 4898 self, this: t.Optional[exp.Expression], alias: bool = False 4899 ) -> t.Optional[exp.Expression]: 4900 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4901 self._match(TokenType.WHERE) 4902 this = self.expression( 4903 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4904 ) 4905 self._match_r_paren() 4906 4907 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4908 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4909 if self._match_text_seq("WITHIN", "GROUP"): 4910 order = self._parse_wrapped(self._parse_order) 4911 this = self.expression(exp.WithinGroup, this=this, expression=order) 4912 4913 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4914 # Some dialects choose to implement and some do not. 4915 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4916 4917 # There is some code above in _parse_lambda that handles 4918 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4919 4920 # The below changes handle 4921 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4922 4923 # Oracle allows both formats 4924 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4925 # and Snowflake chose to do the same for familiarity 4926 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4927 this = self._parse_respect_or_ignore_nulls(this) 4928 4929 # bigquery select from window x AS (partition by ...) 4930 if alias: 4931 over = None 4932 self._match(TokenType.ALIAS) 4933 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4934 return this 4935 else: 4936 over = self._prev.text.upper() 4937 4938 if not self._match(TokenType.L_PAREN): 4939 return self.expression( 4940 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4941 ) 4942 4943 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4944 4945 first = self._match(TokenType.FIRST) 4946 if self._match_text_seq("LAST"): 4947 first = False 4948 4949 partition, order = self._parse_partition_and_order() 4950 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4951 4952 if kind: 4953 self._match(TokenType.BETWEEN) 4954 start = self._parse_window_spec() 4955 self._match(TokenType.AND) 4956 end = self._parse_window_spec() 4957 4958 spec = self.expression( 4959 exp.WindowSpec, 4960 kind=kind, 4961 start=start["value"], 4962 start_side=start["side"], 4963 end=end["value"], 4964 end_side=end["side"], 4965 ) 4966 else: 4967 spec = None 4968 4969 self._match_r_paren() 4970 4971 window = self.expression( 4972 exp.Window, 4973 this=this, 4974 partition_by=partition, 4975 order=order, 4976 spec=spec, 4977 alias=window_alias, 4978 over=over, 4979 first=first, 4980 ) 4981 4982 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4983 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4984 return self._parse_window(window, alias=alias) 4985 4986 return window 4987 4988 def _parse_partition_and_order( 4989 self, 4990 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4991 return self._parse_partition_by(), self._parse_order() 4992 4993 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4994 self._match(TokenType.BETWEEN) 4995 4996 return { 4997 "value": ( 4998 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4999 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5000 or self._parse_bitwise() 5001 ), 5002 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5003 } 5004 5005 def _parse_alias( 5006 self, this: t.Optional[exp.Expression], explicit: bool = False 5007 ) -> t.Optional[exp.Expression]: 5008 any_token = self._match(TokenType.ALIAS) 5009 comments = self._prev_comments 5010 5011 if explicit and not any_token: 5012 return this 5013 5014 if self._match(TokenType.L_PAREN): 5015 aliases = self.expression( 5016 exp.Aliases, 5017 comments=comments, 5018 this=this, 5019 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5020 ) 5021 self._match_r_paren(aliases) 5022 return aliases 5023 5024 alias = self._parse_id_var(any_token) or ( 5025 self.STRING_ALIASES and self._parse_string_as_identifier() 5026 ) 5027 5028 if alias: 5029 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5030 5031 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5032 if not this.comments and this.this.comments: 5033 this.comments = this.this.comments 5034 this.this.comments = None 5035 5036 return this 5037 5038 def _parse_id_var( 5039 self, 5040 any_token: bool = True, 5041 tokens: t.Optional[t.Collection[TokenType]] = None, 5042 ) -> t.Optional[exp.Expression]: 5043 identifier = self._parse_identifier() 5044 5045 if identifier: 5046 return identifier 5047 5048 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5049 quoted = self._prev.token_type == TokenType.STRING 5050 return exp.Identifier(this=self._prev.text, quoted=quoted) 5051 5052 return None 5053 5054 def _parse_string(self) -> t.Optional[exp.Expression]: 5055 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5056 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5057 return self._parse_placeholder() 5058 5059 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5060 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5061 5062 def _parse_number(self) -> t.Optional[exp.Expression]: 5063 if self._match(TokenType.NUMBER): 5064 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5065 return self._parse_placeholder() 5066 5067 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5068 if self._match(TokenType.IDENTIFIER): 5069 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5070 return self._parse_placeholder() 5071 5072 def _parse_var( 5073 self, 5074 any_token: bool = False, 5075 tokens: t.Optional[t.Collection[TokenType]] = None, 5076 upper: bool = False, 5077 ) -> t.Optional[exp.Expression]: 5078 if ( 5079 (any_token and self._advance_any()) 5080 or self._match(TokenType.VAR) 5081 or (self._match_set(tokens) if tokens else False) 5082 ): 5083 return self.expression( 5084 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5085 ) 5086 return 
self._parse_placeholder() 5087 5088 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5089 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5090 self._advance() 5091 return self._prev 5092 return None 5093 5094 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5095 return self._parse_var() or self._parse_string() 5096 5097 def _parse_null(self) -> t.Optional[exp.Expression]: 5098 if self._match_set(self.NULL_TOKENS): 5099 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5100 return self._parse_placeholder() 5101 5102 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5103 if self._match(TokenType.TRUE): 5104 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5105 if self._match(TokenType.FALSE): 5106 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5107 return self._parse_placeholder() 5108 5109 def _parse_star(self) -> t.Optional[exp.Expression]: 5110 if self._match(TokenType.STAR): 5111 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5112 return self._parse_placeholder() 5113 5114 def _parse_parameter(self) -> exp.Parameter: 5115 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5116 return ( 5117 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5118 ) 5119 5120 self._match(TokenType.L_BRACE) 5121 this = _parse_parameter_part() 5122 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5123 self._match(TokenType.R_BRACE) 5124 5125 return self.expression(exp.Parameter, this=this, expression=expression) 5126 5127 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5128 if self._match_set(self.PLACEHOLDER_PARSERS): 5129 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5130 if placeholder: 5131 return placeholder 5132 self._advance(-1) 5133 return None 5134 5135 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5136 if not self._match(TokenType.EXCEPT): 5137 return None 5138 if self._match(TokenType.L_PAREN, advance=False): 5139 return self._parse_wrapped_csv(self._parse_column) 5140 5141 except_column = self._parse_column() 5142 return [except_column] if except_column else None 5143 5144 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5145 if not self._match(TokenType.REPLACE): 5146 return None 5147 if self._match(TokenType.L_PAREN, advance=False): 5148 return self._parse_wrapped_csv(self._parse_expression) 5149 5150 replace_expression = self._parse_expression() 5151 return [replace_expression] if replace_expression else None 5152 5153 def _parse_csv( 5154 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5155 ) -> t.List[exp.Expression]: 5156 parse_result = parse_method() 5157 items = [parse_result] if parse_result is not None else [] 5158 5159 while self._match(sep): 5160 self._add_comments(parse_result) 5161 parse_result = parse_method() 5162 if parse_result is not None: 5163 items.append(parse_result) 5164 5165 return items 5166 5167 def _parse_tokens( 5168 self, parse_method: t.Callable, expressions: t.Dict 5169 ) -> t.Optional[exp.Expression]: 5170 this = parse_method() 5171 5172 while self._match_set(expressions): 5173 this = self.expression( 5174 expressions[self._prev.token_type], 5175 this=this, 5176 comments=self._prev_comments, 5177 expression=parse_method(), 5178 ) 5179 5180 return this 5181 5182 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5183 return 
self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5184 5185 def _parse_wrapped_csv( 5186 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5187 ) -> t.List[exp.Expression]: 5188 return self._parse_wrapped( 5189 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5190 ) 5191 5192 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5193 wrapped = self._match(TokenType.L_PAREN) 5194 if not wrapped and not optional: 5195 self.raise_error("Expecting (") 5196 parse_result = parse_method() 5197 if wrapped: 5198 self._match_r_paren() 5199 return parse_result 5200 5201 def _parse_expressions(self) -> t.List[exp.Expression]: 5202 return self._parse_csv(self._parse_expression) 5203 5204 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5205 return self._parse_select() or self._parse_set_operations( 5206 self._parse_expression() if alias else self._parse_conjunction() 5207 ) 5208 5209 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5210 return self._parse_query_modifiers( 5211 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5212 ) 5213 5214 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5215 this = None 5216 if self._match_texts(self.TRANSACTION_KIND): 5217 this = self._prev.text 5218 5219 self._match_texts(("TRANSACTION", "WORK")) 5220 5221 modes = [] 5222 while True: 5223 mode = [] 5224 while self._match(TokenType.VAR): 5225 mode.append(self._prev.text) 5226 5227 if mode: 5228 modes.append(" ".join(mode)) 5229 if not self._match(TokenType.COMMA): 5230 break 5231 5232 return self.expression(exp.Transaction, this=this, modes=modes) 5233 5234 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5235 chain = None 5236 savepoint = None 5237 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5238 5239 self._match_texts(("TRANSACTION", "WORK")) 5240 5241 if self._match_text_seq("TO"): 5242 self._match_text_seq("SAVEPOINT") 5243 savepoint = self._parse_id_var() 5244 5245 if self._match(TokenType.AND): 5246 chain = not self._match_text_seq("NO") 5247 self._match_text_seq("CHAIN") 5248 5249 if is_rollback: 5250 return self.expression(exp.Rollback, savepoint=savepoint) 5251 5252 return self.expression(exp.Commit, chain=chain) 5253 5254 def _parse_refresh(self) -> exp.Refresh: 5255 self._match(TokenType.TABLE) 5256 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5257 5258 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5259 if not self._match_text_seq("ADD"): 5260 return None 5261 5262 self._match(TokenType.COLUMN) 5263 exists_column = self._parse_exists(not_=True) 5264 expression = self._parse_field_def() 5265 5266 if expression: 5267 expression.set("exists", exists_column) 5268 5269 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5270 if self._match_texts(("FIRST", "AFTER")): 5271 position = self._prev.text 5272 column_position = self.expression( 5273 exp.ColumnPosition, this=self._parse_column(), position=position 5274 ) 5275 expression.set("position", column_position) 5276 5277 return expression 5278 5279 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5280 drop = self._match(TokenType.DROP) and self._parse_drop() 5281 if drop and not isinstance(drop, exp.Command): 5282 drop.set("kind", drop.args.get("kind", "COLUMN")) 5283 return drop 5284 5285 # 
https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5286 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5287 return self.expression( 5288 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5289 ) 5290 5291 def _parse_add_constraint(self) -> exp.AddConstraint: 5292 this = None 5293 kind = self._prev.token_type 5294 5295 if kind == TokenType.CONSTRAINT: 5296 this = self._parse_id_var() 5297 5298 if self._match_text_seq("CHECK"): 5299 expression = self._parse_wrapped(self._parse_conjunction) 5300 enforced = self._match_text_seq("ENFORCED") or False 5301 5302 return self.expression( 5303 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5304 ) 5305 5306 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5307 expression = self._parse_foreign_key() 5308 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5309 expression = self._parse_primary_key() 5310 else: 5311 expression = None 5312 5313 return self.expression(exp.AddConstraint, this=this, expression=expression) 5314 5315 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5316 index = self._index - 1 5317 5318 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5319 return self._parse_csv(self._parse_add_constraint) 5320 5321 self._retreat(index) 5322 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5323 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5324 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5325 5326 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5327 self._match(TokenType.COLUMN) 5328 column = self._parse_field(any_token=True) 5329 5330 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5331 return self.expression(exp.AlterColumn, this=column, drop=True) 5332 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5333 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5334 5335 self._match_text_seq("SET", "DATA") 5336 return self.expression( 5337 exp.AlterColumn, 5338 this=column, 5339 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5340 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5341 using=self._match(TokenType.USING) and self._parse_conjunction(), 5342 ) 5343 5344 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5345 index = self._index - 1 5346 5347 partition_exists = self._parse_exists() 5348 if self._match(TokenType.PARTITION, advance=False): 5349 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5350 5351 self._retreat(index) 5352 return self._parse_csv(self._parse_drop_column) 5353 5354 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5355 if self._match(TokenType.COLUMN): 5356 exists = self._parse_exists() 5357 old_column = self._parse_column() 5358 to = self._match_text_seq("TO") 5359 new_column = self._parse_column() 5360 5361 if old_column is None or to is None or new_column is None: 5362 return None 5363 5364 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5365 5366 self._match_text_seq("TO") 5367 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5368 5369 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5370 start = self._prev 5371 5372 if not self._match(TokenType.TABLE): 5373 return self._parse_as_command(start) 5374 5375 exists = 
self._parse_exists() 5376 only = self._match_text_seq("ONLY") 5377 this = self._parse_table(schema=True) 5378 5379 if self._next: 5380 self._advance() 5381 5382 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5383 if parser: 5384 actions = ensure_list(parser(self)) 5385 5386 if not self._curr and actions: 5387 return self.expression( 5388 exp.AlterTable, 5389 this=this, 5390 exists=exists, 5391 actions=actions, 5392 only=only, 5393 ) 5394 5395 return self._parse_as_command(start) 5396 5397 def _parse_merge(self) -> exp.Merge: 5398 self._match(TokenType.INTO) 5399 target = self._parse_table() 5400 5401 if target and self._match(TokenType.ALIAS, advance=False): 5402 target.set("alias", self._parse_table_alias()) 5403 5404 self._match(TokenType.USING) 5405 using = self._parse_table() 5406 5407 self._match(TokenType.ON) 5408 on = self._parse_conjunction() 5409 5410 return self.expression( 5411 exp.Merge, 5412 this=target, 5413 using=using, 5414 on=on, 5415 expressions=self._parse_when_matched(), 5416 ) 5417 5418 def _parse_when_matched(self) -> t.List[exp.When]: 5419 whens = [] 5420 5421 while self._match(TokenType.WHEN): 5422 matched = not self._match(TokenType.NOT) 5423 self._match_text_seq("MATCHED") 5424 source = ( 5425 False 5426 if self._match_text_seq("BY", "TARGET") 5427 else self._match_text_seq("BY", "SOURCE") 5428 ) 5429 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5430 5431 self._match(TokenType.THEN) 5432 5433 if self._match(TokenType.INSERT): 5434 _this = self._parse_star() 5435 if _this: 5436 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5437 else: 5438 then = self.expression( 5439 exp.Insert, 5440 this=self._parse_value(), 5441 expression=self._match(TokenType.VALUES) and self._parse_value(), 5442 ) 5443 elif self._match(TokenType.UPDATE): 5444 expressions = self._parse_star() 5445 if expressions: 5446 then = self.expression(exp.Update, expressions=expressions) 5447 else: 5448 then = self.expression( 5449 exp.Update, 5450 expressions=self._match(TokenType.SET) 5451 and self._parse_csv(self._parse_equality), 5452 ) 5453 elif self._match(TokenType.DELETE): 5454 then = self.expression(exp.Var, this=self._prev.text) 5455 else: 5456 then = None 5457 5458 whens.append( 5459 self.expression( 5460 exp.When, 5461 matched=matched, 5462 source=source, 5463 condition=condition, 5464 then=then, 5465 ) 5466 ) 5467 return whens 5468 5469 def _parse_show(self) -> t.Optional[exp.Expression]: 5470 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5471 if parser: 5472 return parser(self) 5473 return self._parse_as_command(self._prev) 5474 5475 def _parse_set_item_assignment( 5476 self, kind: t.Optional[str] = None 5477 ) -> t.Optional[exp.Expression]: 5478 index = self._index 5479 5480 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5481 return self._parse_set_transaction(global_=kind == "GLOBAL") 5482 5483 left = self._parse_primary() or self._parse_id_var() 5484 assignment_delimiter = self._match_texts(("=", "TO")) 5485 5486 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5487 self._retreat(index) 5488 return None 5489 5490 right = self._parse_statement() or self._parse_id_var() 5491 this = self.expression(exp.EQ, this=left, expression=right) 5492 5493 return self.expression(exp.SetItem, this=this, kind=kind) 5494 5495 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5496 
self._match_text_seq("TRANSACTION") 5497 characteristics = self._parse_csv( 5498 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5499 ) 5500 return self.expression( 5501 exp.SetItem, 5502 expressions=characteristics, 5503 kind="TRANSACTION", 5504 **{"global": global_}, # type: ignore 5505 ) 5506 5507 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5508 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5509 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5510 5511 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5512 index = self._index 5513 set_ = self.expression( 5514 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5515 ) 5516 5517 if self._curr: 5518 self._retreat(index) 5519 return self._parse_as_command(self._prev) 5520 5521 return set_ 5522 5523 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5524 for option in options: 5525 if self._match_text_seq(*option.split(" ")): 5526 return exp.var(option) 5527 return None 5528 5529 def _parse_as_command(self, start: Token) -> exp.Command: 5530 while self._curr: 5531 self._advance() 5532 text = self._find_sql(start, self._prev) 5533 size = len(start.text) 5534 self._warn_unsupported() 5535 return exp.Command(this=text[:size], expression=text[size:]) 5536 5537 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5538 settings = [] 5539 5540 self._match_l_paren() 5541 kind = self._parse_id_var() 5542 5543 if self._match(TokenType.L_PAREN): 5544 while True: 5545 key = self._parse_id_var() 5546 value = self._parse_primary() 5547 5548 if not key and value is None: 5549 break 5550 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5551 self._match(TokenType.R_PAREN) 5552 5553 self._match_r_paren() 5554 5555 return self.expression( 5556 exp.DictProperty, 5557 this=this, 5558 kind=kind.this if kind else None, 5559 settings=settings, 5560 ) 5561 5562 def _parse_dict_range(self, this: str) -> exp.DictRange: 5563 self._match_l_paren() 5564 has_min = self._match_text_seq("MIN") 5565 if has_min: 5566 min = self._parse_var() or self._parse_primary() 5567 self._match_text_seq("MAX") 5568 max = self._parse_var() or self._parse_primary() 5569 else: 5570 max = self._parse_var() or self._parse_primary() 5571 min = exp.Literal.number(0) 5572 self._match_r_paren() 5573 return self.expression(exp.DictRange, this=this, min=min, max=max) 5574 5575 def _parse_comprehension( 5576 self, this: t.Optional[exp.Expression] 5577 ) -> t.Optional[exp.Comprehension]: 5578 index = self._index 5579 expression = self._parse_column() 5580 if not self._match(TokenType.IN): 5581 self._retreat(index - 1) 5582 return None 5583 iterator = self._parse_column() 5584 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5585 return self.expression( 5586 exp.Comprehension, 5587 this=this, 5588 expression=expression, 5589 iterator=iterator, 5590 condition=condition, 5591 ) 5592 5593 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5594 if self._match(TokenType.HEREDOC_STRING): 5595 return self.expression(exp.Heredoc, this=self._prev.text) 5596 5597 if not self._match_text_seq("$"): 5598 return None 5599 5600 tags = ["$"] 5601 tag_text = None 5602 5603 if self._is_connected(): 5604 self._advance() 5605 tags.append(self._prev.text.upper()) 5606 else: 5607 self.raise_error("No closing $ found") 5608 5609 if tags[-1] != "$": 5610 if self._is_connected() 
and self._match_text_seq("$"): 5611 tag_text = tags[-1] 5612 tags.append("$") 5613 else: 5614 self.raise_error("No closing $ found") 5615 5616 heredoc_start = self._curr 5617 5618 while self._curr: 5619 if self._match_text_seq(*tags, advance=False): 5620 this = self._find_sql(heredoc_start, self._prev) 5621 self._advance(len(tags)) 5622 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5623 5624 self._advance() 5625 5626 self.raise_error(f"No closing {''.join(tags)} found") 5627 return None 5628 5629 def _find_parser( 5630 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5631 ) -> t.Optional[t.Callable]: 5632 if not self._curr: 5633 return None 5634 5635 index = self._index 5636 this = [] 5637 while True: 5638 # The current token might be multiple words 5639 curr = self._curr.text.upper() 5640 key = curr.split(" ") 5641 this.append(curr) 5642 5643 self._advance() 5644 result, trie = in_trie(trie, key) 5645 if result == TrieResult.FAILED: 5646 break 5647 5648 if result == TrieResult.EXISTS: 5649 subparser = parsers[" ".join(this)] 5650 return subparser 5651 5652 self._retreat(index) 5653 return None 5654 5655 def _match(self, token_type, advance=True, expression=None): 5656 if not self._curr: 5657 return None 5658 5659 if self._curr.token_type == token_type: 5660 if advance: 5661 self._advance() 5662 self._add_comments(expression) 5663 return True 5664 5665 return None 5666 5667 def _match_set(self, types, advance=True): 5668 if not self._curr: 5669 return None 5670 5671 if self._curr.token_type in types: 5672 if advance: 5673 self._advance() 5674 return True 5675 5676 return None 5677 5678 def _match_pair(self, token_type_a, token_type_b, advance=True): 5679 if not self._curr or not self._next: 5680 return None 5681 5682 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5683 if advance: 5684 self._advance(2) 5685 return True 5686 5687 return None 5688 5689 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5690 if not self._match(TokenType.L_PAREN, expression=expression): 5691 self.raise_error("Expecting (") 5692 5693 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5694 if not self._match(TokenType.R_PAREN, expression=expression): 5695 self.raise_error("Expecting )") 5696 5697 def _match_texts(self, texts, advance=True): 5698 if self._curr and self._curr.text.upper() in texts: 5699 if advance: 5700 self._advance() 5701 return True 5702 return None 5703 5704 def _match_text_seq(self, *texts, advance=True): 5705 index = self._index 5706 for text in texts: 5707 if self._curr and self._curr.text.upper() == text: 5708 self._advance() 5709 else: 5710 self._retreat(index) 5711 return None 5712 5713 if not advance: 5714 self._retreat(index) 5715 5716 return True 5717 5718 @t.overload 5719 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5720 ... 5721 5722 @t.overload 5723 def _replace_columns_with_dots( 5724 self, this: t.Optional[exp.Expression] 5725 ) -> t.Optional[exp.Expression]: 5726 ... 
5727 5728 def _replace_columns_with_dots(self, this): 5729 if isinstance(this, exp.Dot): 5730 exp.replace_children(this, self._replace_columns_with_dots) 5731 elif isinstance(this, exp.Column): 5732 exp.replace_children(this, self._replace_columns_with_dots) 5733 table = this.args.get("table") 5734 this = ( 5735 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5736 ) 5737 5738 return this 5739 5740 def _replace_lambda( 5741 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5742 ) -> t.Optional[exp.Expression]: 5743 if not node: 5744 return node 5745 5746 for column in node.find_all(exp.Column): 5747 if column.parts[0].name in lambda_variables: 5748 dot_or_id = column.to_dot() if column.table else column.this 5749 parent = column.parent 5750 5751 while isinstance(parent, exp.Dot): 5752 if not isinstance(parent.parent, exp.Dot): 5753 parent.replace(dot_or_id) 5754 break 5755 parent = parent.parent 5756 else: 5757 if column is node: 5758 node = dot_or_id 5759 else: 5760 column.replace(dot_or_id) 5761 return node
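To see what the window parsing above produces, here is a small, illustrative sketch using sqlglot's public API. The query is made up for the example; exp.Window and exp.WindowSpec are the node types assembled by _parse_window and _parse_window_spec.

    import sqlglot
    from sqlglot import exp

    sql = (
        "SELECT SUM(x) OVER "
        "(PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) "
        "FROM t"
    )
    window = sqlglot.parse_one(sql).find(exp.Window)
    spec = window.args["spec"]  # exp.WindowSpec built by _parse_window_spec
    assert spec.args["kind"] == "ROWS"
    assert spec.args["start"] == "UNBOUNDED" and spec.args["start_side"] == "PRECEDING"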
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
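A minimal construction sketch based on the arguments documented above, assuming the generic Tokenizer from sqlglot.tokens (a dialect-specific tokenizer would be used the same way):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    trees = parser.parse(Tokenizer().tokenize(sql), sql)  # one syntax tree per statement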
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
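For example, two semicolon-separated statements come back as two trees (a sketch using the generic Tokenizer):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    assert len(trees) == 2  # one exp.Select per statement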
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
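A rough sketch of how this differs from parse: the tokens are parsed with the parser registered for the requested type, assuming exp.Select is among the registered EXPRESSION_PARSERS keys.

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)
    # A type that cannot be parsed from these tokens raises ParseError instead.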
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
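A sketch of the WARN behavior, assuming the deliberately malformed input below trips a validation error (it leaves exp.Add without its mandatory right-hand side):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT 1 +"), "SELECT 1 +")
    # Errors accumulated in parser.errors and were logged via logger.error
    # by check_errors() instead of being raised.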
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
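With the default IMMEDIATE level the first error is raised right away, and the structured fields passed to ParseError.new above are available on the exception (a sketch, reusing the malformed input from before):

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    try:
        Parser().parse(Tokenizer().tokenize("SELECT 1 +"), "SELECT 1 +")
    except ParseError as e:
        first = e.errors[0]  # dict mirroring the ParseError.new kwargs above
        print(first["line"], first["col"], first["highlight"])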
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
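A sketch of building a node by hand through this helper; exp.to_identifier is an existing sqlglot helper, and because expression() also runs validate_expression, a missing mandatory argument would be routed to raise_error:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    column = parser.expression(exp.Column, this=exp.to_identifier("a"))
    assert column.sql() == "a"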
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
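A rough sketch of validation catching a missing mandatory argument; with a RAISE-level parser the error is recorded rather than raised immediately (exp.Like requires both sides, and exp.column is an existing sqlglot helper):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    incomplete = exp.Like(this=exp.column("x"))  # mandatory 'expression' left unset
    parser.validate_expression(incomplete)
    assert parser.errors  # recorded; check_errors() would raise them as one ParseError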