sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 64 def _parser(args: t.List, dialect: Dialect) -> E: 65 expression = expr_type( 66 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 67 ) 68 if len(args) > 2 and expr_type is exp.JSONExtract: 69 expression.set("expressions", args[2:]) 70 71 return expression 72 73 return _parser 74 75 76class _Parser(type): 77 def __new__(cls, clsname, bases, attrs): 78 klass = super().__new__(cls, clsname, bases, attrs) 79 80 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 81 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 82 83 return klass 84 85 86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: Determines the amount of context to capture from a 94 query string when displaying the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": parse_like, 122 "LOG": parse_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": parse_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 } 296 297 CREATABLES = { 298 TokenType.COLUMN, 299 TokenType.CONSTRAINT, 300 TokenType.FUNCTION, 301 TokenType.INDEX, 302 TokenType.PROCEDURE, 303 TokenType.FOREIGN_KEY, 304 *DB_CREATABLES, 305 } 306 307 # Tokens that can represent identifiers 308 ID_VAR_TOKENS = { 309 TokenType.VAR, 310 TokenType.ANTI, 311 TokenType.APPLY, 312 TokenType.ASC, 313 TokenType.AUTO_INCREMENT, 314 TokenType.BEGIN, 315 TokenType.BPCHAR, 316 TokenType.CACHE, 317 TokenType.CASE, 318 TokenType.COLLATE, 319 TokenType.COMMAND, 320 TokenType.COMMENT, 321 TokenType.COMMIT, 322 TokenType.CONSTRAINT, 323 TokenType.DEFAULT, 324 TokenType.DELETE, 325 TokenType.DESC, 326 TokenType.DESCRIBE, 327 TokenType.DICTIONARY, 328 TokenType.DIV, 329 TokenType.END, 330 TokenType.EXECUTE, 331 TokenType.ESCAPE, 332 TokenType.FALSE, 333 TokenType.FIRST, 334 TokenType.FILTER, 335 TokenType.FINAL, 336 TokenType.FORMAT, 337 TokenType.FULL, 338 TokenType.IS, 339 TokenType.ISNULL, 340 TokenType.INTERVAL, 341 TokenType.KEEP, 342 TokenType.KILL, 343 TokenType.LEFT, 344 TokenType.LOAD, 345 TokenType.MERGE, 346 TokenType.NATURAL, 347 TokenType.NEXT, 348 TokenType.OFFSET, 349 TokenType.OPERATOR, 350 TokenType.ORDINALITY, 351 TokenType.OVERLAPS, 352 TokenType.OVERWRITE, 353 TokenType.PARTITION, 354 TokenType.PERCENT, 355 TokenType.PIVOT, 356 TokenType.PRAGMA, 357 TokenType.RANGE, 358 TokenType.RECURSIVE, 359 TokenType.REFERENCES, 360 TokenType.REFRESH, 361 TokenType.REPLACE, 362 TokenType.RIGHT, 363 TokenType.ROW, 364 TokenType.ROWS, 365 TokenType.SEMI, 366 TokenType.SET, 367 TokenType.SETTINGS, 368 TokenType.SHOW, 369 TokenType.TEMPORARY, 370 TokenType.TOP, 371 TokenType.TRUE, 372 TokenType.UNIQUE, 373 TokenType.UNPIVOT, 374 TokenType.UPDATE, 375 TokenType.USE, 376 TokenType.VOLATILE, 377 TokenType.WINDOW, 378 *CREATABLES, 379 *SUBQUERY_PREDICATES, 380 *TYPE_TOKENS, 381 *NO_PAREN_FUNCTIONS, 382 } 383 384 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 385 386 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 387 TokenType.ANTI, 388 TokenType.APPLY, 389 TokenType.ASOF, 390 TokenType.FULL, 391 TokenType.LEFT, 392 TokenType.LOCK, 393 TokenType.NATURAL, 394 TokenType.OFFSET, 395 TokenType.RIGHT, 396 TokenType.SEMI, 397 TokenType.WINDOW, 398 } 399 400 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 401 402 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 403 404 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 405 406 FUNC_TOKENS = { 407 TokenType.COLLATE, 408 TokenType.COMMAND, 409 TokenType.CURRENT_DATE, 410 TokenType.CURRENT_DATETIME, 411 TokenType.CURRENT_TIMESTAMP, 412 TokenType.CURRENT_TIME, 413 TokenType.CURRENT_USER, 414 TokenType.FILTER, 415 TokenType.FIRST, 416 TokenType.FORMAT, 417 TokenType.GLOB, 418 TokenType.IDENTIFIER, 419 TokenType.INDEX, 420 TokenType.ISNULL, 421 TokenType.ILIKE, 422 TokenType.INSERT, 423 TokenType.LIKE, 424 TokenType.MERGE, 425 TokenType.OFFSET, 426 TokenType.PRIMARY_KEY, 427 TokenType.RANGE, 428 TokenType.REPLACE, 429 TokenType.RLIKE, 430 TokenType.ROW, 431 TokenType.UNNEST, 432 TokenType.VAR, 433 TokenType.LEFT, 434 TokenType.RIGHT, 435 TokenType.DATE, 436 TokenType.DATETIME, 437 TokenType.TABLE, 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.WINDOW, 441 TokenType.XOR, 442 *TYPE_TOKENS, 443 *SUBQUERY_PREDICATES, 444 } 445 446 CONJUNCTION = { 447 TokenType.AND: exp.And, 448 TokenType.OR: exp.Or, 449 } 450 451 EQUALITY = { 452 TokenType.COLON_EQ: exp.PropertyEQ, 453 TokenType.EQ: exp.EQ, 454 TokenType.NEQ: exp.NEQ, 455 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 456 } 457 458 COMPARISON = { 459 TokenType.GT: exp.GT, 460 TokenType.GTE: exp.GTE, 461 TokenType.LT: exp.LT, 462 TokenType.LTE: exp.LTE, 463 } 464 465 BITWISE = { 466 TokenType.AMP: exp.BitwiseAnd, 467 TokenType.CARET: exp.BitwiseXor, 468 TokenType.PIPE: exp.BitwiseOr, 469 } 470 471 TERM = { 472 TokenType.DASH: exp.Sub, 473 TokenType.PLUS: exp.Add, 474 TokenType.MOD: exp.Mod, 475 TokenType.COLLATE: exp.Collate, 476 } 477 478 FACTOR = { 479 TokenType.DIV: exp.IntDiv, 480 TokenType.LR_ARROW: exp.Distance, 481 TokenType.SLASH: exp.Div, 482 TokenType.STAR: exp.Mul, 483 } 484 485 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 486 487 TIMES = { 488 TokenType.TIME, 489 TokenType.TIMETZ, 490 } 491 492 TIMESTAMPS = { 493 TokenType.TIMESTAMP, 494 TokenType.TIMESTAMPTZ, 495 TokenType.TIMESTAMPLTZ, 496 *TIMES, 497 } 498 499 SET_OPERATIONS = { 500 TokenType.UNION, 501 TokenType.INTERSECT, 502 TokenType.EXCEPT, 503 } 504 505 JOIN_METHODS = { 506 TokenType.NATURAL, 507 TokenType.ASOF, 508 } 509 510 JOIN_SIDES = { 511 TokenType.LEFT, 512 TokenType.RIGHT, 513 TokenType.FULL, 514 } 515 516 JOIN_KINDS = { 517 TokenType.INNER, 518 TokenType.OUTER, 519 TokenType.CROSS, 520 TokenType.SEMI, 521 TokenType.ANTI, 522 } 523 524 JOIN_HINTS: t.Set[str] = set() 525 526 LAMBDAS = { 527 TokenType.ARROW: lambda self, expressions: self.expression( 528 exp.Lambda, 529 this=self._replace_lambda( 530 self._parse_conjunction(), 531 {node.name for node in expressions}, 532 ), 533 expressions=expressions, 534 ), 535 TokenType.FARROW: lambda self, expressions: self.expression( 536 exp.Kwarg, 537 this=exp.var(expressions[0].name), 538 expression=self._parse_conjunction(), 539 ), 540 } 541 542 COLUMN_OPERATORS = { 543 TokenType.DOT: None, 544 TokenType.DCOLON: lambda self, this, to: self.expression( 545 exp.Cast if self.STRICT_CAST else exp.TryCast, 546 this=this, 547 to=to, 548 ), 549 TokenType.ARROW: lambda self, this, path: self.expression( 550 exp.JSONExtract, 551 this=this, 552 expression=self.dialect.to_json_path(path), 553 ), 554 TokenType.DARROW: lambda self, this, path: self.expression( 555 exp.JSONExtractScalar, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 ), 559 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 560 exp.JSONBExtract, 561 this=this, 562 expression=path, 563 ), 564 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 565 exp.JSONBExtractScalar, 566 this=this, 567 expression=path, 568 ), 569 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 570 exp.JSONBContains, 571 this=this, 572 expression=key, 573 ), 574 } 575 576 EXPRESSION_PARSERS = { 577 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 578 exp.Column: lambda self: self._parse_column(), 579 exp.Condition: lambda self: self._parse_conjunction(), 580 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 581 exp.Expression: lambda self: self._parse_statement(), 582 exp.From: lambda self: self._parse_from(), 583 exp.Group: lambda self: self._parse_group(), 584 exp.Having: lambda self: self._parse_having(), 585 exp.Identifier: lambda self: self._parse_id_var(), 586 exp.Join: lambda self: self._parse_join(), 587 exp.Lambda: lambda self: self._parse_lambda(), 588 exp.Lateral: lambda self: self._parse_lateral(), 589 exp.Limit: lambda self: self._parse_limit(), 590 exp.Offset: lambda self: self._parse_offset(), 591 exp.Order: lambda self: self._parse_order(), 592 exp.Ordered: lambda self: self._parse_ordered(), 593 exp.Properties: lambda self: self._parse_properties(), 594 exp.Qualify: lambda self: self._parse_qualify(), 595 exp.Returning: lambda self: self._parse_returning(), 596 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 597 exp.Table: lambda self: self._parse_table_parts(), 598 exp.TableAlias: lambda self: self._parse_table_alias(), 599 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 600 exp.Where: lambda self: self._parse_where(), 601 exp.Window: lambda self: self._parse_named_window(), 602 exp.With: lambda self: self._parse_with(), 603 "JOIN_TYPE": lambda self: self._parse_join_parts(), 604 } 605 606 STATEMENT_PARSERS = { 607 TokenType.ALTER: lambda self: self._parse_alter(), 608 TokenType.BEGIN: lambda self: self._parse_transaction(), 609 TokenType.CACHE: lambda self: self._parse_cache(), 610 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 611 TokenType.COMMENT: lambda self: self._parse_comment(), 612 TokenType.CREATE: lambda self: self._parse_create(), 613 TokenType.DELETE: lambda self: self._parse_delete(), 614 TokenType.DESC: lambda self: self._parse_describe(), 615 TokenType.DESCRIBE: lambda self: self._parse_describe(), 616 TokenType.DROP: lambda self: self._parse_drop(), 617 TokenType.INSERT: lambda self: self._parse_insert(), 618 TokenType.KILL: lambda self: self._parse_kill(), 619 TokenType.LOAD: lambda self: self._parse_load(), 620 TokenType.MERGE: lambda self: self._parse_merge(), 621 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 622 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 623 TokenType.REFRESH: lambda self: self._parse_refresh(), 624 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 625 TokenType.SET: lambda self: self._parse_set(), 626 TokenType.UNCACHE: lambda self: self._parse_uncache(), 627 TokenType.UPDATE: lambda self: self._parse_update(), 628 TokenType.USE: lambda self: self.expression( 629 exp.Use, 630 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 631 and exp.var(self._prev.text), 632 this=self._parse_table(schema=False), 633 ), 634 } 635 636 UNARY_PARSERS = { 637 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 638 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 639 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 640 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 641 } 642 643 PRIMARY_PARSERS = { 644 TokenType.STRING: lambda self, token: self.expression( 645 exp.Literal, this=token.text, is_string=True 646 ), 647 TokenType.NUMBER: lambda self, token: self.expression( 648 exp.Literal, this=token.text, is_string=False 649 ), 650 TokenType.STAR: lambda self, _: self.expression( 651 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 652 ), 653 TokenType.NULL: lambda self, _: self.expression(exp.Null), 654 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 655 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 656 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 657 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 658 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 659 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 660 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 661 exp.National, this=token.text 662 ), 663 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.UNICODE_STRING: lambda self, token: self.expression( 668 exp.UnicodeString, 669 this=token.text, 670 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 671 ), 672 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 673 } 674 675 PLACEHOLDER_PARSERS = { 676 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 677 TokenType.PARAMETER: lambda self: self._parse_parameter(), 678 TokenType.COLON: lambda self: ( 679 self.expression(exp.Placeholder, this=self._prev.text) 680 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 681 else None 682 ), 683 } 684 685 RANGE_PARSERS = { 686 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 687 TokenType.GLOB: binary_range_parser(exp.Glob), 688 TokenType.ILIKE: binary_range_parser(exp.ILike), 689 TokenType.IN: lambda self, this: self._parse_in(this), 690 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 691 TokenType.IS: lambda self, this: self._parse_is(this), 692 TokenType.LIKE: binary_range_parser(exp.Like), 693 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 694 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 695 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 696 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 697 } 698 699 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 700 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 701 "AUTO": lambda self: self._parse_auto_property(), 702 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 703 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 704 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 705 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 706 "CHECKSUM": lambda self: self._parse_checksum(), 707 "CLUSTER BY": lambda self: self._parse_cluster(), 708 "CLUSTERED": lambda self: self._parse_clustered_by(), 709 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 710 exp.CollateProperty, **kwargs 711 ), 712 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 713 "CONTAINS": lambda self: self._parse_contains_property(), 714 "COPY": lambda self: self._parse_copy_property(), 715 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 716 "DEFINER": lambda self: self._parse_definer(), 717 "DETERMINISTIC": lambda self: self.expression( 718 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 719 ), 720 "DISTKEY": lambda self: self._parse_distkey(), 721 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 722 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 723 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 724 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 725 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 726 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 727 "FREESPACE": lambda self: self._parse_freespace(), 728 "HEAP": lambda self: self.expression(exp.HeapProperty), 729 "IMMUTABLE": lambda self: self.expression( 730 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 731 ), 732 "INHERITS": lambda self: self.expression( 733 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 734 ), 735 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 736 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 737 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 738 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 739 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 740 "LIKE": lambda self: self._parse_create_like(), 741 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 742 "LOCK": lambda self: self._parse_locking(), 743 "LOCKING": lambda self: self._parse_locking(), 744 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 745 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 746 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 747 "MODIFIES": lambda self: self._parse_modifies_property(), 748 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 749 "NO": lambda self: self._parse_no_property(), 750 "ON": lambda self: self._parse_on_property(), 751 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 752 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 753 "PARTITION": lambda self: self._parse_partitioned_of(), 754 "PARTITION BY": lambda self: self._parse_partitioned_by(), 755 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 756 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 757 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 758 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 759 "READS": lambda self: self._parse_reads_property(), 760 "REMOTE": lambda self: self._parse_remote_with_connection(), 761 "RETURNS": lambda self: self._parse_returns(), 762 "ROW": lambda self: self._parse_row(), 763 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 764 "SAMPLE": lambda self: self.expression( 765 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 766 ), 767 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 768 "SETTINGS": lambda self: self.expression( 769 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 770 ), 771 "SORTKEY": lambda self: self._parse_sortkey(), 772 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 773 "STABLE": lambda self: self.expression( 774 exp.StabilityProperty, this=exp.Literal.string("STABLE") 775 ), 776 "STORED": lambda self: self._parse_stored(), 777 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 778 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 779 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 780 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 781 "TO": lambda self: self._parse_to_table(), 782 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 783 "TRANSFORM": lambda self: self.expression( 784 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 785 ), 786 "TTL": lambda self: self._parse_ttl(), 787 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 788 "VOLATILE": lambda self: self._parse_volatile_property(), 789 "WITH": lambda self: self._parse_with_property(), 790 } 791 792 CONSTRAINT_PARSERS = { 793 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 794 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 795 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 796 "CHARACTER SET": lambda self: self.expression( 797 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 798 ), 799 "CHECK": lambda self: self.expression( 800 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 801 ), 802 "COLLATE": lambda self: self.expression( 803 exp.CollateColumnConstraint, this=self._parse_var() 804 ), 805 "COMMENT": lambda self: self.expression( 806 exp.CommentColumnConstraint, this=self._parse_string() 807 ), 808 "COMPRESS": lambda self: self._parse_compress(), 809 "CLUSTERED": lambda self: self.expression( 810 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 811 ), 812 "NONCLUSTERED": lambda self: self.expression( 813 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 814 ), 815 "DEFAULT": lambda self: self.expression( 816 exp.DefaultColumnConstraint, this=self._parse_bitwise() 817 ), 818 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 819 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 820 "FORMAT": lambda self: self.expression( 821 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 822 ), 823 "GENERATED": lambda self: self._parse_generated_as_identity(), 824 "IDENTITY": lambda self: self._parse_auto_increment(), 825 "INLINE": lambda self: self._parse_inline(), 826 "LIKE": lambda self: self._parse_create_like(), 827 "NOT": lambda self: self._parse_not_constraint(), 828 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 829 "ON": lambda self: ( 830 self._match(TokenType.UPDATE) 831 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 832 ) 833 or self.expression(exp.OnProperty, this=self._parse_id_var()), 834 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 835 "PERIOD": lambda self: self._parse_period_for_system_time(), 836 "PRIMARY KEY": lambda self: self._parse_primary_key(), 837 "REFERENCES": lambda self: self._parse_references(match=False), 838 "TITLE": lambda self: self.expression( 839 exp.TitleColumnConstraint, this=self._parse_var_or_string() 840 ), 841 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 842 "UNIQUE": lambda self: self._parse_unique(), 843 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 844 "WITH": lambda self: self.expression( 845 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 846 ), 847 } 848 849 ALTER_PARSERS = { 850 "ADD": lambda self: self._parse_alter_table_add(), 851 "ALTER": lambda self: self._parse_alter_table_alter(), 852 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 853 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 854 "DROP": lambda self: self._parse_alter_table_drop(), 855 "RENAME": lambda self: self._parse_alter_table_rename(), 856 } 857 858 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 859 860 NO_PAREN_FUNCTION_PARSERS = { 861 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 862 "CASE": lambda self: self._parse_case(), 863 "IF": lambda self: self._parse_if(), 864 "NEXT": lambda self: self._parse_next_value_for(), 865 } 866 867 INVALID_FUNC_NAME_TOKENS = { 868 TokenType.IDENTIFIER, 869 TokenType.STRING, 870 } 871 872 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 873 874 FUNCTION_PARSERS = { 875 "ANY_VALUE": lambda self: self._parse_any_value(), 876 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 877 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 878 "DECODE": lambda self: self._parse_decode(), 879 "EXTRACT": lambda self: self._parse_extract(), 880 "JSON_OBJECT": lambda self: self._parse_json_object(), 881 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 882 "JSON_TABLE": lambda self: self._parse_json_table(), 883 "MATCH": lambda self: self._parse_match_against(), 884 "OPENJSON": lambda self: self._parse_open_json(), 885 "POSITION": lambda self: self._parse_position(), 886 "PREDICT": lambda self: self._parse_predict(), 887 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 888 "STRING_AGG": lambda self: self._parse_string_agg(), 889 "SUBSTRING": lambda self: self._parse_substring(), 890 "TRIM": lambda self: self._parse_trim(), 891 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 892 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 893 } 894 895 QUERY_MODIFIER_PARSERS = { 896 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 897 TokenType.WHERE: lambda self: ("where", self._parse_where()), 898 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 899 TokenType.HAVING: lambda self: ("having", self._parse_having()), 900 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 901 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 902 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 903 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 904 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 905 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 906 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 907 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 908 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 909 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.CLUSTER_BY: lambda self: ( 911 "cluster", 912 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 913 ), 914 TokenType.DISTRIBUTE_BY: lambda self: ( 915 "distribute", 916 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 917 ), 918 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 919 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 920 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 921 } 922 923 SET_PARSERS = { 924 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 925 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 926 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 927 "TRANSACTION": lambda self: self._parse_set_transaction(), 928 } 929 930 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 931 932 TYPE_LITERAL_PARSERS = { 933 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 934 } 935 936 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 937 938 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 939 940 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 941 942 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 943 TRANSACTION_CHARACTERISTICS = { 944 "ISOLATION LEVEL REPEATABLE READ", 945 "ISOLATION LEVEL READ COMMITTED", 946 "ISOLATION LEVEL READ UNCOMMITTED", 947 "ISOLATION LEVEL SERIALIZABLE", 948 "READ WRITE", 949 "READ ONLY", 950 } 951 952 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 953 954 CLONE_KEYWORDS = {"CLONE", "COPY"} 955 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 956 957 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 958 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 959 960 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 961 962 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 963 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 964 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 965 966 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 967 968 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 969 970 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 971 972 DISTINCT_TOKENS = {TokenType.DISTINCT} 973 974 NULL_TOKENS = {TokenType.NULL} 975 976 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 977 978 STRICT_CAST = True 979 980 PREFIXED_PIVOT_COLUMNS = False 981 IDENTIFY_PIVOT_STRINGS = False 982 983 LOG_DEFAULTS_TO_LN = False 984 985 # Whether or not ADD is present for each column added by ALTER TABLE 986 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 987 988 # Whether or not the table sample clause expects CSV syntax 989 TABLESAMPLE_CSV = False 990 991 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 992 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 993 994 # Whether the TRIM function expects the characters to trim as its first argument 995 TRIM_PATTERN_FIRST = False 996 997 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 998 STRING_ALIASES = False 999 1000 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1001 MODIFIERS_ATTACHED_TO_UNION = True 1002 UNION_MODIFIERS = {"order", "limit", "offset"} 1003 1004 # parses no parenthesis if statements as commands 1005 NO_PAREN_IF_COMMANDS = True 1006 1007 __slots__ = ( 1008 "error_level", 1009 "error_message_context", 1010 "max_errors", 1011 "dialect", 1012 "sql", 1013 "errors", 1014 "_tokens", 1015 "_index", 1016 "_curr", 1017 "_next", 1018 "_prev", 1019 "_prev_comments", 1020 ) 1021 1022 # Autofilled 1023 SHOW_TRIE: t.Dict = {} 1024 SET_TRIE: t.Dict = {} 1025 1026 def __init__( 1027 self, 1028 error_level: t.Optional[ErrorLevel] = None, 1029 error_message_context: int = 100, 1030 max_errors: int = 3, 1031 dialect: DialectType = None, 1032 ): 1033 from sqlglot.dialects import Dialect 1034 1035 self.error_level = error_level or ErrorLevel.IMMEDIATE 1036 self.error_message_context = error_message_context 1037 self.max_errors = max_errors 1038 self.dialect = Dialect.get_or_raise(dialect) 1039 self.reset() 1040 1041 def reset(self): 1042 self.sql = "" 1043 self.errors = [] 1044 self._tokens = [] 1045 self._index = 0 1046 self._curr = None 1047 self._next = None 1048 self._prev = None 1049 self._prev_comments = None 1050 1051 def parse( 1052 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 """ 1055 Parses a list of tokens and returns a list of syntax trees, one tree 1056 per parsed SQL statement. 1057 1058 Args: 1059 raw_tokens: The list of tokens. 1060 sql: The original SQL string, used to produce helpful debug messages. 1061 1062 Returns: 1063 The list of the produced syntax trees. 1064 """ 1065 return self._parse( 1066 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1067 ) 1068 1069 def parse_into( 1070 self, 1071 expression_types: exp.IntoType, 1072 raw_tokens: t.List[Token], 1073 sql: t.Optional[str] = None, 1074 ) -> t.List[t.Optional[exp.Expression]]: 1075 """ 1076 Parses a list of tokens into a given Expression type. If a collection of Expression 1077 types is given instead, this method will try to parse the token list into each one 1078 of them, stopping at the first for which the parsing succeeds. 1079 1080 Args: 1081 expression_types: The expression type(s) to try and parse the token list into. 1082 raw_tokens: The list of tokens. 1083 sql: The original SQL string, used to produce helpful debug messages. 1084 1085 Returns: 1086 The target Expression. 1087 """ 1088 errors = [] 1089 for expression_type in ensure_list(expression_types): 1090 parser = self.EXPRESSION_PARSERS.get(expression_type) 1091 if not parser: 1092 raise TypeError(f"No parser registered for {expression_type}") 1093 1094 try: 1095 return self._parse(parser, raw_tokens, sql) 1096 except ParseError as e: 1097 e.errors[0]["into_expression"] = expression_type 1098 errors.append(e) 1099 1100 raise ParseError( 1101 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1102 errors=merge_errors(errors), 1103 ) from errors[-1] 1104 1105 def _parse( 1106 self, 1107 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1108 raw_tokens: t.List[Token], 1109 sql: t.Optional[str] = None, 1110 ) -> t.List[t.Optional[exp.Expression]]: 1111 self.reset() 1112 self.sql = sql or "" 1113 1114 total = len(raw_tokens) 1115 chunks: t.List[t.List[Token]] = [[]] 1116 1117 for i, token in enumerate(raw_tokens): 1118 if token.token_type == TokenType.SEMICOLON: 1119 if i < total - 1: 1120 chunks.append([]) 1121 else: 1122 chunks[-1].append(token) 1123 1124 expressions = [] 1125 1126 for tokens in chunks: 1127 self._index = -1 1128 self._tokens = tokens 1129 self._advance() 1130 1131 expressions.append(parse_method(self)) 1132 1133 if self._index < len(self._tokens): 1134 self.raise_error("Invalid expression / Unexpected token") 1135 1136 self.check_errors() 1137 1138 return expressions 1139 1140 def check_errors(self) -> None: 1141 """Logs or raises any found errors, depending on the chosen error level setting.""" 1142 if self.error_level == ErrorLevel.WARN: 1143 for error in self.errors: 1144 logger.error(str(error)) 1145 elif self.error_level == ErrorLevel.RAISE and self.errors: 1146 raise ParseError( 1147 concat_messages(self.errors, self.max_errors), 1148 errors=merge_errors(self.errors), 1149 ) 1150 1151 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1152 """ 1153 Appends an error in the list of recorded errors or raises it, depending on the chosen 1154 error level setting. 1155 """ 1156 token = token or self._curr or self._prev or Token.string("") 1157 start = token.start 1158 end = token.end + 1 1159 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1160 highlight = self.sql[start:end] 1161 end_context = self.sql[end : end + self.error_message_context] 1162 1163 error = ParseError.new( 1164 f"{message}. Line {token.line}, Col: {token.col}.\n" 1165 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1166 description=message, 1167 line=token.line, 1168 col=token.col, 1169 start_context=start_context, 1170 highlight=highlight, 1171 end_context=end_context, 1172 ) 1173 1174 if self.error_level == ErrorLevel.IMMEDIATE: 1175 raise error 1176 1177 self.errors.append(error) 1178 1179 def expression( 1180 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1181 ) -> E: 1182 """ 1183 Creates a new, validated Expression. 1184 1185 Args: 1186 exp_class: The expression class to instantiate. 1187 comments: An optional list of comments to attach to the expression. 1188 kwargs: The arguments to set for the expression along with their respective values. 1189 1190 Returns: 1191 The target expression. 1192 """ 1193 instance = exp_class(**kwargs) 1194 instance.add_comments(comments) if comments else self._add_comments(instance) 1195 return self.validate_expression(instance) 1196 1197 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1198 if expression and self._prev_comments: 1199 expression.add_comments(self._prev_comments) 1200 self._prev_comments = None 1201 1202 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1203 """ 1204 Validates an Expression, making sure that all its mandatory arguments are set. 1205 1206 Args: 1207 expression: The expression to validate. 1208 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1209 1210 Returns: 1211 The validated expression. 1212 """ 1213 if self.error_level != ErrorLevel.IGNORE: 1214 for error_message in expression.error_messages(args): 1215 self.raise_error(error_message) 1216 1217 return expression 1218 1219 def _find_sql(self, start: Token, end: Token) -> str: 1220 return self.sql[start.start : end.end + 1] 1221 1222 def _is_connected(self) -> bool: 1223 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1224 1225 def _advance(self, times: int = 1) -> None: 1226 self._index += times 1227 self._curr = seq_get(self._tokens, self._index) 1228 self._next = seq_get(self._tokens, self._index + 1) 1229 1230 if self._index > 0: 1231 self._prev = self._tokens[self._index - 1] 1232 self._prev_comments = self._prev.comments 1233 else: 1234 self._prev = None 1235 self._prev_comments = None 1236 1237 def _retreat(self, index: int) -> None: 1238 if index != self._index: 1239 self._advance(index - self._index) 1240 1241 def _warn_unsupported(self) -> None: 1242 if len(self._tokens) <= 1: 1243 return 1244 1245 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1246 # interested in emitting a warning for the one being currently processed. 1247 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1248 1249 logger.warning( 1250 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1251 ) 1252 1253 def _parse_command(self) -> exp.Command: 1254 self._warn_unsupported() 1255 return self.expression( 1256 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1257 ) 1258 1259 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1260 start = self._prev 1261 exists = self._parse_exists() if allow_exists else None 1262 1263 self._match(TokenType.ON) 1264 1265 kind = self._match_set(self.CREATABLES) and self._prev 1266 if not kind: 1267 return self._parse_as_command(start) 1268 1269 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1270 this = self._parse_user_defined_function(kind=kind.token_type) 1271 elif kind.token_type == TokenType.TABLE: 1272 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1273 elif kind.token_type == TokenType.COLUMN: 1274 this = self._parse_column() 1275 else: 1276 this = self._parse_id_var() 1277 1278 self._match(TokenType.IS) 1279 1280 return self.expression( 1281 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1282 ) 1283 1284 def _parse_to_table( 1285 self, 1286 ) -> exp.ToTableProperty: 1287 table = self._parse_table_parts(schema=True) 1288 return self.expression(exp.ToTableProperty, this=table) 1289 1290 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1291 def _parse_ttl(self) -> exp.Expression: 1292 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1293 this = self._parse_bitwise() 1294 1295 if self._match_text_seq("DELETE"): 1296 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1297 if self._match_text_seq("RECOMPRESS"): 1298 return self.expression( 1299 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1300 ) 1301 if self._match_text_seq("TO", "DISK"): 1302 return self.expression( 1303 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1304 ) 1305 if self._match_text_seq("TO", "VOLUME"): 1306 return self.expression( 1307 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1308 ) 1309 1310 return this 1311 1312 expressions = self._parse_csv(_parse_ttl_action) 1313 where = self._parse_where() 1314 group = self._parse_group() 1315 1316 aggregates = None 1317 if group and self._match(TokenType.SET): 1318 aggregates = self._parse_csv(self._parse_set_item) 1319 1320 return self.expression( 1321 exp.MergeTreeTTL, 1322 expressions=expressions, 1323 where=where, 1324 group=group, 1325 aggregates=aggregates, 1326 ) 1327 1328 def _parse_statement(self) -> t.Optional[exp.Expression]: 1329 if self._curr is None: 1330 return None 1331 1332 if self._match_set(self.STATEMENT_PARSERS): 1333 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1334 1335 if self._match_set(Tokenizer.COMMANDS): 1336 return self._parse_command() 1337 1338 expression = self._parse_expression() 1339 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1340 return self._parse_query_modifiers(expression) 1341 1342 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1343 start = self._prev 1344 temporary = self._match(TokenType.TEMPORARY) 1345 materialized = self._match_text_seq("MATERIALIZED") 1346 1347 kind = self._match_set(self.CREATABLES) and self._prev.text 1348 if not kind: 1349 return self._parse_as_command(start) 1350 1351 return self.expression( 1352 exp.Drop, 1353 comments=start.comments, 1354 exists=exists or self._parse_exists(), 1355 this=self._parse_table( 1356 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1357 ), 1358 kind=kind, 1359 temporary=temporary, 1360 materialized=materialized, 1361 cascade=self._match_text_seq("CASCADE"), 1362 constraints=self._match_text_seq("CONSTRAINTS"), 1363 purge=self._match_text_seq("PURGE"), 1364 ) 1365 1366 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1367 return ( 1368 self._match_text_seq("IF") 1369 and (not not_ or self._match(TokenType.NOT)) 1370 and self._match(TokenType.EXISTS) 1371 ) 1372 1373 def _parse_create(self) -> exp.Create | exp.Command: 1374 # Note: this can't be None because we've matched a statement parser 1375 start = self._prev 1376 comments = self._prev_comments 1377 1378 replace = ( 1379 start.token_type == TokenType.REPLACE 1380 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1381 or self._match_pair(TokenType.OR, TokenType.ALTER) 1382 ) 1383 unique = self._match(TokenType.UNIQUE) 1384 1385 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1386 self._advance() 1387 1388 properties = None 1389 create_token = self._match_set(self.CREATABLES) and self._prev 1390 1391 if not create_token: 1392 # exp.Properties.Location.POST_CREATE 1393 properties = self._parse_properties() 1394 create_token = self._match_set(self.CREATABLES) and self._prev 1395 1396 if not properties or not create_token: 1397 return self._parse_as_command(start) 1398 1399 exists = self._parse_exists(not_=True) 1400 this = None 1401 expression: t.Optional[exp.Expression] = None 1402 indexes = None 1403 no_schema_binding = None 1404 begin = None 1405 end = None 1406 clone = None 1407 1408 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1409 nonlocal properties 1410 if properties and temp_props: 1411 properties.expressions.extend(temp_props.expressions) 1412 elif temp_props: 1413 properties = temp_props 1414 1415 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1416 this = self._parse_user_defined_function(kind=create_token.token_type) 1417 1418 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1419 extend_props(self._parse_properties()) 1420 1421 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1422 1423 if not expression: 1424 if self._match(TokenType.COMMAND): 1425 expression = self._parse_as_command(self._prev) 1426 else: 1427 begin = self._match(TokenType.BEGIN) 1428 return_ = self._match_text_seq("RETURN") 1429 1430 if self._match(TokenType.STRING, advance=False): 1431 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1432 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1433 expression = self._parse_string() 1434 extend_props(self._parse_properties()) 1435 else: 1436 expression = self._parse_statement() 1437 1438 end = self._match_text_seq("END") 1439 1440 if return_: 1441 expression = self.expression(exp.Return, this=expression) 1442 elif create_token.token_type == TokenType.INDEX: 1443 this = self._parse_index(index=self._parse_id_var()) 1444 elif create_token.token_type in self.DB_CREATABLES: 1445 table_parts = self._parse_table_parts( 1446 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1447 ) 1448 1449 # exp.Properties.Location.POST_NAME 1450 self._match(TokenType.COMMA) 1451 extend_props(self._parse_properties(before=True)) 1452 1453 this = self._parse_schema(this=table_parts) 1454 1455 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1456 extend_props(self._parse_properties()) 1457 1458 self._match(TokenType.ALIAS) 1459 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1460 # exp.Properties.Location.POST_ALIAS 1461 extend_props(self._parse_properties()) 1462 1463 expression = self._parse_ddl_select() 1464 1465 if create_token.token_type == TokenType.TABLE: 1466 # exp.Properties.Location.POST_EXPRESSION 1467 extend_props(self._parse_properties()) 1468 1469 indexes = [] 1470 while True: 1471 index = self._parse_index() 1472 1473 # exp.Properties.Location.POST_INDEX 1474 extend_props(self._parse_properties()) 1475 1476 if not index: 1477 break 1478 else: 1479 self._match(TokenType.COMMA) 1480 indexes.append(index) 1481 elif create_token.token_type == TokenType.VIEW: 1482 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1483 no_schema_binding = True 1484 1485 shallow = self._match_text_seq("SHALLOW") 1486 1487 if self._match_texts(self.CLONE_KEYWORDS): 1488 copy = self._prev.text.lower() == "copy" 1489 clone = self.expression( 1490 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1491 ) 1492 1493 if self._curr: 1494 return self._parse_as_command(start) 1495 1496 return self.expression( 1497 exp.Create, 1498 comments=comments, 1499 this=this, 1500 kind=create_token.text.upper(), 1501 replace=replace, 1502 unique=unique, 1503 expression=expression, 1504 exists=exists, 1505 properties=properties, 1506 indexes=indexes, 1507 no_schema_binding=no_schema_binding, 1508 begin=begin, 1509 end=end, 1510 clone=clone, 1511 ) 1512 1513 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1514 # only used for teradata currently 1515 self._match(TokenType.COMMA) 1516 1517 kwargs = { 1518 "no": self._match_text_seq("NO"), 1519 "dual": self._match_text_seq("DUAL"), 1520 "before": self._match_text_seq("BEFORE"), 1521 "default": self._match_text_seq("DEFAULT"), 1522 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1523 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1524 "after": self._match_text_seq("AFTER"), 1525 "minimum": self._match_texts(("MIN", "MINIMUM")), 1526 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1527 } 1528 1529 if self._match_texts(self.PROPERTY_PARSERS): 1530 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1531 try: 1532 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1533 except TypeError: 1534 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1535 1536 return None 1537 1538 def _parse_property(self) -> t.Optional[exp.Expression]: 1539 if self._match_texts(self.PROPERTY_PARSERS): 1540 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1541 1542 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1543 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1544 1545 if self._match_text_seq("COMPOUND", "SORTKEY"): 1546 return self._parse_sortkey(compound=True) 1547 1548 if self._match_text_seq("SQL", "SECURITY"): 1549 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1550 1551 index = self._index 1552 key = self._parse_column() 1553 1554 if not self._match(TokenType.EQ): 1555 self._retreat(index) 1556 return None 1557 1558 return self.expression( 1559 exp.Property, 1560 this=key.to_dot() if isinstance(key, exp.Column) else key, 1561 value=self._parse_column() or self._parse_var(any_token=True), 1562 ) 1563 1564 def _parse_stored(self) -> exp.FileFormatProperty: 1565 self._match(TokenType.ALIAS) 1566 1567 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1568 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1569 1570 return self.expression( 1571 exp.FileFormatProperty, 1572 this=( 1573 self.expression( 1574 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1575 ) 1576 if input_format or output_format 1577 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1578 ), 1579 ) 1580 1581 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1582 self._match(TokenType.EQ) 1583 self._match(TokenType.ALIAS) 1584 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1585 1586 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1587 properties = [] 1588 while True: 1589 if before: 1590 prop = self._parse_property_before() 1591 else: 1592 prop = self._parse_property() 1593 1594 if not prop: 1595 break 1596 for p in ensure_list(prop): 1597 properties.append(p) 1598 1599 if properties: 1600 return self.expression(exp.Properties, expressions=properties) 1601 1602 return None 1603 1604 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1605 return self.expression( 1606 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1607 ) 1608 1609 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1610 if self._index >= 2: 1611 pre_volatile_token = self._tokens[self._index - 2] 1612 else: 1613 pre_volatile_token = None 1614 1615 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1616 return exp.VolatileProperty() 1617 1618 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1619 1620 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1621 self._match_pair(TokenType.EQ, TokenType.ON) 1622 1623 prop = self.expression(exp.WithSystemVersioningProperty) 1624 if self._match(TokenType.L_PAREN): 1625 self._match_text_seq("HISTORY_TABLE", "=") 1626 prop.set("this", self._parse_table_parts()) 1627 1628 if self._match(TokenType.COMMA): 1629 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1630 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1631 1632 self._match_r_paren() 1633 1634 return prop 1635 1636 def _parse_with_property( 1637 self, 1638 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1639 if self._match(TokenType.L_PAREN, advance=False): 1640 return self._parse_wrapped_csv(self._parse_property) 1641 1642 if self._match_text_seq("JOURNAL"): 1643 return self._parse_withjournaltable() 1644 1645 if self._match_text_seq("DATA"): 1646 return self._parse_withdata(no=False) 1647 elif self._match_text_seq("NO", "DATA"): 1648 return self._parse_withdata(no=True) 1649 1650 if not self._next: 1651 return None 1652 1653 return self._parse_withisolatedloading() 1654 1655 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1656 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1657 self._match(TokenType.EQ) 1658 1659 user = self._parse_id_var() 1660 self._match(TokenType.PARAMETER) 1661 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1662 1663 if not user or not host: 1664 return None 1665 1666 return exp.DefinerProperty(this=f"{user}@{host}") 1667 1668 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1669 self._match(TokenType.TABLE) 1670 self._match(TokenType.EQ) 1671 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1672 1673 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1674 return self.expression(exp.LogProperty, no=no) 1675 1676 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1677 return self.expression(exp.JournalProperty, **kwargs) 1678 1679 def _parse_checksum(self) -> exp.ChecksumProperty: 1680 self._match(TokenType.EQ) 1681 1682 on = None 1683 if self._match(TokenType.ON): 1684 on = True 1685 elif self._match_text_seq("OFF"): 1686 on = False 1687 1688 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1689 1690 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1691 return self.expression( 1692 exp.Cluster, 1693 expressions=( 1694 self._parse_wrapped_csv(self._parse_ordered) 1695 if wrapped 1696 else self._parse_csv(self._parse_ordered) 1697 ), 1698 ) 1699 1700 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1701 self._match_text_seq("BY") 1702 1703 self._match_l_paren() 1704 expressions = self._parse_csv(self._parse_column) 1705 self._match_r_paren() 1706 1707 if self._match_text_seq("SORTED", "BY"): 1708 self._match_l_paren() 1709 sorted_by = self._parse_csv(self._parse_ordered) 1710 self._match_r_paren() 1711 else: 1712 sorted_by = None 1713 1714 self._match(TokenType.INTO) 1715 buckets = self._parse_number() 1716 self._match_text_seq("BUCKETS") 1717 1718 return self.expression( 1719 exp.ClusteredByProperty, 1720 expressions=expressions, 1721 sorted_by=sorted_by, 1722 buckets=buckets, 1723 ) 1724 1725 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1726 if not self._match_text_seq("GRANTS"): 1727 self._retreat(self._index - 1) 1728 return None 1729 1730 return self.expression(exp.CopyGrantsProperty) 1731 1732 def _parse_freespace(self) -> exp.FreespaceProperty: 1733 self._match(TokenType.EQ) 1734 return self.expression( 1735 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1736 ) 1737 1738 def _parse_mergeblockratio( 1739 self, no: bool = False, default: bool = False 1740 ) -> exp.MergeBlockRatioProperty: 1741 if self._match(TokenType.EQ): 1742 return self.expression( 1743 exp.MergeBlockRatioProperty, 1744 this=self._parse_number(), 1745 percent=self._match(TokenType.PERCENT), 1746 ) 1747 1748 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1749 1750 def _parse_datablocksize( 1751 self, 1752 default: t.Optional[bool] = None, 1753 minimum: t.Optional[bool] = None, 1754 maximum: t.Optional[bool] = None, 1755 ) -> exp.DataBlocksizeProperty: 1756 self._match(TokenType.EQ) 1757 size = self._parse_number() 1758 1759 units = None 1760 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1761 units = self._prev.text 1762 1763 return self.expression( 1764 exp.DataBlocksizeProperty, 1765 size=size, 1766 units=units, 1767 default=default, 1768 minimum=minimum, 1769 maximum=maximum, 1770 ) 1771 1772 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1773 self._match(TokenType.EQ) 1774 always = self._match_text_seq("ALWAYS") 1775 manual = self._match_text_seq("MANUAL") 1776 never = self._match_text_seq("NEVER") 1777 default = self._match_text_seq("DEFAULT") 1778 1779 autotemp = None 1780 if self._match_text_seq("AUTOTEMP"): 1781 autotemp = self._parse_schema() 1782 1783 return self.expression( 1784 exp.BlockCompressionProperty, 1785 always=always, 1786 manual=manual, 1787 never=never, 1788 default=default, 1789 autotemp=autotemp, 1790 ) 1791 1792 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1793 no = self._match_text_seq("NO") 1794 concurrent = self._match_text_seq("CONCURRENT") 1795 self._match_text_seq("ISOLATED", "LOADING") 1796 for_all = self._match_text_seq("FOR", "ALL") 1797 for_insert = self._match_text_seq("FOR", "INSERT") 1798 for_none = self._match_text_seq("FOR", "NONE") 1799 return self.expression( 1800 exp.IsolatedLoadingProperty, 1801 no=no, 1802 concurrent=concurrent, 1803 for_all=for_all, 1804 for_insert=for_insert, 1805 for_none=for_none, 1806 ) 1807 1808 def _parse_locking(self) -> exp.LockingProperty: 1809 if self._match(TokenType.TABLE): 1810 kind = "TABLE" 1811 elif self._match(TokenType.VIEW): 1812 kind = "VIEW" 1813 elif self._match(TokenType.ROW): 1814 kind = "ROW" 1815 elif self._match_text_seq("DATABASE"): 1816 kind = "DATABASE" 1817 else: 1818 kind = None 1819 1820 if kind in ("DATABASE", "TABLE", "VIEW"): 1821 this = self._parse_table_parts() 1822 else: 1823 this = None 1824 1825 if self._match(TokenType.FOR): 1826 for_or_in = "FOR" 1827 elif self._match(TokenType.IN): 1828 for_or_in = "IN" 1829 else: 1830 for_or_in = None 1831 1832 if self._match_text_seq("ACCESS"): 1833 lock_type = "ACCESS" 1834 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1835 lock_type = "EXCLUSIVE" 1836 elif self._match_text_seq("SHARE"): 1837 lock_type = "SHARE" 1838 elif self._match_text_seq("READ"): 1839 lock_type = "READ" 1840 elif self._match_text_seq("WRITE"): 1841 lock_type = "WRITE" 1842 elif self._match_text_seq("CHECKSUM"): 1843 lock_type = "CHECKSUM" 1844 else: 1845 lock_type = None 1846 1847 override = self._match_text_seq("OVERRIDE") 1848 1849 return self.expression( 1850 exp.LockingProperty, 1851 this=this, 1852 kind=kind, 1853 for_or_in=for_or_in, 1854 lock_type=lock_type, 1855 override=override, 1856 ) 1857 1858 def _parse_partition_by(self) -> t.List[exp.Expression]: 1859 if self._match(TokenType.PARTITION_BY): 1860 return self._parse_csv(self._parse_conjunction) 1861 return [] 1862 1863 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1864 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1865 if self._match_text_seq("MINVALUE"): 1866 return exp.var("MINVALUE") 1867 if self._match_text_seq("MAXVALUE"): 1868 return exp.var("MAXVALUE") 1869 return self._parse_bitwise() 1870 1871 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1872 expression = None 1873 from_expressions = None 1874 to_expressions = None 1875 1876 if self._match(TokenType.IN): 1877 this = self._parse_wrapped_csv(self._parse_bitwise) 1878 elif self._match(TokenType.FROM): 1879 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1880 self._match_text_seq("TO") 1881 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1882 elif self._match_text_seq("WITH", "(", "MODULUS"): 1883 this = self._parse_number() 1884 self._match_text_seq(",", "REMAINDER") 1885 expression = self._parse_number() 1886 self._match_r_paren() 1887 else: 1888 self.raise_error("Failed to parse partition bound spec.") 1889 1890 return self.expression( 1891 exp.PartitionBoundSpec, 1892 this=this, 1893 expression=expression, 1894 from_expressions=from_expressions, 1895 to_expressions=to_expressions, 1896 ) 1897 1898 # https://www.postgresql.org/docs/current/sql-createtable.html 1899 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1900 if not self._match_text_seq("OF"): 1901 self._retreat(self._index - 1) 1902 return None 1903 1904 this = self._parse_table(schema=True) 1905 1906 if self._match(TokenType.DEFAULT): 1907 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1908 elif self._match_text_seq("FOR", "VALUES"): 1909 expression = self._parse_partition_bound_spec() 1910 else: 1911 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1912 1913 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1914 1915 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1916 self._match(TokenType.EQ) 1917 return self.expression( 1918 exp.PartitionedByProperty, 1919 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1920 ) 1921 1922 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1923 if self._match_text_seq("AND", "STATISTICS"): 1924 statistics = True 1925 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1926 statistics = False 1927 else: 1928 statistics = None 1929 1930 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1931 1932 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1933 if self._match_text_seq("SQL"): 1934 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1935 return None 1936 1937 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1938 if self._match_text_seq("SQL", "DATA"): 1939 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1940 return None 1941 1942 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1943 if self._match_text_seq("PRIMARY", "INDEX"): 1944 return exp.NoPrimaryIndexProperty() 1945 if self._match_text_seq("SQL"): 1946 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1947 return None 1948 1949 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1950 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1951 return exp.OnCommitProperty() 1952 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1953 return exp.OnCommitProperty(delete=True) 1954 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1955 1956 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1957 if self._match_text_seq("SQL", "DATA"): 1958 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1959 return None 1960 1961 def _parse_distkey(self) -> exp.DistKeyProperty: 1962 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1963 1964 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1965 table = self._parse_table(schema=True) 1966 1967 options = [] 1968 while self._match_texts(("INCLUDING", "EXCLUDING")): 1969 this = self._prev.text.upper() 1970 1971 id_var = self._parse_id_var() 1972 if not id_var: 1973 return None 1974 1975 options.append( 1976 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1977 ) 1978 1979 return self.expression(exp.LikeProperty, this=table, expressions=options) 1980 1981 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1982 return self.expression( 1983 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1984 ) 1985 1986 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1987 self._match(TokenType.EQ) 1988 return self.expression( 1989 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1990 ) 1991 1992 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1993 self._match_text_seq("WITH", "CONNECTION") 1994 return self.expression( 1995 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1996 ) 1997 1998 def _parse_returns(self) -> exp.ReturnsProperty: 1999 value: t.Optional[exp.Expression] 2000 is_table = self._match(TokenType.TABLE) 2001 2002 if is_table: 2003 if self._match(TokenType.LT): 2004 value = self.expression( 2005 exp.Schema, 2006 this="TABLE", 2007 expressions=self._parse_csv(self._parse_struct_types), 2008 ) 2009 if not self._match(TokenType.GT): 2010 self.raise_error("Expecting >") 2011 else: 2012 value = self._parse_schema(exp.var("TABLE")) 2013 else: 2014 value = self._parse_types() 2015 2016 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2017 2018 def _parse_describe(self) -> exp.Describe: 2019 kind = self._match_set(self.CREATABLES) and self._prev.text 2020 extended = self._match_text_seq("EXTENDED") 2021 this = self._parse_table(schema=True) 2022 properties = self._parse_properties() 2023 expressions = properties.expressions if properties else None 2024 return self.expression( 2025 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2026 ) 2027 2028 def _parse_insert(self) -> exp.Insert: 2029 comments = ensure_list(self._prev_comments) 2030 overwrite = self._match(TokenType.OVERWRITE) 2031 ignore = self._match(TokenType.IGNORE) 2032 local = self._match_text_seq("LOCAL") 2033 alternative = None 2034 2035 if self._match_text_seq("DIRECTORY"): 2036 this: t.Optional[exp.Expression] = self.expression( 2037 exp.Directory, 2038 this=self._parse_var_or_string(), 2039 local=local, 2040 row_format=self._parse_row_format(match_row=True), 2041 ) 2042 else: 2043 if self._match(TokenType.OR): 2044 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2045 2046 self._match(TokenType.INTO) 2047 comments += ensure_list(self._prev_comments) 2048 self._match(TokenType.TABLE) 2049 this = self._parse_table(schema=True) 2050 2051 returning = self._parse_returning() 2052 2053 return self.expression( 2054 exp.Insert, 2055 comments=comments, 2056 this=this, 2057 by_name=self._match_text_seq("BY", "NAME"), 2058 exists=self._parse_exists(), 2059 partition=self._parse_partition(), 2060 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2061 and self._parse_conjunction(), 2062 expression=self._parse_ddl_select(), 2063 conflict=self._parse_on_conflict(), 2064 returning=returning or self._parse_returning(), 2065 overwrite=overwrite, 2066 alternative=alternative, 2067 ignore=ignore, 2068 ) 2069 2070 def _parse_kill(self) -> exp.Kill: 2071 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2072 2073 return self.expression( 2074 exp.Kill, 2075 this=self._parse_primary(), 2076 kind=kind, 2077 ) 2078 2079 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2080 conflict = self._match_text_seq("ON", "CONFLICT") 2081 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2082 2083 if not conflict and not duplicate: 2084 return None 2085 2086 nothing = None 2087 expressions = None 2088 key = None 2089 constraint = None 2090 2091 if conflict: 2092 if self._match_text_seq("ON", "CONSTRAINT"): 2093 constraint = self._parse_id_var() 2094 else: 2095 key = self._parse_csv(self._parse_value) 2096 2097 self._match_text_seq("DO") 2098 if self._match_text_seq("NOTHING"): 2099 nothing = True 2100 else: 2101 self._match(TokenType.UPDATE) 2102 self._match(TokenType.SET) 2103 expressions = self._parse_csv(self._parse_equality) 2104 2105 return self.expression( 2106 exp.OnConflict, 2107 duplicate=duplicate, 2108 expressions=expressions, 2109 nothing=nothing, 2110 key=key, 2111 constraint=constraint, 2112 ) 2113 2114 def _parse_returning(self) -> t.Optional[exp.Returning]: 2115 if not self._match(TokenType.RETURNING): 2116 return None 2117 return self.expression( 2118 exp.Returning, 2119 expressions=self._parse_csv(self._parse_expression), 2120 into=self._match(TokenType.INTO) and self._parse_table_part(), 2121 ) 2122 2123 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2124 if not self._match(TokenType.FORMAT): 2125 return None 2126 return self._parse_row_format() 2127 2128 def _parse_row_format( 2129 self, match_row: bool = False 2130 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2131 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2132 return None 2133 2134 if self._match_text_seq("SERDE"): 2135 this = self._parse_string() 2136 2137 serde_properties = None 2138 if self._match(TokenType.SERDE_PROPERTIES): 2139 serde_properties = self.expression( 2140 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2141 ) 2142 2143 return self.expression( 2144 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2145 ) 2146 2147 self._match_text_seq("DELIMITED") 2148 2149 kwargs = {} 2150 2151 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2152 kwargs["fields"] = self._parse_string() 2153 if self._match_text_seq("ESCAPED", "BY"): 2154 kwargs["escaped"] = self._parse_string() 2155 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2156 kwargs["collection_items"] = self._parse_string() 2157 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2158 kwargs["map_keys"] = self._parse_string() 2159 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2160 kwargs["lines"] = self._parse_string() 2161 if self._match_text_seq("NULL", "DEFINED", "AS"): 2162 kwargs["null"] = self._parse_string() 2163 2164 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2165 2166 def _parse_load(self) -> exp.LoadData | exp.Command: 2167 if self._match_text_seq("DATA"): 2168 local = self._match_text_seq("LOCAL") 2169 self._match_text_seq("INPATH") 2170 inpath = self._parse_string() 2171 overwrite = self._match(TokenType.OVERWRITE) 2172 self._match_pair(TokenType.INTO, TokenType.TABLE) 2173 2174 return self.expression( 2175 exp.LoadData, 2176 this=self._parse_table(schema=True), 2177 local=local, 2178 overwrite=overwrite, 2179 inpath=inpath, 2180 partition=self._parse_partition(), 2181 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2182 serde=self._match_text_seq("SERDE") and self._parse_string(), 2183 ) 2184 return self._parse_as_command(self._prev) 2185 2186 def _parse_delete(self) -> exp.Delete: 2187 # This handles MySQL's "Multiple-Table Syntax" 2188 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2189 tables = None 2190 comments = self._prev_comments 2191 if not self._match(TokenType.FROM, advance=False): 2192 tables = self._parse_csv(self._parse_table) or None 2193 2194 returning = self._parse_returning() 2195 2196 return self.expression( 2197 exp.Delete, 2198 comments=comments, 2199 tables=tables, 2200 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2201 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2202 where=self._parse_where(), 2203 returning=returning or self._parse_returning(), 2204 limit=self._parse_limit(), 2205 ) 2206 2207 def _parse_update(self) -> exp.Update: 2208 comments = self._prev_comments 2209 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2210 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2211 returning = self._parse_returning() 2212 return self.expression( 2213 exp.Update, 2214 comments=comments, 2215 **{ # type: ignore 2216 "this": this, 2217 "expressions": expressions, 2218 "from": self._parse_from(joins=True), 2219 "where": self._parse_where(), 2220 "returning": returning or self._parse_returning(), 2221 "order": self._parse_order(), 2222 "limit": self._parse_limit(), 2223 }, 2224 ) 2225 2226 def _parse_uncache(self) -> exp.Uncache: 2227 if not self._match(TokenType.TABLE): 2228 self.raise_error("Expecting TABLE after UNCACHE") 2229 2230 return self.expression( 2231 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2232 ) 2233 2234 def _parse_cache(self) -> exp.Cache: 2235 lazy = self._match_text_seq("LAZY") 2236 self._match(TokenType.TABLE) 2237 table = self._parse_table(schema=True) 2238 2239 options = [] 2240 if self._match_text_seq("OPTIONS"): 2241 self._match_l_paren() 2242 k = self._parse_string() 2243 self._match(TokenType.EQ) 2244 v = self._parse_string() 2245 options = [k, v] 2246 self._match_r_paren() 2247 2248 self._match(TokenType.ALIAS) 2249 return self.expression( 2250 exp.Cache, 2251 this=table, 2252 lazy=lazy, 2253 options=options, 2254 expression=self._parse_select(nested=True), 2255 ) 2256 2257 def _parse_partition(self) -> t.Optional[exp.Partition]: 2258 if not self._match(TokenType.PARTITION): 2259 return None 2260 2261 return self.expression( 2262 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2263 ) 2264 2265 def _parse_value(self) -> exp.Tuple: 2266 if self._match(TokenType.L_PAREN): 2267 expressions = self._parse_csv(self._parse_expression) 2268 self._match_r_paren() 2269 return self.expression(exp.Tuple, expressions=expressions) 2270 2271 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2272 # https://prestodb.io/docs/current/sql/values.html 2273 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2274 2275 def _parse_projections(self) -> t.List[exp.Expression]: 2276 return self._parse_expressions() 2277 2278 def _parse_select( 2279 self, 2280 nested: bool = False, 2281 table: bool = False, 2282 parse_subquery_alias: bool = True, 2283 parse_set_operation: bool = True, 2284 ) -> t.Optional[exp.Expression]: 2285 cte = self._parse_with() 2286 2287 if cte: 2288 this = self._parse_statement() 2289 2290 if not this: 2291 self.raise_error("Failed to parse any statement following CTE") 2292 return cte 2293 2294 if "with" in this.arg_types: 2295 this.set("with", cte) 2296 else: 2297 self.raise_error(f"{this.key} does not support CTE") 2298 this = cte 2299 2300 return this 2301 2302 # duckdb supports leading with FROM x 2303 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2304 2305 if self._match(TokenType.SELECT): 2306 comments = self._prev_comments 2307 2308 hint = self._parse_hint() 2309 all_ = self._match(TokenType.ALL) 2310 distinct = self._match_set(self.DISTINCT_TOKENS) 2311 2312 kind = ( 2313 self._match(TokenType.ALIAS) 2314 and self._match_texts(("STRUCT", "VALUE")) 2315 and self._prev.text.upper() 2316 ) 2317 2318 if distinct: 2319 distinct = self.expression( 2320 exp.Distinct, 2321 on=self._parse_value() if self._match(TokenType.ON) else None, 2322 ) 2323 2324 if all_ and distinct: 2325 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2326 2327 limit = self._parse_limit(top=True) 2328 projections = self._parse_projections() 2329 2330 this = self.expression( 2331 exp.Select, 2332 kind=kind, 2333 hint=hint, 2334 distinct=distinct, 2335 expressions=projections, 2336 limit=limit, 2337 ) 2338 this.comments = comments 2339 2340 into = self._parse_into() 2341 if into: 2342 this.set("into", into) 2343 2344 if not from_: 2345 from_ = self._parse_from() 2346 2347 if from_: 2348 this.set("from", from_) 2349 2350 this = self._parse_query_modifiers(this) 2351 elif (table or nested) and self._match(TokenType.L_PAREN): 2352 if self._match(TokenType.PIVOT): 2353 this = self._parse_simplified_pivot() 2354 elif self._match(TokenType.FROM): 2355 this = exp.select("*").from_( 2356 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2357 ) 2358 else: 2359 this = ( 2360 self._parse_table() 2361 if table 2362 else self._parse_select(nested=True, parse_set_operation=False) 2363 ) 2364 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2365 2366 self._match_r_paren() 2367 2368 # We return early here so that the UNION isn't attached to the subquery by the 2369 # following call to _parse_set_operations, but instead becomes the parent node 2370 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2371 elif self._match(TokenType.VALUES): 2372 this = self.expression( 2373 exp.Values, 2374 expressions=self._parse_csv(self._parse_value), 2375 alias=self._parse_table_alias(), 2376 ) 2377 elif from_: 2378 this = exp.select("*").from_(from_.this, copy=False) 2379 else: 2380 this = None 2381 2382 if parse_set_operation: 2383 return self._parse_set_operations(this) 2384 return this 2385 2386 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2387 if not skip_with_token and not self._match(TokenType.WITH): 2388 return None 2389 2390 comments = self._prev_comments 2391 recursive = self._match(TokenType.RECURSIVE) 2392 2393 expressions = [] 2394 while True: 2395 expressions.append(self._parse_cte()) 2396 2397 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2398 break 2399 else: 2400 self._match(TokenType.WITH) 2401 2402 return self.expression( 2403 exp.With, comments=comments, expressions=expressions, recursive=recursive 2404 ) 2405 2406 def _parse_cte(self) -> exp.CTE: 2407 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2408 if not alias or not alias.this: 2409 self.raise_error("Expected CTE to have alias") 2410 2411 self._match(TokenType.ALIAS) 2412 return self.expression( 2413 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2414 ) 2415 2416 def _parse_table_alias( 2417 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2418 ) -> t.Optional[exp.TableAlias]: 2419 any_token = self._match(TokenType.ALIAS) 2420 alias = ( 2421 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2422 or self._parse_string_as_identifier() 2423 ) 2424 2425 index = self._index 2426 if self._match(TokenType.L_PAREN): 2427 columns = self._parse_csv(self._parse_function_parameter) 2428 self._match_r_paren() if columns else self._retreat(index) 2429 else: 2430 columns = None 2431 2432 if not alias and not columns: 2433 return None 2434 2435 return self.expression(exp.TableAlias, this=alias, columns=columns) 2436 2437 def _parse_subquery( 2438 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2439 ) -> t.Optional[exp.Subquery]: 2440 if not this: 2441 return None 2442 2443 return self.expression( 2444 exp.Subquery, 2445 this=this, 2446 pivots=self._parse_pivots(), 2447 alias=self._parse_table_alias() if parse_alias else None, 2448 ) 2449 2450 def _parse_query_modifiers( 2451 self, this: t.Optional[exp.Expression] 2452 ) -> t.Optional[exp.Expression]: 2453 if isinstance(this, self.MODIFIABLES): 2454 for join in iter(self._parse_join, None): 2455 this.append("joins", join) 2456 for lateral in iter(self._parse_lateral, None): 2457 this.append("laterals", lateral) 2458 2459 while True: 2460 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2461 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2462 key, expression = parser(self) 2463 2464 if expression: 2465 this.set(key, expression) 2466 if key == "limit": 2467 offset = expression.args.pop("offset", None) 2468 2469 if offset: 2470 offset = exp.Offset(expression=offset) 2471 this.set("offset", offset) 2472 2473 limit_by_expressions = expression.expressions 2474 expression.set("expressions", None) 2475 offset.set("expressions", limit_by_expressions) 2476 continue 2477 break 2478 return this 2479 2480 def _parse_hint(self) -> t.Optional[exp.Hint]: 2481 if self._match(TokenType.HINT): 2482 hints = [] 2483 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2484 hints.extend(hint) 2485 2486 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2487 self.raise_error("Expected */ after HINT") 2488 2489 return self.expression(exp.Hint, expressions=hints) 2490 2491 return None 2492 2493 def _parse_into(self) -> t.Optional[exp.Into]: 2494 if not self._match(TokenType.INTO): 2495 return None 2496 2497 temp = self._match(TokenType.TEMPORARY) 2498 unlogged = self._match_text_seq("UNLOGGED") 2499 self._match(TokenType.TABLE) 2500 2501 return self.expression( 2502 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2503 ) 2504 2505 def _parse_from( 2506 self, joins: bool = False, skip_from_token: bool = False 2507 ) -> t.Optional[exp.From]: 2508 if not skip_from_token and not self._match(TokenType.FROM): 2509 return None 2510 2511 return self.expression( 2512 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2513 ) 2514 2515 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2516 if not self._match(TokenType.MATCH_RECOGNIZE): 2517 return None 2518 2519 self._match_l_paren() 2520 2521 partition = self._parse_partition_by() 2522 order = self._parse_order() 2523 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2524 2525 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2526 rows = exp.var("ONE ROW PER MATCH") 2527 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2528 text = "ALL ROWS PER MATCH" 2529 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2530 text += " SHOW EMPTY MATCHES" 2531 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2532 text += " OMIT EMPTY MATCHES" 2533 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2534 text += " WITH UNMATCHED ROWS" 2535 rows = exp.var(text) 2536 else: 2537 rows = None 2538 2539 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2540 text = "AFTER MATCH SKIP" 2541 if self._match_text_seq("PAST", "LAST", "ROW"): 2542 text += " PAST LAST ROW" 2543 elif self._match_text_seq("TO", "NEXT", "ROW"): 2544 text += " TO NEXT ROW" 2545 elif self._match_text_seq("TO", "FIRST"): 2546 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2547 elif self._match_text_seq("TO", "LAST"): 2548 text += f" TO LAST {self._advance_any().text}" # type: ignore 2549 after = exp.var(text) 2550 else: 2551 after = None 2552 2553 if self._match_text_seq("PATTERN"): 2554 self._match_l_paren() 2555 2556 if not self._curr: 2557 self.raise_error("Expecting )", self._curr) 2558 2559 paren = 1 2560 start = self._curr 2561 2562 while self._curr and paren > 0: 2563 if self._curr.token_type == TokenType.L_PAREN: 2564 paren += 1 2565 if self._curr.token_type == TokenType.R_PAREN: 2566 paren -= 1 2567 2568 end = self._prev 2569 self._advance() 2570 2571 if paren > 0: 2572 self.raise_error("Expecting )", self._curr) 2573 2574 pattern = exp.var(self._find_sql(start, end)) 2575 else: 2576 pattern = None 2577 2578 define = ( 2579 self._parse_csv(self._parse_name_as_expression) 2580 if self._match_text_seq("DEFINE") 2581 else None 2582 ) 2583 2584 self._match_r_paren() 2585 2586 return self.expression( 2587 exp.MatchRecognize, 2588 partition_by=partition, 2589 order=order, 2590 measures=measures, 2591 rows=rows, 2592 after=after, 2593 pattern=pattern, 2594 define=define, 2595 alias=self._parse_table_alias(), 2596 ) 2597 2598 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2599 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2600 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2601 cross_apply = False 2602 2603 if cross_apply is not None: 2604 this = self._parse_select(table=True) 2605 view = None 2606 outer = None 2607 elif self._match(TokenType.LATERAL): 2608 this = self._parse_select(table=True) 2609 view = self._match(TokenType.VIEW) 2610 outer = self._match(TokenType.OUTER) 2611 else: 2612 return None 2613 2614 if not this: 2615 this = ( 2616 self._parse_unnest() 2617 or self._parse_function() 2618 or self._parse_id_var(any_token=False) 2619 ) 2620 2621 while self._match(TokenType.DOT): 2622 this = exp.Dot( 2623 this=this, 2624 expression=self._parse_function() or self._parse_id_var(any_token=False), 2625 ) 2626 2627 if view: 2628 table = self._parse_id_var(any_token=False) 2629 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2630 table_alias: t.Optional[exp.TableAlias] = self.expression( 2631 exp.TableAlias, this=table, columns=columns 2632 ) 2633 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2634 # We move the alias from the lateral's child node to the lateral itself 2635 table_alias = this.args["alias"].pop() 2636 else: 2637 table_alias = self._parse_table_alias() 2638 2639 return self.expression( 2640 exp.Lateral, 2641 this=this, 2642 view=view, 2643 outer=outer, 2644 alias=table_alias, 2645 cross_apply=cross_apply, 2646 ) 2647 2648 def _parse_join_parts( 2649 self, 2650 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2651 return ( 2652 self._match_set(self.JOIN_METHODS) and self._prev, 2653 self._match_set(self.JOIN_SIDES) and self._prev, 2654 self._match_set(self.JOIN_KINDS) and self._prev, 2655 ) 2656 2657 def _parse_join( 2658 self, skip_join_token: bool = False, parse_bracket: bool = False 2659 ) -> t.Optional[exp.Join]: 2660 if self._match(TokenType.COMMA): 2661 return self.expression(exp.Join, this=self._parse_table()) 2662 2663 index = self._index 2664 method, side, kind = self._parse_join_parts() 2665 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2666 join = self._match(TokenType.JOIN) 2667 2668 if not skip_join_token and not join: 2669 self._retreat(index) 2670 kind = None 2671 method = None 2672 side = None 2673 2674 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2675 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2676 2677 if not skip_join_token and not join and not outer_apply and not cross_apply: 2678 return None 2679 2680 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2681 2682 if method: 2683 kwargs["method"] = method.text 2684 if side: 2685 kwargs["side"] = side.text 2686 if kind: 2687 kwargs["kind"] = kind.text 2688 if hint: 2689 kwargs["hint"] = hint 2690 2691 if self._match(TokenType.ON): 2692 kwargs["on"] = self._parse_conjunction() 2693 elif self._match(TokenType.USING): 2694 kwargs["using"] = self._parse_wrapped_id_vars() 2695 elif not (kind and kind.token_type == TokenType.CROSS): 2696 index = self._index 2697 join = self._parse_join() 2698 2699 if join and self._match(TokenType.ON): 2700 kwargs["on"] = self._parse_conjunction() 2701 elif join and self._match(TokenType.USING): 2702 kwargs["using"] = self._parse_wrapped_id_vars() 2703 else: 2704 join = None 2705 self._retreat(index) 2706 2707 kwargs["this"].set("joins", [join] if join else None) 2708 2709 comments = [c for token in (method, side, kind) if token for c in token.comments] 2710 return self.expression(exp.Join, comments=comments, **kwargs) 2711 2712 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2713 this = self._parse_conjunction() 2714 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2715 return this 2716 2717 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2718 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2719 2720 return this 2721 2722 def _parse_index( 2723 self, 2724 index: t.Optional[exp.Expression] = None, 2725 ) -> t.Optional[exp.Index]: 2726 if index: 2727 unique = None 2728 primary = None 2729 amp = None 2730 2731 self._match(TokenType.ON) 2732 self._match(TokenType.TABLE) # hive 2733 table = self._parse_table_parts(schema=True) 2734 else: 2735 unique = self._match(TokenType.UNIQUE) 2736 primary = self._match_text_seq("PRIMARY") 2737 amp = self._match_text_seq("AMP") 2738 2739 if not self._match(TokenType.INDEX): 2740 return None 2741 2742 index = self._parse_id_var() 2743 table = None 2744 2745 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2746 2747 if self._match(TokenType.L_PAREN, advance=False): 2748 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2749 else: 2750 columns = None 2751 2752 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2753 2754 return self.expression( 2755 exp.Index, 2756 this=index, 2757 table=table, 2758 using=using, 2759 columns=columns, 2760 unique=unique, 2761 primary=primary, 2762 amp=amp, 2763 include=include, 2764 partition_by=self._parse_partition_by(), 2765 where=self._parse_where(), 2766 ) 2767 2768 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2769 hints: t.List[exp.Expression] = [] 2770 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2771 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2772 hints.append( 2773 self.expression( 2774 exp.WithTableHint, 2775 expressions=self._parse_csv( 2776 lambda: self._parse_function() or self._parse_var(any_token=True) 2777 ), 2778 ) 2779 ) 2780 self._match_r_paren() 2781 else: 2782 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2783 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2784 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2785 2786 self._match_texts(("INDEX", "KEY")) 2787 if self._match(TokenType.FOR): 2788 hint.set("target", self._advance_any() and self._prev.text.upper()) 2789 2790 hint.set("expressions", self._parse_wrapped_id_vars()) 2791 hints.append(hint) 2792 2793 return hints or None 2794 2795 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2796 return ( 2797 (not schema and self._parse_function(optional_parens=False)) 2798 or self._parse_id_var(any_token=False) 2799 or self._parse_string_as_identifier() 2800 or self._parse_placeholder() 2801 ) 2802 2803 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2804 catalog = None 2805 db = None 2806 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2807 2808 while self._match(TokenType.DOT): 2809 if catalog: 2810 # This allows nesting the table in arbitrarily many dot expressions if needed 2811 table = self.expression( 2812 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2813 ) 2814 else: 2815 catalog = db 2816 db = table 2817 table = self._parse_table_part(schema=schema) or "" 2818 2819 if is_db_reference: 2820 catalog = db 2821 db = table 2822 table = None 2823 2824 if not table and not is_db_reference: 2825 self.raise_error(f"Expected table name but got {self._curr}") 2826 if not db and is_db_reference: 2827 self.raise_error(f"Expected database name but got {self._curr}") 2828 2829 return self.expression( 2830 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2831 ) 2832 2833 def _parse_table( 2834 self, 2835 schema: bool = False, 2836 joins: bool = False, 2837 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2838 parse_bracket: bool = False, 2839 is_db_reference: bool = False, 2840 ) -> t.Optional[exp.Expression]: 2841 lateral = self._parse_lateral() 2842 if lateral: 2843 return lateral 2844 2845 unnest = self._parse_unnest() 2846 if unnest: 2847 return unnest 2848 2849 values = self._parse_derived_table_values() 2850 if values: 2851 return values 2852 2853 subquery = self._parse_select(table=True) 2854 if subquery: 2855 if not subquery.args.get("pivots"): 2856 subquery.set("pivots", self._parse_pivots()) 2857 return subquery 2858 2859 bracket = parse_bracket and self._parse_bracket(None) 2860 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2861 this = t.cast( 2862 exp.Expression, 2863 bracket 2864 or self._parse_bracket( 2865 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2866 ), 2867 ) 2868 2869 if schema: 2870 return self._parse_schema(this=this) 2871 2872 version = self._parse_version() 2873 2874 if version: 2875 this.set("version", version) 2876 2877 if self.dialect.ALIAS_POST_TABLESAMPLE: 2878 table_sample = self._parse_table_sample() 2879 2880 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2881 if alias: 2882 this.set("alias", alias) 2883 2884 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2885 return self.expression( 2886 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2887 ) 2888 2889 this.set("hints", self._parse_table_hints()) 2890 2891 if not this.args.get("pivots"): 2892 this.set("pivots", self._parse_pivots()) 2893 2894 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2895 table_sample = self._parse_table_sample() 2896 2897 if table_sample: 2898 table_sample.set("this", this) 2899 this = table_sample 2900 2901 if joins: 2902 for join in iter(self._parse_join, None): 2903 this.append("joins", join) 2904 2905 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2906 this.set("ordinality", True) 2907 this.set("alias", self._parse_table_alias()) 2908 2909 return this 2910 2911 def _parse_version(self) -> t.Optional[exp.Version]: 2912 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2913 this = "TIMESTAMP" 2914 elif self._match(TokenType.VERSION_SNAPSHOT): 2915 this = "VERSION" 2916 else: 2917 return None 2918 2919 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2920 kind = self._prev.text.upper() 2921 start = self._parse_bitwise() 2922 self._match_texts(("TO", "AND")) 2923 end = self._parse_bitwise() 2924 expression: t.Optional[exp.Expression] = self.expression( 2925 exp.Tuple, expressions=[start, end] 2926 ) 2927 elif self._match_text_seq("CONTAINED", "IN"): 2928 kind = "CONTAINED IN" 2929 expression = self.expression( 2930 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2931 ) 2932 elif self._match(TokenType.ALL): 2933 kind = "ALL" 2934 expression = None 2935 else: 2936 self._match_text_seq("AS", "OF") 2937 kind = "AS OF" 2938 expression = self._parse_type() 2939 2940 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2941 2942 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2943 if not self._match(TokenType.UNNEST): 2944 return None 2945 2946 expressions = self._parse_wrapped_csv(self._parse_equality) 2947 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2948 2949 alias = self._parse_table_alias() if with_alias else None 2950 2951 if alias: 2952 if self.dialect.UNNEST_COLUMN_ONLY: 2953 if alias.args.get("columns"): 2954 self.raise_error("Unexpected extra column alias in unnest.") 2955 2956 alias.set("columns", [alias.this]) 2957 alias.set("this", None) 2958 2959 columns = alias.args.get("columns") or [] 2960 if offset and len(expressions) < len(columns): 2961 offset = columns.pop() 2962 2963 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2964 self._match(TokenType.ALIAS) 2965 offset = self._parse_id_var( 2966 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2967 ) or exp.to_identifier("offset") 2968 2969 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2970 2971 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2972 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2973 if not is_derived and not self._match(TokenType.VALUES): 2974 return None 2975 2976 expressions = self._parse_csv(self._parse_value) 2977 alias = self._parse_table_alias() 2978 2979 if is_derived: 2980 self._match_r_paren() 2981 2982 return self.expression( 2983 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2984 ) 2985 2986 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2987 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2988 as_modifier and self._match_text_seq("USING", "SAMPLE") 2989 ): 2990 return None 2991 2992 bucket_numerator = None 2993 bucket_denominator = None 2994 bucket_field = None 2995 percent = None 2996 size = None 2997 seed = None 2998 2999 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3000 matched_l_paren = self._match(TokenType.L_PAREN) 3001 3002 if self.TABLESAMPLE_CSV: 3003 num = None 3004 expressions = self._parse_csv(self._parse_primary) 3005 else: 3006 expressions = None 3007 num = ( 3008 self._parse_factor() 3009 if self._match(TokenType.NUMBER, advance=False) 3010 else self._parse_primary() or self._parse_placeholder() 3011 ) 3012 3013 if self._match_text_seq("BUCKET"): 3014 bucket_numerator = self._parse_number() 3015 self._match_text_seq("OUT", "OF") 3016 bucket_denominator = bucket_denominator = self._parse_number() 3017 self._match(TokenType.ON) 3018 bucket_field = self._parse_field() 3019 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3020 percent = num 3021 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3022 size = num 3023 else: 3024 percent = num 3025 3026 if matched_l_paren: 3027 self._match_r_paren() 3028 3029 if self._match(TokenType.L_PAREN): 3030 method = self._parse_var(upper=True) 3031 seed = self._match(TokenType.COMMA) and self._parse_number() 3032 self._match_r_paren() 3033 elif self._match_texts(("SEED", "REPEATABLE")): 3034 seed = self._parse_wrapped(self._parse_number) 3035 3036 return self.expression( 3037 exp.TableSample, 3038 expressions=expressions, 3039 method=method, 3040 bucket_numerator=bucket_numerator, 3041 bucket_denominator=bucket_denominator, 3042 bucket_field=bucket_field, 3043 percent=percent, 3044 size=size, 3045 seed=seed, 3046 ) 3047 3048 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3049 return list(iter(self._parse_pivot, None)) or None 3050 3051 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3052 return list(iter(self._parse_join, None)) or None 3053 3054 # https://duckdb.org/docs/sql/statements/pivot 3055 def _parse_simplified_pivot(self) -> exp.Pivot: 3056 def _parse_on() -> t.Optional[exp.Expression]: 3057 this = self._parse_bitwise() 3058 return self._parse_in(this) if self._match(TokenType.IN) else this 3059 3060 this = self._parse_table() 3061 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3062 using = self._match(TokenType.USING) and self._parse_csv( 3063 lambda: self._parse_alias(self._parse_function()) 3064 ) 3065 group = self._parse_group() 3066 return self.expression( 3067 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3068 ) 3069 3070 def _parse_pivot_in(self) -> exp.In: 3071 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3072 this = self._parse_conjunction() 3073 3074 self._match(TokenType.ALIAS) 3075 alias = self._parse_field() 3076 if alias: 3077 return self.expression(exp.PivotAlias, this=this, alias=alias) 3078 3079 return this 3080 3081 value = self._parse_column() 3082 3083 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3084 self.raise_error("Expecting IN (") 3085 3086 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3087 3088 self._match_r_paren() 3089 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3090 3091 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3092 index = self._index 3093 include_nulls = None 3094 3095 if self._match(TokenType.PIVOT): 3096 unpivot = False 3097 elif self._match(TokenType.UNPIVOT): 3098 unpivot = True 3099 3100 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3101 if self._match_text_seq("INCLUDE", "NULLS"): 3102 include_nulls = True 3103 elif self._match_text_seq("EXCLUDE", "NULLS"): 3104 include_nulls = False 3105 else: 3106 return None 3107 3108 expressions = [] 3109 3110 if not self._match(TokenType.L_PAREN): 3111 self._retreat(index) 3112 return None 3113 3114 if unpivot: 3115 expressions = self._parse_csv(self._parse_column) 3116 else: 3117 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3118 3119 if not expressions: 3120 self.raise_error("Failed to parse PIVOT's aggregation list") 3121 3122 if not self._match(TokenType.FOR): 3123 self.raise_error("Expecting FOR") 3124 3125 field = self._parse_pivot_in() 3126 3127 self._match_r_paren() 3128 3129 pivot = self.expression( 3130 exp.Pivot, 3131 expressions=expressions, 3132 field=field, 3133 unpivot=unpivot, 3134 include_nulls=include_nulls, 3135 ) 3136 3137 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3138 pivot.set("alias", self._parse_table_alias()) 3139 3140 if not unpivot: 3141 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3142 3143 columns: t.List[exp.Expression] = [] 3144 for fld in pivot.args["field"].expressions: 3145 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3146 for name in names: 3147 if self.PREFIXED_PIVOT_COLUMNS: 3148 name = f"{name}_{field_name}" if name else field_name 3149 else: 3150 name = f"{field_name}_{name}" if name else field_name 3151 3152 columns.append(exp.to_identifier(name)) 3153 3154 pivot.set("columns", columns) 3155 3156 return pivot 3157 3158 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3159 return [agg.alias for agg in aggregations] 3160 3161 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3162 if not skip_where_token and not self._match(TokenType.WHERE): 3163 return None 3164 3165 return self.expression( 3166 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3167 ) 3168 3169 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3170 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3171 return None 3172 3173 elements = defaultdict(list) 3174 3175 if self._match(TokenType.ALL): 3176 return self.expression(exp.Group, all=True) 3177 3178 while True: 3179 expressions = self._parse_csv(self._parse_conjunction) 3180 if expressions: 3181 elements["expressions"].extend(expressions) 3182 3183 grouping_sets = self._parse_grouping_sets() 3184 if grouping_sets: 3185 elements["grouping_sets"].extend(grouping_sets) 3186 3187 rollup = None 3188 cube = None 3189 totals = None 3190 3191 index = self._index 3192 with_ = self._match(TokenType.WITH) 3193 if self._match(TokenType.ROLLUP): 3194 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3195 elements["rollup"].extend(ensure_list(rollup)) 3196 3197 if self._match(TokenType.CUBE): 3198 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3199 elements["cube"].extend(ensure_list(cube)) 3200 3201 if self._match_text_seq("TOTALS"): 3202 totals = True 3203 elements["totals"] = True # type: ignore 3204 3205 if not (grouping_sets or rollup or cube or totals): 3206 if with_: 3207 self._retreat(index) 3208 break 3209 3210 return self.expression(exp.Group, **elements) # type: ignore 3211 3212 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3213 if not self._match(TokenType.GROUPING_SETS): 3214 return None 3215 3216 return self._parse_wrapped_csv(self._parse_grouping_set) 3217 3218 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3219 if self._match(TokenType.L_PAREN): 3220 grouping_set = self._parse_csv(self._parse_column) 3221 self._match_r_paren() 3222 return self.expression(exp.Tuple, expressions=grouping_set) 3223 3224 return self._parse_column() 3225 3226 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3227 if not skip_having_token and not self._match(TokenType.HAVING): 3228 return None 3229 return self.expression(exp.Having, this=self._parse_conjunction()) 3230 3231 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3232 if not self._match(TokenType.QUALIFY): 3233 return None 3234 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3235 3236 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3237 if skip_start_token: 3238 start = None 3239 elif self._match(TokenType.START_WITH): 3240 start = self._parse_conjunction() 3241 else: 3242 return None 3243 3244 self._match(TokenType.CONNECT_BY) 3245 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3246 exp.Prior, this=self._parse_bitwise() 3247 ) 3248 connect = self._parse_conjunction() 3249 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3250 3251 if not start and self._match(TokenType.START_WITH): 3252 start = self._parse_conjunction() 3253 3254 return self.expression(exp.Connect, start=start, connect=connect) 3255 3256 def _parse_name_as_expression(self) -> exp.Alias: 3257 return self.expression( 3258 exp.Alias, 3259 alias=self._parse_id_var(any_token=True), 3260 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3261 ) 3262 3263 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3264 if self._match_text_seq("INTERPOLATE"): 3265 return self._parse_wrapped_csv(self._parse_name_as_expression) 3266 return None 3267 3268 def _parse_order( 3269 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3270 ) -> t.Optional[exp.Expression]: 3271 siblings = None 3272 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3273 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3274 return this 3275 3276 siblings = True 3277 3278 return self.expression( 3279 exp.Order, 3280 this=this, 3281 expressions=self._parse_csv(self._parse_ordered), 3282 interpolate=self._parse_interpolate(), 3283 siblings=siblings, 3284 ) 3285 3286 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3287 if not self._match(token): 3288 return None 3289 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3290 3291 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3292 this = parse_method() if parse_method else self._parse_conjunction() 3293 3294 asc = self._match(TokenType.ASC) 3295 desc = self._match(TokenType.DESC) or (asc and False) 3296 3297 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3298 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3299 3300 nulls_first = is_nulls_first or False 3301 explicitly_null_ordered = is_nulls_first or is_nulls_last 3302 3303 if ( 3304 not explicitly_null_ordered 3305 and ( 3306 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3307 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3308 ) 3309 and self.dialect.NULL_ORDERING != "nulls_are_last" 3310 ): 3311 nulls_first = True 3312 3313 if self._match_text_seq("WITH", "FILL"): 3314 with_fill = self.expression( 3315 exp.WithFill, 3316 **{ # type: ignore 3317 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3318 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3319 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3320 }, 3321 ) 3322 else: 3323 with_fill = None 3324 3325 return self.expression( 3326 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3327 ) 3328 3329 def _parse_limit( 3330 self, this: t.Optional[exp.Expression] = None, top: bool = False 3331 ) -> t.Optional[exp.Expression]: 3332 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3333 comments = self._prev_comments 3334 if top: 3335 limit_paren = self._match(TokenType.L_PAREN) 3336 expression = self._parse_term() if limit_paren else self._parse_number() 3337 3338 if limit_paren: 3339 self._match_r_paren() 3340 else: 3341 expression = self._parse_term() 3342 3343 if self._match(TokenType.COMMA): 3344 offset = expression 3345 expression = self._parse_term() 3346 else: 3347 offset = None 3348 3349 limit_exp = self.expression( 3350 exp.Limit, 3351 this=this, 3352 expression=expression, 3353 offset=offset, 3354 comments=comments, 3355 expressions=self._parse_limit_by(), 3356 ) 3357 3358 return limit_exp 3359 3360 if self._match(TokenType.FETCH): 3361 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3362 direction = self._prev.text.upper() if direction else "FIRST" 3363 3364 count = self._parse_field(tokens=self.FETCH_TOKENS) 3365 percent = self._match(TokenType.PERCENT) 3366 3367 self._match_set((TokenType.ROW, TokenType.ROWS)) 3368 3369 only = self._match_text_seq("ONLY") 3370 with_ties = self._match_text_seq("WITH", "TIES") 3371 3372 if only and with_ties: 3373 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3374 3375 return self.expression( 3376 exp.Fetch, 3377 direction=direction, 3378 count=count, 3379 percent=percent, 3380 with_ties=with_ties, 3381 ) 3382 3383 return this 3384 3385 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3386 if not self._match(TokenType.OFFSET): 3387 return this 3388 3389 count = self._parse_term() 3390 self._match_set((TokenType.ROW, TokenType.ROWS)) 3391 3392 return self.expression( 3393 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3394 ) 3395 3396 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3397 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3398 3399 def _parse_locks(self) -> t.List[exp.Lock]: 3400 locks = [] 3401 while True: 3402 if self._match_text_seq("FOR", "UPDATE"): 3403 update = True 3404 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3405 "LOCK", "IN", "SHARE", "MODE" 3406 ): 3407 update = False 3408 else: 3409 break 3410 3411 expressions = None 3412 if self._match_text_seq("OF"): 3413 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3414 3415 wait: t.Optional[bool | exp.Expression] = None 3416 if self._match_text_seq("NOWAIT"): 3417 wait = True 3418 elif self._match_text_seq("WAIT"): 3419 wait = self._parse_primary() 3420 elif self._match_text_seq("SKIP", "LOCKED"): 3421 wait = False 3422 3423 locks.append( 3424 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3425 ) 3426 3427 return locks 3428 3429 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3430 while this and self._match_set(self.SET_OPERATIONS): 3431 token_type = self._prev.token_type 3432 3433 if token_type == TokenType.UNION: 3434 operation = exp.Union 3435 elif token_type == TokenType.EXCEPT: 3436 operation = exp.Except 3437 else: 3438 operation = exp.Intersect 3439 3440 comments = self._prev.comments 3441 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3442 by_name = self._match_text_seq("BY", "NAME") 3443 expression = self._parse_select(nested=True, parse_set_operation=False) 3444 3445 this = self.expression( 3446 operation, 3447 comments=comments, 3448 this=this, 3449 distinct=distinct, 3450 by_name=by_name, 3451 expression=expression, 3452 ) 3453 3454 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3455 expression = this.expression 3456 3457 if expression: 3458 for arg in self.UNION_MODIFIERS: 3459 expr = expression.args.get(arg) 3460 if expr: 3461 this.set(arg, expr.pop()) 3462 3463 return this 3464 3465 def _parse_expression(self) -> t.Optional[exp.Expression]: 3466 return self._parse_alias(self._parse_conjunction()) 3467 3468 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3469 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3470 3471 def _parse_equality(self) -> t.Optional[exp.Expression]: 3472 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3473 3474 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3475 return self._parse_tokens(self._parse_range, self.COMPARISON) 3476 3477 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3478 this = this or self._parse_bitwise() 3479 negate = self._match(TokenType.NOT) 3480 3481 if self._match_set(self.RANGE_PARSERS): 3482 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3483 if not expression: 3484 return this 3485 3486 this = expression 3487 elif self._match(TokenType.ISNULL): 3488 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3489 3490 # Postgres supports ISNULL and NOTNULL for conditions. 3491 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3492 if self._match(TokenType.NOTNULL): 3493 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3494 this = self.expression(exp.Not, this=this) 3495 3496 if negate: 3497 this = self.expression(exp.Not, this=this) 3498 3499 if self._match(TokenType.IS): 3500 this = self._parse_is(this) 3501 3502 return this 3503 3504 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3505 index = self._index - 1 3506 negate = self._match(TokenType.NOT) 3507 3508 if self._match_text_seq("DISTINCT", "FROM"): 3509 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3510 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3511 3512 expression = self._parse_null() or self._parse_boolean() 3513 if not expression: 3514 self._retreat(index) 3515 return None 3516 3517 this = self.expression(exp.Is, this=this, expression=expression) 3518 return self.expression(exp.Not, this=this) if negate else this 3519 3520 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3521 unnest = self._parse_unnest(with_alias=False) 3522 if unnest: 3523 this = self.expression(exp.In, this=this, unnest=unnest) 3524 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3525 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3526 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3527 3528 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3529 this = self.expression(exp.In, this=this, query=expressions[0]) 3530 else: 3531 this = self.expression(exp.In, this=this, expressions=expressions) 3532 3533 if matched_l_paren: 3534 self._match_r_paren(this) 3535 elif not self._match(TokenType.R_BRACKET, expression=this): 3536 self.raise_error("Expecting ]") 3537 else: 3538 this = self.expression(exp.In, this=this, field=self._parse_field()) 3539 3540 return this 3541 3542 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3543 low = self._parse_bitwise() 3544 self._match(TokenType.AND) 3545 high = self._parse_bitwise() 3546 return self.expression(exp.Between, this=this, low=low, high=high) 3547 3548 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3549 if not self._match(TokenType.ESCAPE): 3550 return this 3551 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3552 3553 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3554 index = self._index 3555 3556 if not self._match(TokenType.INTERVAL) and match_interval: 3557 return None 3558 3559 if self._match(TokenType.STRING, advance=False): 3560 this = self._parse_primary() 3561 else: 3562 this = self._parse_term() 3563 3564 if not this or ( 3565 isinstance(this, exp.Column) 3566 and not this.table 3567 and not this.this.quoted 3568 and this.name.upper() == "IS" 3569 ): 3570 self._retreat(index) 3571 return None 3572 3573 unit = self._parse_function() or ( 3574 not self._match(TokenType.ALIAS, advance=False) 3575 and self._parse_var(any_token=True, upper=True) 3576 ) 3577 3578 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3579 # each INTERVAL expression into this canonical form so it's easy to transpile 3580 if this and this.is_number: 3581 this = exp.Literal.string(this.name) 3582 elif this and this.is_string: 3583 parts = this.name.split() 3584 3585 if len(parts) == 2: 3586 if unit: 3587 # This is not actually a unit, it's something else (e.g. a "window side") 3588 unit = None 3589 self._retreat(self._index - 1) 3590 3591 this = exp.Literal.string(parts[0]) 3592 unit = self.expression(exp.Var, this=parts[1].upper()) 3593 3594 return self.expression(exp.Interval, this=this, unit=unit) 3595 3596 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3597 this = self._parse_term() 3598 3599 while True: 3600 if self._match_set(self.BITWISE): 3601 this = self.expression( 3602 self.BITWISE[self._prev.token_type], 3603 this=this, 3604 expression=self._parse_term(), 3605 ) 3606 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3607 this = self.expression( 3608 exp.DPipe, 3609 this=this, 3610 expression=self._parse_term(), 3611 safe=not self.dialect.STRICT_STRING_CONCAT, 3612 ) 3613 elif self._match(TokenType.DQMARK): 3614 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3615 elif self._match_pair(TokenType.LT, TokenType.LT): 3616 this = self.expression( 3617 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3618 ) 3619 elif self._match_pair(TokenType.GT, TokenType.GT): 3620 this = self.expression( 3621 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3622 ) 3623 else: 3624 break 3625 3626 return this 3627 3628 def _parse_term(self) -> t.Optional[exp.Expression]: 3629 return self._parse_tokens(self._parse_factor, self.TERM) 3630 3631 def _parse_factor(self) -> t.Optional[exp.Expression]: 3632 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3633 this = parse_method() 3634 3635 while self._match_set(self.FACTOR): 3636 this = self.expression( 3637 self.FACTOR[self._prev.token_type], 3638 this=this, 3639 comments=self._prev_comments, 3640 expression=parse_method(), 3641 ) 3642 if isinstance(this, exp.Div): 3643 this.args["typed"] = self.dialect.TYPED_DIVISION 3644 this.args["safe"] = self.dialect.SAFE_DIVISION 3645 3646 return this 3647 3648 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3649 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3650 3651 def _parse_unary(self) -> t.Optional[exp.Expression]: 3652 if self._match_set(self.UNARY_PARSERS): 3653 return self.UNARY_PARSERS[self._prev.token_type](self) 3654 return self._parse_at_time_zone(self._parse_type()) 3655 3656 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3657 interval = parse_interval and self._parse_interval() 3658 if interval: 3659 # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals 3660 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3661 interval = self.expression( # type: ignore 3662 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3663 ) 3664 3665 return interval 3666 3667 index = self._index 3668 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3669 this = self._parse_column() 3670 3671 if data_type: 3672 if isinstance(this, exp.Literal): 3673 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3674 if parser: 3675 return parser(self, this, data_type) 3676 return self.expression(exp.Cast, this=this, to=data_type) 3677 if not data_type.expressions: 3678 self._retreat(index) 3679 return self._parse_column() 3680 return self._parse_column_ops(data_type) 3681 3682 return this and self._parse_column_ops(this) 3683 3684 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3685 this = self._parse_type() 3686 if not this: 3687 return None 3688 3689 return self.expression( 3690 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3691 ) 3692 3693 def _parse_types( 3694 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3695 ) -> t.Optional[exp.Expression]: 3696 index = self._index 3697 3698 prefix = self._match_text_seq("SYSUDTLIB", ".") 3699 3700 if not self._match_set(self.TYPE_TOKENS): 3701 identifier = allow_identifiers and self._parse_id_var( 3702 any_token=False, tokens=(TokenType.VAR,) 3703 ) 3704 if identifier: 3705 tokens = self.dialect.tokenize(identifier.name) 3706 3707 if len(tokens) != 1: 3708 self.raise_error("Unexpected identifier", self._prev) 3709 3710 if tokens[0].token_type in self.TYPE_TOKENS: 3711 self._prev = tokens[0] 3712 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3713 type_name = identifier.name 3714 3715 while self._match(TokenType.DOT): 3716 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3717 3718 return exp.DataType.build(type_name, udt=True) 3719 else: 3720 self._retreat(self._index - 1) 3721 return None 3722 else: 3723 return None 3724 3725 type_token = self._prev.token_type 3726 3727 if type_token == TokenType.PSEUDO_TYPE: 3728 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3729 3730 if type_token == TokenType.OBJECT_IDENTIFIER: 3731 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3732 3733 nested = type_token in self.NESTED_TYPE_TOKENS 3734 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3735 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3736 expressions = None 3737 maybe_func = False 3738 3739 if self._match(TokenType.L_PAREN): 3740 if is_struct: 3741 expressions = self._parse_csv(self._parse_struct_types) 3742 elif nested: 3743 expressions = self._parse_csv( 3744 lambda: self._parse_types( 3745 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3746 ) 3747 ) 3748 elif type_token in self.ENUM_TYPE_TOKENS: 3749 expressions = self._parse_csv(self._parse_equality) 3750 elif is_aggregate: 3751 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3752 any_token=False, tokens=(TokenType.VAR,) 3753 ) 3754 if not func_or_ident or not self._match(TokenType.COMMA): 3755 return None 3756 expressions = self._parse_csv( 3757 lambda: self._parse_types( 3758 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3759 ) 3760 ) 3761 expressions.insert(0, func_or_ident) 3762 else: 3763 expressions = self._parse_csv(self._parse_type_size) 3764 3765 if not expressions or not self._match(TokenType.R_PAREN): 3766 self._retreat(index) 3767 return None 3768 3769 maybe_func = True 3770 3771 this: t.Optional[exp.Expression] = None 3772 values: t.Optional[t.List[exp.Expression]] = None 3773 3774 if nested and self._match(TokenType.LT): 3775 if is_struct: 3776 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3777 else: 3778 expressions = self._parse_csv( 3779 lambda: self._parse_types( 3780 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3781 ) 3782 ) 3783 3784 if not self._match(TokenType.GT): 3785 self.raise_error("Expecting >") 3786 3787 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3788 values = self._parse_csv(self._parse_conjunction) 3789 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3790 3791 if type_token in self.TIMESTAMPS: 3792 if self._match_text_seq("WITH", "TIME", "ZONE"): 3793 maybe_func = False 3794 tz_type = ( 3795 exp.DataType.Type.TIMETZ 3796 if type_token in self.TIMES 3797 else exp.DataType.Type.TIMESTAMPTZ 3798 ) 3799 this = exp.DataType(this=tz_type, expressions=expressions) 3800 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3801 maybe_func = False 3802 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3803 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3804 maybe_func = False 3805 elif type_token == TokenType.INTERVAL: 3806 unit = self._parse_var() 3807 3808 if self._match_text_seq("TO"): 3809 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3810 else: 3811 span = None 3812 3813 if span or not unit: 3814 this = self.expression( 3815 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3816 ) 3817 else: 3818 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3819 3820 if maybe_func and check_func: 3821 index2 = self._index 3822 peek = self._parse_string() 3823 3824 if not peek: 3825 self._retreat(index) 3826 return None 3827 3828 self._retreat(index2) 3829 3830 if not this: 3831 if self._match_text_seq("UNSIGNED"): 3832 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3833 if not unsigned_type_token: 3834 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3835 3836 type_token = unsigned_type_token or type_token 3837 3838 this = exp.DataType( 3839 this=exp.DataType.Type[type_token.value], 3840 expressions=expressions, 3841 nested=nested, 3842 values=values, 3843 prefix=prefix, 3844 ) 3845 3846 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3847 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3848 3849 return this 3850 3851 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3852 index = self._index 3853 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3854 self._match(TokenType.COLON) 3855 column_def = self._parse_column_def(this) 3856 3857 if type_required and ( 3858 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3859 ): 3860 self._retreat(index) 3861 return self._parse_types() 3862 3863 return column_def 3864 3865 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3866 if not self._match_text_seq("AT", "TIME", "ZONE"): 3867 return this 3868 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3869 3870 def _parse_column(self) -> t.Optional[exp.Expression]: 3871 this = self._parse_column_reference() 3872 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3873 3874 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3875 this = self._parse_field() 3876 if isinstance(this, exp.Identifier): 3877 this = self.expression(exp.Column, this=this) 3878 return this 3879 3880 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3881 this = self._parse_bracket(this) 3882 3883 while self._match_set(self.COLUMN_OPERATORS): 3884 op_token = self._prev.token_type 3885 op = self.COLUMN_OPERATORS.get(op_token) 3886 3887 if op_token == TokenType.DCOLON: 3888 field = self._parse_types() 3889 if not field: 3890 self.raise_error("Expected type") 3891 elif op and self._curr: 3892 field = self._parse_column_reference() 3893 else: 3894 field = self._parse_field(anonymous_func=True, any_token=True) 3895 3896 if isinstance(field, exp.Func): 3897 # bigquery allows function calls like x.y.count(...) 3898 # SAFE.SUBSTR(...) 3899 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3900 this = self._replace_columns_with_dots(this) 3901 3902 if op: 3903 this = op(self, this, field) 3904 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3905 this = self.expression( 3906 exp.Column, 3907 this=field, 3908 table=this.this, 3909 db=this.args.get("table"), 3910 catalog=this.args.get("db"), 3911 ) 3912 else: 3913 this = self.expression(exp.Dot, this=this, expression=field) 3914 this = self._parse_bracket(this) 3915 return this 3916 3917 def _parse_primary(self) -> t.Optional[exp.Expression]: 3918 if self._match_set(self.PRIMARY_PARSERS): 3919 token_type = self._prev.token_type 3920 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3921 3922 if token_type == TokenType.STRING: 3923 expressions = [primary] 3924 while self._match(TokenType.STRING): 3925 expressions.append(exp.Literal.string(self._prev.text)) 3926 3927 if len(expressions) > 1: 3928 return self.expression(exp.Concat, expressions=expressions) 3929 3930 return primary 3931 3932 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3933 return exp.Literal.number(f"0.{self._prev.text}") 3934 3935 if self._match(TokenType.L_PAREN): 3936 comments = self._prev_comments 3937 query = self._parse_select() 3938 3939 if query: 3940 expressions = [query] 3941 else: 3942 expressions = self._parse_expressions() 3943 3944 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3945 3946 if isinstance(this, exp.Subqueryable): 3947 this = self._parse_set_operations( 3948 self._parse_subquery(this=this, parse_alias=False) 3949 ) 3950 elif len(expressions) > 1: 3951 this = self.expression(exp.Tuple, expressions=expressions) 3952 else: 3953 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3954 3955 if this: 3956 this.add_comments(comments) 3957 3958 self._match_r_paren(expression=this) 3959 return this 3960 3961 return None 3962 3963 def _parse_field( 3964 self, 3965 any_token: bool = False, 3966 tokens: t.Optional[t.Collection[TokenType]] = None, 3967 anonymous_func: bool = False, 3968 ) -> t.Optional[exp.Expression]: 3969 return ( 3970 self._parse_primary() 3971 or self._parse_function(anonymous=anonymous_func) 3972 or self._parse_id_var(any_token=any_token, tokens=tokens) 3973 ) 3974 3975 def _parse_function( 3976 self, 3977 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3978 anonymous: bool = False, 3979 optional_parens: bool = True, 3980 ) -> t.Optional[exp.Expression]: 3981 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3982 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3983 fn_syntax = False 3984 if ( 3985 self._match(TokenType.L_BRACE, advance=False) 3986 and self._next 3987 and self._next.text.upper() == "FN" 3988 ): 3989 self._advance(2) 3990 fn_syntax = True 3991 3992 func = self._parse_function_call( 3993 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3994 ) 3995 3996 if fn_syntax: 3997 self._match(TokenType.R_BRACE) 3998 3999 return func 4000 4001 def _parse_function_call( 4002 self, 4003 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4004 anonymous: bool = False, 4005 optional_parens: bool = True, 4006 ) -> t.Optional[exp.Expression]: 4007 if not self._curr: 4008 return None 4009 4010 comments = self._curr.comments 4011 token_type = self._curr.token_type 4012 this = self._curr.text 4013 upper = this.upper() 4014 4015 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4016 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4017 self._advance() 4018 return parser(self) 4019 4020 if not self._next or self._next.token_type != TokenType.L_PAREN: 4021 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4022 self._advance() 4023 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4024 4025 return None 4026 4027 if token_type not in self.FUNC_TOKENS: 4028 return None 4029 4030 self._advance(2) 4031 4032 parser = self.FUNCTION_PARSERS.get(upper) 4033 if parser and not anonymous: 4034 this = parser(self) 4035 else: 4036 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4037 4038 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4039 this = self.expression(subquery_predicate, this=self._parse_select()) 4040 self._match_r_paren() 4041 return this 4042 4043 if functions is None: 4044 functions = self.FUNCTIONS 4045 4046 function = functions.get(upper) 4047 4048 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4049 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4050 4051 if function and not anonymous: 4052 if "dialect" in function.__code__.co_varnames: 4053 func = function(args, dialect=self.dialect) 4054 else: 4055 func = function(args) 4056 4057 func = self.validate_expression(func, args) 4058 if not self.dialect.NORMALIZE_FUNCTIONS: 4059 func.meta["name"] = this 4060 4061 this = func 4062 else: 4063 this = self.expression(exp.Anonymous, this=this, expressions=args) 4064 4065 if isinstance(this, exp.Expression): 4066 this.add_comments(comments) 4067 4068 self._match_r_paren(this) 4069 return self._parse_window(this) 4070 4071 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4072 return self._parse_column_def(self._parse_id_var()) 4073 4074 def _parse_user_defined_function( 4075 self, kind: t.Optional[TokenType] = None 4076 ) -> t.Optional[exp.Expression]: 4077 this = self._parse_id_var() 4078 4079 while self._match(TokenType.DOT): 4080 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4081 4082 if not self._match(TokenType.L_PAREN): 4083 return this 4084 4085 expressions = self._parse_csv(self._parse_function_parameter) 4086 self._match_r_paren() 4087 return self.expression( 4088 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4089 ) 4090 4091 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4092 literal = self._parse_primary() 4093 if literal: 4094 return self.expression(exp.Introducer, this=token.text, expression=literal) 4095 4096 return self.expression(exp.Identifier, this=token.text) 4097 4098 def _parse_session_parameter(self) -> exp.SessionParameter: 4099 kind = None 4100 this = self._parse_id_var() or self._parse_primary() 4101 4102 if this and self._match(TokenType.DOT): 4103 kind = this.name 4104 this = self._parse_var() or self._parse_primary() 4105 4106 return self.expression(exp.SessionParameter, this=this, kind=kind) 4107 4108 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4109 index = self._index 4110 4111 if self._match(TokenType.L_PAREN): 4112 expressions = t.cast( 4113 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4114 ) 4115 4116 if not self._match(TokenType.R_PAREN): 4117 self._retreat(index) 4118 else: 4119 expressions = [self._parse_id_var()] 4120 4121 if self._match_set(self.LAMBDAS): 4122 return self.LAMBDAS[self._prev.token_type](self, expressions) 4123 4124 self._retreat(index) 4125 4126 this: t.Optional[exp.Expression] 4127 4128 if self._match(TokenType.DISTINCT): 4129 this = self.expression( 4130 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4131 ) 4132 else: 4133 this = self._parse_select_or_expression(alias=alias) 4134 4135 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4136 4137 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4138 index = self._index 4139 4140 if not self.errors: 4141 try: 4142 if self._parse_select(nested=True): 4143 return this 4144 except ParseError: 4145 pass 4146 finally: 4147 self.errors.clear() 4148 self._retreat(index) 4149 4150 if not self._match(TokenType.L_PAREN): 4151 return this 4152 4153 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4154 4155 self._match_r_paren() 4156 return self.expression(exp.Schema, this=this, expressions=args) 4157 4158 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4159 return self._parse_column_def(self._parse_field(any_token=True)) 4160 4161 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4162 # column defs are not really columns, they're identifiers 4163 if isinstance(this, exp.Column): 4164 this = this.this 4165 4166 kind = self._parse_types(schema=True) 4167 4168 if self._match_text_seq("FOR", "ORDINALITY"): 4169 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4170 4171 constraints: t.List[exp.Expression] = [] 4172 4173 if not kind and self._match(TokenType.ALIAS): 4174 constraints.append( 4175 self.expression( 4176 exp.ComputedColumnConstraint, 4177 this=self._parse_conjunction(), 4178 persisted=self._match_text_seq("PERSISTED"), 4179 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4180 ) 4181 ) 4182 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4183 self._match(TokenType.ALIAS) 4184 constraints.append( 4185 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4186 ) 4187 4188 while True: 4189 constraint = self._parse_column_constraint() 4190 if not constraint: 4191 break 4192 constraints.append(constraint) 4193 4194 if not kind and not constraints: 4195 return this 4196 4197 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4198 4199 def _parse_auto_increment( 4200 self, 4201 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4202 start = None 4203 increment = None 4204 4205 if self._match(TokenType.L_PAREN, advance=False): 4206 args = self._parse_wrapped_csv(self._parse_bitwise) 4207 start = seq_get(args, 0) 4208 increment = seq_get(args, 1) 4209 elif self._match_text_seq("START"): 4210 start = self._parse_bitwise() 4211 self._match_text_seq("INCREMENT") 4212 increment = self._parse_bitwise() 4213 4214 if start and increment: 4215 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4216 4217 return exp.AutoIncrementColumnConstraint() 4218 4219 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4220 if not self._match_text_seq("REFRESH"): 4221 self._retreat(self._index - 1) 4222 return None 4223 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4224 4225 def _parse_compress(self) -> exp.CompressColumnConstraint: 4226 if self._match(TokenType.L_PAREN, advance=False): 4227 return self.expression( 4228 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4229 ) 4230 4231 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4232 4233 def _parse_generated_as_identity( 4234 self, 4235 ) -> ( 4236 exp.GeneratedAsIdentityColumnConstraint 4237 | exp.ComputedColumnConstraint 4238 | exp.GeneratedAsRowColumnConstraint 4239 ): 4240 if self._match_text_seq("BY", "DEFAULT"): 4241 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4242 this = self.expression( 4243 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4244 ) 4245 else: 4246 self._match_text_seq("ALWAYS") 4247 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4248 4249 self._match(TokenType.ALIAS) 4250 4251 if self._match_text_seq("ROW"): 4252 start = self._match_text_seq("START") 4253 if not start: 4254 self._match(TokenType.END) 4255 hidden = self._match_text_seq("HIDDEN") 4256 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4257 4258 identity = self._match_text_seq("IDENTITY") 4259 4260 if self._match(TokenType.L_PAREN): 4261 if self._match(TokenType.START_WITH): 4262 this.set("start", self._parse_bitwise()) 4263 if self._match_text_seq("INCREMENT", "BY"): 4264 this.set("increment", self._parse_bitwise()) 4265 if self._match_text_seq("MINVALUE"): 4266 this.set("minvalue", self._parse_bitwise()) 4267 if self._match_text_seq("MAXVALUE"): 4268 this.set("maxvalue", self._parse_bitwise()) 4269 4270 if self._match_text_seq("CYCLE"): 4271 this.set("cycle", True) 4272 elif self._match_text_seq("NO", "CYCLE"): 4273 this.set("cycle", False) 4274 4275 if not identity: 4276 this.set("expression", self._parse_bitwise()) 4277 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4278 args = self._parse_csv(self._parse_bitwise) 4279 this.set("start", seq_get(args, 0)) 4280 this.set("increment", seq_get(args, 1)) 4281 4282 self._match_r_paren() 4283 4284 return this 4285 4286 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4287 self._match_text_seq("LENGTH") 4288 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4289 4290 def _parse_not_constraint( 4291 self, 4292 ) -> t.Optional[exp.Expression]: 4293 if self._match_text_seq("NULL"): 4294 return self.expression(exp.NotNullColumnConstraint) 4295 if self._match_text_seq("CASESPECIFIC"): 4296 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4297 if self._match_text_seq("FOR", "REPLICATION"): 4298 return self.expression(exp.NotForReplicationColumnConstraint) 4299 return None 4300 4301 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4302 if self._match(TokenType.CONSTRAINT): 4303 this = self._parse_id_var() 4304 else: 4305 this = None 4306 4307 if self._match_texts(self.CONSTRAINT_PARSERS): 4308 return self.expression( 4309 exp.ColumnConstraint, 4310 this=this, 4311 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4312 ) 4313 4314 return this 4315 4316 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4317 if not self._match(TokenType.CONSTRAINT): 4318 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4319 4320 this = self._parse_id_var() 4321 expressions = [] 4322 4323 while True: 4324 constraint = self._parse_unnamed_constraint() or self._parse_function() 4325 if not constraint: 4326 break 4327 expressions.append(constraint) 4328 4329 return self.expression(exp.Constraint, this=this, expressions=expressions) 4330 4331 def _parse_unnamed_constraint( 4332 self, constraints: t.Optional[t.Collection[str]] = None 4333 ) -> t.Optional[exp.Expression]: 4334 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4335 constraints or self.CONSTRAINT_PARSERS 4336 ): 4337 return None 4338 4339 constraint = self._prev.text.upper() 4340 if constraint not in self.CONSTRAINT_PARSERS: 4341 self.raise_error(f"No parser found for schema constraint {constraint}.") 4342 4343 return self.CONSTRAINT_PARSERS[constraint](self) 4344 4345 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4346 self._match_text_seq("KEY") 4347 return self.expression( 4348 exp.UniqueColumnConstraint, 4349 this=self._parse_schema(self._parse_id_var(any_token=False)), 4350 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4351 ) 4352 4353 def _parse_key_constraint_options(self) -> t.List[str]: 4354 options = [] 4355 while True: 4356 if not self._curr: 4357 break 4358 4359 if self._match(TokenType.ON): 4360 action = None 4361 on = self._advance_any() and self._prev.text 4362 4363 if self._match_text_seq("NO", "ACTION"): 4364 action = "NO ACTION" 4365 elif self._match_text_seq("CASCADE"): 4366 action = "CASCADE" 4367 elif self._match_text_seq("RESTRICT"): 4368 action = "RESTRICT" 4369 elif self._match_pair(TokenType.SET, TokenType.NULL): 4370 action = "SET NULL" 4371 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4372 action = "SET DEFAULT" 4373 else: 4374 self.raise_error("Invalid key constraint") 4375 4376 options.append(f"ON {on} {action}") 4377 elif self._match_text_seq("NOT", "ENFORCED"): 4378 options.append("NOT ENFORCED") 4379 elif self._match_text_seq("DEFERRABLE"): 4380 options.append("DEFERRABLE") 4381 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4382 options.append("INITIALLY DEFERRED") 4383 elif self._match_text_seq("NORELY"): 4384 options.append("NORELY") 4385 elif self._match_text_seq("MATCH", "FULL"): 4386 options.append("MATCH FULL") 4387 else: 4388 break 4389 4390 return options 4391 4392 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4393 if match and not self._match(TokenType.REFERENCES): 4394 return None 4395 4396 expressions = None 4397 this = self._parse_table(schema=True) 4398 options = self._parse_key_constraint_options() 4399 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4400 4401 def _parse_foreign_key(self) -> exp.ForeignKey: 4402 expressions = self._parse_wrapped_id_vars() 4403 reference = self._parse_references() 4404 options = {} 4405 4406 while self._match(TokenType.ON): 4407 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4408 self.raise_error("Expected DELETE or UPDATE") 4409 4410 kind = self._prev.text.lower() 4411 4412 if self._match_text_seq("NO", "ACTION"): 4413 action = "NO ACTION" 4414 elif self._match(TokenType.SET): 4415 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4416 action = "SET " + self._prev.text.upper() 4417 else: 4418 self._advance() 4419 action = self._prev.text.upper() 4420 4421 options[kind] = action 4422 4423 return self.expression( 4424 exp.ForeignKey, 4425 expressions=expressions, 4426 reference=reference, 4427 **options, # type: ignore 4428 ) 4429 4430 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4431 return self._parse_field() 4432 4433 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4434 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4435 self._retreat(self._index - 1) 4436 return None 4437 4438 id_vars = self._parse_wrapped_id_vars() 4439 return self.expression( 4440 exp.PeriodForSystemTimeConstraint, 4441 this=seq_get(id_vars, 0), 4442 expression=seq_get(id_vars, 1), 4443 ) 4444 4445 def _parse_primary_key( 4446 self, wrapped_optional: bool = False, in_props: bool = False 4447 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4448 desc = ( 4449 self._match_set((TokenType.ASC, TokenType.DESC)) 4450 and self._prev.token_type == TokenType.DESC 4451 ) 4452 4453 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4454 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4455 4456 expressions = self._parse_wrapped_csv( 4457 self._parse_primary_key_part, optional=wrapped_optional 4458 ) 4459 options = self._parse_key_constraint_options() 4460 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4461 4462 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4463 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4464 4465 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4466 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4467 return this 4468 4469 bracket_kind = self._prev.token_type 4470 expressions = self._parse_csv( 4471 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4472 ) 4473 4474 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4475 self.raise_error("Expected ]") 4476 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4477 self.raise_error("Expected }") 4478 4479 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4480 if bracket_kind == TokenType.L_BRACE: 4481 this = self.expression(exp.Struct, expressions=expressions) 4482 elif not this or this.name.upper() == "ARRAY": 4483 this = self.expression(exp.Array, expressions=expressions) 4484 else: 4485 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4486 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4487 4488 self._add_comments(this) 4489 return self._parse_bracket(this) 4490 4491 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4492 if self._match(TokenType.COLON): 4493 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4494 return this 4495 4496 def _parse_case(self) -> t.Optional[exp.Expression]: 4497 ifs = [] 4498 default = None 4499 4500 comments = self._prev_comments 4501 expression = self._parse_conjunction() 4502 4503 while self._match(TokenType.WHEN): 4504 this = self._parse_conjunction() 4505 self._match(TokenType.THEN) 4506 then = self._parse_conjunction() 4507 ifs.append(self.expression(exp.If, this=this, true=then)) 4508 4509 if self._match(TokenType.ELSE): 4510 default = self._parse_conjunction() 4511 4512 if not self._match(TokenType.END): 4513 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4514 default = exp.column("interval") 4515 else: 4516 self.raise_error("Expected END after CASE", self._prev) 4517 4518 return self._parse_window( 4519 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4520 ) 4521 4522 def _parse_if(self) -> t.Optional[exp.Expression]: 4523 if self._match(TokenType.L_PAREN): 4524 args = self._parse_csv(self._parse_conjunction) 4525 this = self.validate_expression(exp.If.from_arg_list(args), args) 4526 self._match_r_paren() 4527 else: 4528 index = self._index - 1 4529 4530 if self.NO_PAREN_IF_COMMANDS and index == 0: 4531 return self._parse_as_command(self._prev) 4532 4533 condition = self._parse_conjunction() 4534 4535 if not condition: 4536 self._retreat(index) 4537 return None 4538 4539 self._match(TokenType.THEN) 4540 true = self._parse_conjunction() 4541 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4542 self._match(TokenType.END) 4543 this = self.expression(exp.If, this=condition, true=true, false=false) 4544 4545 return self._parse_window(this) 4546 4547 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4548 if not self._match_text_seq("VALUE", "FOR"): 4549 self._retreat(self._index - 1) 4550 return None 4551 4552 return self.expression( 4553 exp.NextValueFor, 4554 this=self._parse_column(), 4555 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4556 ) 4557 4558 def _parse_extract(self) -> exp.Extract: 4559 this = self._parse_function() or self._parse_var() or self._parse_type() 4560 4561 if self._match(TokenType.FROM): 4562 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4563 4564 if not self._match(TokenType.COMMA): 4565 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4566 4567 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4568 4569 def _parse_any_value(self) -> exp.AnyValue: 4570 this = self._parse_lambda() 4571 is_max = None 4572 having = None 4573 4574 if self._match(TokenType.HAVING): 4575 self._match_texts(("MAX", "MIN")) 4576 is_max = self._prev.text == "MAX" 4577 having = self._parse_column() 4578 4579 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4580 4581 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4582 this = self._parse_conjunction() 4583 4584 if not self._match(TokenType.ALIAS): 4585 if self._match(TokenType.COMMA): 4586 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4587 4588 self.raise_error("Expected AS after CAST") 4589 4590 fmt = None 4591 to = self._parse_types() 4592 4593 if self._match(TokenType.FORMAT): 4594 fmt_string = self._parse_string() 4595 fmt = self._parse_at_time_zone(fmt_string) 4596 4597 if not to: 4598 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4599 if to.this in exp.DataType.TEMPORAL_TYPES: 4600 this = self.expression( 4601 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4602 this=this, 4603 format=exp.Literal.string( 4604 format_time( 4605 fmt_string.this if fmt_string else "", 4606 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4607 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4608 ) 4609 ), 4610 ) 4611 4612 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4613 this.set("zone", fmt.args["zone"]) 4614 return this 4615 elif not to: 4616 self.raise_error("Expected TYPE after CAST") 4617 elif isinstance(to, exp.Identifier): 4618 to = exp.DataType.build(to.name, udt=True) 4619 elif to.this == exp.DataType.Type.CHAR: 4620 if self._match(TokenType.CHARACTER_SET): 4621 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4622 4623 return self.expression( 4624 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4625 ) 4626 4627 def _parse_string_agg(self) -> exp.Expression: 4628 if self._match(TokenType.DISTINCT): 4629 args: t.List[t.Optional[exp.Expression]] = [ 4630 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4631 ] 4632 if self._match(TokenType.COMMA): 4633 args.extend(self._parse_csv(self._parse_conjunction)) 4634 else: 4635 args = self._parse_csv(self._parse_conjunction) # type: ignore 4636 4637 index = self._index 4638 if not self._match(TokenType.R_PAREN) and args: 4639 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4640 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4641 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4642 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4643 4644 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4645 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4646 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4647 if not self._match_text_seq("WITHIN", "GROUP"): 4648 self._retreat(index) 4649 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4650 4651 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4652 order = self._parse_order(this=seq_get(args, 0)) 4653 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4654 4655 def _parse_convert( 4656 self, strict: bool, safe: t.Optional[bool] = None 4657 ) -> t.Optional[exp.Expression]: 4658 this = self._parse_bitwise() 4659 4660 if self._match(TokenType.USING): 4661 to: t.Optional[exp.Expression] = self.expression( 4662 exp.CharacterSet, this=self._parse_var() 4663 ) 4664 elif self._match(TokenType.COMMA): 4665 to = self._parse_types() 4666 else: 4667 to = None 4668 4669 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4670 4671 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4672 """ 4673 There are generally two variants of the DECODE function: 4674 4675 - DECODE(bin, charset) 4676 - DECODE(expression, search, result [, search, result] ... [, default]) 4677 4678 The second variant will always be parsed into a CASE expression. Note that NULL 4679 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4680 instead of relying on pattern matching. 4681 """ 4682 args = self._parse_csv(self._parse_conjunction) 4683 4684 if len(args) < 3: 4685 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4686 4687 expression, *expressions = args 4688 if not expression: 4689 return None 4690 4691 ifs = [] 4692 for search, result in zip(expressions[::2], expressions[1::2]): 4693 if not search or not result: 4694 return None 4695 4696 if isinstance(search, exp.Literal): 4697 ifs.append( 4698 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4699 ) 4700 elif isinstance(search, exp.Null): 4701 ifs.append( 4702 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4703 ) 4704 else: 4705 cond = exp.or_( 4706 exp.EQ(this=expression.copy(), expression=search), 4707 exp.and_( 4708 exp.Is(this=expression.copy(), expression=exp.Null()), 4709 exp.Is(this=search.copy(), expression=exp.Null()), 4710 copy=False, 4711 ), 4712 copy=False, 4713 ) 4714 ifs.append(exp.If(this=cond, true=result)) 4715 4716 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4717 4718 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4719 self._match_text_seq("KEY") 4720 key = self._parse_column() 4721 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4722 self._match_text_seq("VALUE") 4723 value = self._parse_bitwise() 4724 4725 if not key and not value: 4726 return None 4727 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4728 4729 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4730 if not this or not self._match_text_seq("FORMAT", "JSON"): 4731 return this 4732 4733 return self.expression(exp.FormatJson, this=this) 4734 4735 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4736 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4737 for value in values: 4738 if self._match_text_seq(value, "ON", on): 4739 return f"{value} ON {on}" 4740 4741 return None 4742 4743 @t.overload 4744 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4745 ... 4746 4747 @t.overload 4748 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4749 ... 4750 4751 def _parse_json_object(self, agg=False): 4752 star = self._parse_star() 4753 expressions = ( 4754 [star] 4755 if star 4756 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4757 ) 4758 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4759 4760 unique_keys = None 4761 if self._match_text_seq("WITH", "UNIQUE"): 4762 unique_keys = True 4763 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4764 unique_keys = False 4765 4766 self._match_text_seq("KEYS") 4767 4768 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4769 self._parse_type() 4770 ) 4771 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4772 4773 return self.expression( 4774 exp.JSONObjectAgg if agg else exp.JSONObject, 4775 expressions=expressions, 4776 null_handling=null_handling, 4777 unique_keys=unique_keys, 4778 return_type=return_type, 4779 encoding=encoding, 4780 ) 4781 4782 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4783 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4784 if not self._match_text_seq("NESTED"): 4785 this = self._parse_id_var() 4786 kind = self._parse_types(allow_identifiers=False) 4787 nested = None 4788 else: 4789 this = None 4790 kind = None 4791 nested = True 4792 4793 path = self._match_text_seq("PATH") and self._parse_string() 4794 nested_schema = nested and self._parse_json_schema() 4795 4796 return self.expression( 4797 exp.JSONColumnDef, 4798 this=this, 4799 kind=kind, 4800 path=path, 4801 nested_schema=nested_schema, 4802 ) 4803 4804 def _parse_json_schema(self) -> exp.JSONSchema: 4805 self._match_text_seq("COLUMNS") 4806 return self.expression( 4807 exp.JSONSchema, 4808 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4809 ) 4810 4811 def _parse_json_table(self) -> exp.JSONTable: 4812 this = self._parse_format_json(self._parse_bitwise()) 4813 path = self._match(TokenType.COMMA) and self._parse_string() 4814 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4815 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4816 schema = self._parse_json_schema() 4817 4818 return exp.JSONTable( 4819 this=this, 4820 schema=schema, 4821 path=path, 4822 error_handling=error_handling, 4823 empty_handling=empty_handling, 4824 ) 4825 4826 def _parse_match_against(self) -> exp.MatchAgainst: 4827 expressions = self._parse_csv(self._parse_column) 4828 4829 self._match_text_seq(")", "AGAINST", "(") 4830 4831 this = self._parse_string() 4832 4833 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4834 modifier = "IN NATURAL LANGUAGE MODE" 4835 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4836 modifier = f"{modifier} WITH QUERY EXPANSION" 4837 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4838 modifier = "IN BOOLEAN MODE" 4839 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4840 modifier = "WITH QUERY EXPANSION" 4841 else: 4842 modifier = None 4843 4844 return self.expression( 4845 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4846 ) 4847 4848 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4849 def _parse_open_json(self) -> exp.OpenJSON: 4850 this = self._parse_bitwise() 4851 path = self._match(TokenType.COMMA) and self._parse_string() 4852 4853 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4854 this = self._parse_field(any_token=True) 4855 kind = self._parse_types() 4856 path = self._parse_string() 4857 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4858 4859 return self.expression( 4860 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4861 ) 4862 4863 expressions = None 4864 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4865 self._match_l_paren() 4866 expressions = self._parse_csv(_parse_open_json_column_def) 4867 4868 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4869 4870 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4871 args = self._parse_csv(self._parse_bitwise) 4872 4873 if self._match(TokenType.IN): 4874 return self.expression( 4875 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4876 ) 4877 4878 if haystack_first: 4879 haystack = seq_get(args, 0) 4880 needle = seq_get(args, 1) 4881 else: 4882 needle = seq_get(args, 0) 4883 haystack = seq_get(args, 1) 4884 4885 return self.expression( 4886 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4887 ) 4888 4889 def _parse_predict(self) -> exp.Predict: 4890 self._match_text_seq("MODEL") 4891 this = self._parse_table() 4892 4893 self._match(TokenType.COMMA) 4894 self._match_text_seq("TABLE") 4895 4896 return self.expression( 4897 exp.Predict, 4898 this=this, 4899 expression=self._parse_table(), 4900 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4901 ) 4902 4903 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4904 args = self._parse_csv(self._parse_table) 4905 return exp.JoinHint(this=func_name.upper(), expressions=args) 4906 4907 def _parse_substring(self) -> exp.Substring: 4908 # Postgres supports the form: substring(string [from int] [for int]) 4909 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4910 4911 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4912 4913 if self._match(TokenType.FROM): 4914 args.append(self._parse_bitwise()) 4915 if self._match(TokenType.FOR): 4916 args.append(self._parse_bitwise()) 4917 4918 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4919 4920 def _parse_trim(self) -> exp.Trim: 4921 # https://www.w3resource.com/sql/character-functions/trim.php 4922 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4923 4924 position = None 4925 collation = None 4926 expression = None 4927 4928 if self._match_texts(self.TRIM_TYPES): 4929 position = self._prev.text.upper() 4930 4931 this = self._parse_bitwise() 4932 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4933 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4934 expression = self._parse_bitwise() 4935 4936 if invert_order: 4937 this, expression = expression, this 4938 4939 if self._match(TokenType.COLLATE): 4940 collation = self._parse_bitwise() 4941 4942 return self.expression( 4943 exp.Trim, this=this, position=position, expression=expression, collation=collation 4944 ) 4945 4946 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4947 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4948 4949 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4950 return self._parse_window(self._parse_id_var(), alias=True) 4951 4952 def _parse_respect_or_ignore_nulls( 4953 self, this: t.Optional[exp.Expression] 4954 ) -> t.Optional[exp.Expression]: 4955 if self._match_text_seq("IGNORE", "NULLS"): 4956 return self.expression(exp.IgnoreNulls, this=this) 4957 if self._match_text_seq("RESPECT", "NULLS"): 4958 return self.expression(exp.RespectNulls, this=this) 4959 return this 4960 4961 def _parse_window( 4962 self, this: t.Optional[exp.Expression], alias: bool = False 4963 ) -> t.Optional[exp.Expression]: 4964 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4965 self._match(TokenType.WHERE) 4966 this = self.expression( 4967 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4968 ) 4969 self._match_r_paren() 4970 4971 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4972 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4973 if self._match_text_seq("WITHIN", "GROUP"): 4974 order = self._parse_wrapped(self._parse_order) 4975 this = self.expression(exp.WithinGroup, this=this, expression=order) 4976 4977 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4978 # Some dialects choose to implement and some do not. 4979 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4980 4981 # There is some code above in _parse_lambda that handles 4982 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4983 4984 # The below changes handle 4985 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4986 4987 # Oracle allows both formats 4988 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4989 # and Snowflake chose to do the same for familiarity 4990 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4991 if isinstance(this, exp.AggFunc): 4992 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 4993 4994 if ignore_respect and ignore_respect is not this: 4995 ignore_respect.replace(ignore_respect.this) 4996 this = self.expression(ignore_respect.__class__, this=this) 4997 4998 this = self._parse_respect_or_ignore_nulls(this) 4999 5000 # bigquery select from window x AS (partition by ...) 5001 if alias: 5002 over = None 5003 self._match(TokenType.ALIAS) 5004 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5005 return this 5006 else: 5007 over = self._prev.text.upper() 5008 5009 if not self._match(TokenType.L_PAREN): 5010 return self.expression( 5011 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5012 ) 5013 5014 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5015 5016 first = self._match(TokenType.FIRST) 5017 if self._match_text_seq("LAST"): 5018 first = False 5019 5020 partition, order = self._parse_partition_and_order() 5021 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5022 5023 if kind: 5024 self._match(TokenType.BETWEEN) 5025 start = self._parse_window_spec() 5026 self._match(TokenType.AND) 5027 end = self._parse_window_spec() 5028 5029 spec = self.expression( 5030 exp.WindowSpec, 5031 kind=kind, 5032 start=start["value"], 5033 start_side=start["side"], 5034 end=end["value"], 5035 end_side=end["side"], 5036 ) 5037 else: 5038 spec = None 5039 5040 self._match_r_paren() 5041 5042 window = self.expression( 5043 exp.Window, 5044 this=this, 5045 partition_by=partition, 5046 order=order, 5047 spec=spec, 5048 alias=window_alias, 5049 over=over, 5050 first=first, 5051 ) 5052 5053 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5054 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5055 return self._parse_window(window, alias=alias) 5056 5057 return window 5058 5059 def _parse_partition_and_order( 5060 self, 5061 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5062 return self._parse_partition_by(), self._parse_order() 5063 5064 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5065 self._match(TokenType.BETWEEN) 5066 5067 return { 5068 "value": ( 5069 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5070 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5071 or self._parse_bitwise() 5072 ), 5073 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5074 } 5075 5076 def _parse_alias( 5077 self, this: t.Optional[exp.Expression], explicit: bool = False 5078 ) -> t.Optional[exp.Expression]: 5079 any_token = self._match(TokenType.ALIAS) 5080 comments = self._prev_comments 5081 5082 if explicit and not any_token: 5083 return this 5084 5085 if self._match(TokenType.L_PAREN): 5086 aliases = self.expression( 5087 exp.Aliases, 5088 comments=comments, 5089 this=this, 5090 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5091 ) 5092 self._match_r_paren(aliases) 5093 return aliases 5094 5095 alias = self._parse_id_var(any_token) or ( 5096 self.STRING_ALIASES and self._parse_string_as_identifier() 5097 ) 5098 5099 if alias: 5100 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5101 column = this.this 5102 5103 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5104 if not this.comments and column and column.comments: 5105 this.comments = column.comments 5106 column.comments = None 5107 5108 return this 5109 5110 def _parse_id_var( 5111 self, 5112 any_token: bool = True, 5113 tokens: t.Optional[t.Collection[TokenType]] = None, 5114 ) -> t.Optional[exp.Expression]: 5115 identifier = self._parse_identifier() 5116 5117 if identifier: 5118 return identifier 5119 5120 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5121 quoted = self._prev.token_type == TokenType.STRING 5122 return exp.Identifier(this=self._prev.text, quoted=quoted) 5123 5124 return None 5125 5126 def _parse_string(self) -> t.Optional[exp.Expression]: 5127 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5128 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5129 return self._parse_placeholder() 5130 5131 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5132 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5133 5134 def _parse_number(self) -> t.Optional[exp.Expression]: 5135 if self._match(TokenType.NUMBER): 5136 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5137 return self._parse_placeholder() 5138 5139 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5140 if self._match(TokenType.IDENTIFIER): 5141 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5142 return self._parse_placeholder() 5143 5144 def _parse_var( 5145 self, 5146 any_token: bool = False, 5147 tokens: t.Optional[t.Collection[TokenType]] = None, 5148 upper: bool = False, 5149 ) -> t.Optional[exp.Expression]: 5150 if ( 5151 (any_token and self._advance_any()) 5152 or self._match(TokenType.VAR) 5153 or (self._match_set(tokens) if tokens else False) 5154 ): 5155 return self.expression( 5156 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5157 ) 5158 return self._parse_placeholder() 5159 5160 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5161 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5162 self._advance() 5163 return self._prev 5164 return None 5165 5166 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5167 return self._parse_var() or self._parse_string() 5168 5169 def _parse_null(self) -> t.Optional[exp.Expression]: 5170 if self._match_set(self.NULL_TOKENS): 5171 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5172 return self._parse_placeholder() 5173 5174 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5175 if self._match(TokenType.TRUE): 5176 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5177 if self._match(TokenType.FALSE): 5178 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5179 return self._parse_placeholder() 5180 5181 def _parse_star(self) -> t.Optional[exp.Expression]: 5182 if self._match(TokenType.STAR): 5183 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5184 return self._parse_placeholder() 5185 5186 def _parse_parameter(self) -> exp.Parameter: 5187 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5188 return ( 5189 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5190 ) 5191 5192 self._match(TokenType.L_BRACE) 5193 this = _parse_parameter_part() 5194 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5195 self._match(TokenType.R_BRACE) 5196 5197 return self.expression(exp.Parameter, this=this, expression=expression) 5198 5199 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5200 if self._match_set(self.PLACEHOLDER_PARSERS): 5201 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5202 if placeholder: 5203 return placeholder 5204 self._advance(-1) 5205 return None 5206 5207 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5208 if not self._match(TokenType.EXCEPT): 5209 return None 5210 if self._match(TokenType.L_PAREN, advance=False): 5211 return self._parse_wrapped_csv(self._parse_column) 5212 5213 except_column = self._parse_column() 5214 return [except_column] if except_column else None 5215 5216 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5217 if not self._match(TokenType.REPLACE): 5218 return None 5219 if self._match(TokenType.L_PAREN, advance=False): 5220 return self._parse_wrapped_csv(self._parse_expression) 5221 5222 replace_expression = self._parse_expression() 5223 return [replace_expression] if replace_expression else None 5224 5225 def _parse_csv( 5226 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5227 ) -> t.List[exp.Expression]: 5228 parse_result = parse_method() 5229 items = [parse_result] if parse_result is not None else [] 5230 5231 while self._match(sep): 5232 self._add_comments(parse_result) 5233 parse_result = parse_method() 5234 if parse_result is not None: 5235 items.append(parse_result) 5236 5237 return items 5238 5239 def _parse_tokens( 5240 self, parse_method: t.Callable, expressions: t.Dict 5241 ) -> t.Optional[exp.Expression]: 5242 this = parse_method() 5243 5244 while self._match_set(expressions): 5245 this = self.expression( 5246 expressions[self._prev.token_type], 5247 this=this, 5248 comments=self._prev_comments, 5249 expression=parse_method(), 5250 ) 5251 5252 return this 5253 5254 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5255 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5256 5257 def _parse_wrapped_csv( 5258 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5259 ) -> t.List[exp.Expression]: 5260 return self._parse_wrapped( 5261 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5262 ) 5263 5264 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5265 wrapped = self._match(TokenType.L_PAREN) 5266 if not wrapped and not optional: 5267 self.raise_error("Expecting (") 5268 parse_result = parse_method() 5269 if wrapped: 5270 self._match_r_paren() 5271 return parse_result 5272 5273 def _parse_expressions(self) -> t.List[exp.Expression]: 5274 return self._parse_csv(self._parse_expression) 5275 5276 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5277 return self._parse_select() or self._parse_set_operations( 5278 self._parse_expression() if alias else self._parse_conjunction() 5279 ) 5280 5281 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5282 return self._parse_query_modifiers( 5283 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5284 ) 5285 5286 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5287 this = None 5288 if self._match_texts(self.TRANSACTION_KIND): 5289 this = self._prev.text 5290 5291 self._match_texts(("TRANSACTION", "WORK")) 5292 5293 modes = [] 5294 while True: 5295 mode = [] 5296 while self._match(TokenType.VAR): 5297 mode.append(self._prev.text) 5298 5299 if mode: 5300 modes.append(" ".join(mode)) 5301 if not self._match(TokenType.COMMA): 5302 break 5303 5304 return self.expression(exp.Transaction, this=this, modes=modes) 5305 5306 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5307 chain = None 5308 savepoint = None 5309 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5310 5311 self._match_texts(("TRANSACTION", "WORK")) 5312 5313 if self._match_text_seq("TO"): 5314 self._match_text_seq("SAVEPOINT") 5315 savepoint = self._parse_id_var() 5316 5317 if self._match(TokenType.AND): 5318 chain = not self._match_text_seq("NO") 5319 self._match_text_seq("CHAIN") 5320 5321 if is_rollback: 5322 return self.expression(exp.Rollback, savepoint=savepoint) 5323 5324 return self.expression(exp.Commit, chain=chain) 5325 5326 def _parse_refresh(self) -> exp.Refresh: 5327 self._match(TokenType.TABLE) 5328 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5329 5330 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5331 if not self._match_text_seq("ADD"): 5332 return None 5333 5334 self._match(TokenType.COLUMN) 5335 exists_column = self._parse_exists(not_=True) 5336 expression = self._parse_field_def() 5337 5338 if expression: 5339 expression.set("exists", exists_column) 5340 5341 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5342 if self._match_texts(("FIRST", "AFTER")): 5343 position = self._prev.text 5344 column_position = self.expression( 5345 exp.ColumnPosition, this=self._parse_column(), position=position 5346 ) 5347 expression.set("position", column_position) 5348 5349 return expression 5350 5351 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5352 drop = self._match(TokenType.DROP) and self._parse_drop() 5353 if drop and not isinstance(drop, exp.Command): 5354 drop.set("kind", drop.args.get("kind", "COLUMN")) 5355 return drop 5356 5357 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5358 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5359 return self.expression( 5360 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5361 ) 5362 5363 def _parse_add_constraint(self) -> exp.AddConstraint: 5364 this = None 5365 kind = self._prev.token_type 5366 5367 if kind == TokenType.CONSTRAINT: 5368 this = self._parse_id_var() 5369 5370 if self._match_text_seq("CHECK"): 5371 expression = self._parse_wrapped(self._parse_conjunction) 5372 enforced = self._match_text_seq("ENFORCED") or False 5373 5374 return self.expression( 5375 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5376 ) 5377 5378 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5379 expression = self._parse_foreign_key() 5380 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5381 expression = self._parse_primary_key() 5382 else: 5383 expression = None 5384 5385 return self.expression(exp.AddConstraint, this=this, expression=expression) 5386 5387 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5388 index = self._index - 1 5389 5390 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5391 return self._parse_csv(self._parse_add_constraint) 5392 5393 self._retreat(index) 5394 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5395 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5396 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5397 5398 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5399 self._match(TokenType.COLUMN) 5400 column = self._parse_field(any_token=True) 5401 5402 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5403 return self.expression(exp.AlterColumn, this=column, drop=True) 5404 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5405 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5406 if self._match(TokenType.COMMENT): 5407 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5408 5409 self._match_text_seq("SET", "DATA") 5410 return self.expression( 5411 exp.AlterColumn, 5412 this=column, 5413 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5414 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5415 using=self._match(TokenType.USING) and self._parse_conjunction(), 5416 ) 5417 5418 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5419 index = self._index - 1 5420 5421 partition_exists = self._parse_exists() 5422 if self._match(TokenType.PARTITION, advance=False): 5423 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5424 5425 self._retreat(index) 5426 return self._parse_csv(self._parse_drop_column) 5427 5428 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5429 if self._match(TokenType.COLUMN): 5430 exists = self._parse_exists() 5431 old_column = self._parse_column() 5432 to = self._match_text_seq("TO") 5433 new_column = self._parse_column() 5434 5435 if old_column is None or to is None or new_column is None: 5436 return None 5437 5438 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5439 5440 self._match_text_seq("TO") 5441 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5442 5443 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5444 start = self._prev 5445 5446 if not self._match(TokenType.TABLE): 5447 return self._parse_as_command(start) 5448 5449 exists = self._parse_exists() 5450 only = self._match_text_seq("ONLY") 5451 this = self._parse_table(schema=True) 5452 5453 if self._next: 5454 self._advance() 5455 5456 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5457 if parser: 5458 actions = ensure_list(parser(self)) 5459 5460 if not self._curr and actions: 5461 return self.expression( 5462 exp.AlterTable, 5463 this=this, 5464 exists=exists, 5465 actions=actions, 5466 only=only, 5467 ) 5468 5469 return self._parse_as_command(start) 5470 5471 def _parse_merge(self) -> exp.Merge: 5472 self._match(TokenType.INTO) 5473 target = self._parse_table() 5474 5475 if target and self._match(TokenType.ALIAS, advance=False): 5476 target.set("alias", self._parse_table_alias()) 5477 5478 self._match(TokenType.USING) 5479 using = self._parse_table() 5480 5481 self._match(TokenType.ON) 5482 on = self._parse_conjunction() 5483 5484 return self.expression( 5485 exp.Merge, 5486 this=target, 5487 using=using, 5488 on=on, 5489 expressions=self._parse_when_matched(), 5490 ) 5491 5492 def _parse_when_matched(self) -> t.List[exp.When]: 5493 whens = [] 5494 5495 while self._match(TokenType.WHEN): 5496 matched = not self._match(TokenType.NOT) 5497 self._match_text_seq("MATCHED") 5498 source = ( 5499 False 5500 if self._match_text_seq("BY", "TARGET") 5501 else self._match_text_seq("BY", "SOURCE") 5502 ) 5503 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5504 5505 self._match(TokenType.THEN) 5506 5507 if self._match(TokenType.INSERT): 5508 _this = self._parse_star() 5509 if _this: 5510 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5511 else: 5512 then = self.expression( 5513 exp.Insert, 5514 this=self._parse_value(), 5515 expression=self._match(TokenType.VALUES) and self._parse_value(), 5516 ) 5517 elif self._match(TokenType.UPDATE): 5518 expressions = self._parse_star() 5519 if expressions: 5520 then = self.expression(exp.Update, expressions=expressions) 5521 else: 5522 then = self.expression( 5523 exp.Update, 5524 expressions=self._match(TokenType.SET) 5525 and self._parse_csv(self._parse_equality), 5526 ) 5527 elif self._match(TokenType.DELETE): 5528 then = self.expression(exp.Var, this=self._prev.text) 5529 else: 5530 then = None 5531 5532 whens.append( 5533 self.expression( 5534 exp.When, 5535 matched=matched, 5536 source=source, 5537 condition=condition, 5538 then=then, 5539 ) 5540 ) 5541 return whens 5542 5543 def _parse_show(self) -> t.Optional[exp.Expression]: 5544 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5545 if parser: 5546 return parser(self) 5547 return self._parse_as_command(self._prev) 5548 5549 def _parse_set_item_assignment( 5550 self, kind: t.Optional[str] = None 5551 ) -> t.Optional[exp.Expression]: 5552 index = self._index 5553 5554 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5555 return self._parse_set_transaction(global_=kind == "GLOBAL") 5556 5557 left = self._parse_primary() or self._parse_id_var() 5558 assignment_delimiter = self._match_texts(("=", "TO")) 5559 5560 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5561 self._retreat(index) 5562 return None 5563 5564 right = self._parse_statement() or self._parse_id_var() 5565 this = self.expression(exp.EQ, this=left, expression=right) 5566 5567 return self.expression(exp.SetItem, this=this, kind=kind) 5568 5569 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5570 self._match_text_seq("TRANSACTION") 5571 characteristics = self._parse_csv( 5572 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5573 ) 5574 return self.expression( 5575 exp.SetItem, 5576 expressions=characteristics, 5577 kind="TRANSACTION", 5578 **{"global": global_}, # type: ignore 5579 ) 5580 5581 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5582 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5583 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5584 5585 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5586 index = self._index 5587 set_ = self.expression( 5588 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5589 ) 5590 5591 if self._curr: 5592 self._retreat(index) 5593 return self._parse_as_command(self._prev) 5594 5595 return set_ 5596 5597 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5598 for option in options: 5599 if self._match_text_seq(*option.split(" ")): 5600 return exp.var(option) 5601 return None 5602 5603 def _parse_as_command(self, start: Token) -> exp.Command: 5604 while self._curr: 5605 self._advance() 5606 text = self._find_sql(start, self._prev) 5607 size = len(start.text) 5608 self._warn_unsupported() 5609 return exp.Command(this=text[:size], expression=text[size:]) 5610 5611 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5612 settings = [] 5613 5614 self._match_l_paren() 5615 kind = self._parse_id_var() 5616 5617 if self._match(TokenType.L_PAREN): 5618 while True: 5619 key = self._parse_id_var() 5620 value = self._parse_primary() 5621 5622 if not key and value is None: 5623 break 5624 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5625 self._match(TokenType.R_PAREN) 5626 5627 self._match_r_paren() 5628 5629 return self.expression( 5630 exp.DictProperty, 5631 this=this, 5632 kind=kind.this if kind else None, 5633 settings=settings, 5634 ) 5635 5636 def _parse_dict_range(self, this: str) -> exp.DictRange: 5637 self._match_l_paren() 5638 has_min = self._match_text_seq("MIN") 5639 if has_min: 5640 min = self._parse_var() or self._parse_primary() 5641 self._match_text_seq("MAX") 5642 max = self._parse_var() or self._parse_primary() 5643 else: 5644 max = self._parse_var() or self._parse_primary() 5645 min = exp.Literal.number(0) 5646 self._match_r_paren() 5647 return self.expression(exp.DictRange, this=this, min=min, max=max) 5648 5649 def _parse_comprehension( 5650 self, this: t.Optional[exp.Expression] 5651 ) -> t.Optional[exp.Comprehension]: 5652 index = self._index 5653 expression = self._parse_column() 5654 if not self._match(TokenType.IN): 5655 self._retreat(index - 1) 5656 return None 5657 iterator = self._parse_column() 5658 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5659 return self.expression( 5660 exp.Comprehension, 5661 this=this, 5662 expression=expression, 5663 iterator=iterator, 5664 condition=condition, 5665 ) 5666 5667 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5668 if self._match(TokenType.HEREDOC_STRING): 5669 return self.expression(exp.Heredoc, this=self._prev.text) 5670 5671 if not self._match_text_seq("$"): 5672 return None 5673 5674 tags = ["$"] 5675 tag_text = None 5676 5677 if self._is_connected(): 5678 self._advance() 5679 tags.append(self._prev.text.upper()) 5680 else: 5681 self.raise_error("No closing $ found") 5682 5683 if tags[-1] != "$": 5684 if self._is_connected() and self._match_text_seq("$"): 5685 tag_text = tags[-1] 5686 tags.append("$") 5687 else: 5688 self.raise_error("No closing $ found") 5689 5690 heredoc_start = self._curr 5691 5692 while self._curr: 5693 if self._match_text_seq(*tags, advance=False): 5694 this = self._find_sql(heredoc_start, self._prev) 5695 self._advance(len(tags)) 5696 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5697 5698 self._advance() 5699 5700 self.raise_error(f"No closing {''.join(tags)} found") 5701 return None 5702 5703 def _find_parser( 5704 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5705 ) -> t.Optional[t.Callable]: 5706 if not self._curr: 5707 return None 5708 5709 index = self._index 5710 this = [] 5711 while True: 5712 # The current token might be multiple words 5713 curr = self._curr.text.upper() 5714 key = curr.split(" ") 5715 this.append(curr) 5716 5717 self._advance() 5718 result, trie = in_trie(trie, key) 5719 if result == TrieResult.FAILED: 5720 break 5721 5722 if result == TrieResult.EXISTS: 5723 subparser = parsers[" ".join(this)] 5724 return subparser 5725 5726 self._retreat(index) 5727 return None 5728 5729 def _match(self, token_type, advance=True, expression=None): 5730 if not self._curr: 5731 return None 5732 5733 if self._curr.token_type == token_type: 5734 if advance: 5735 self._advance() 5736 self._add_comments(expression) 5737 return True 5738 5739 return None 5740 5741 def _match_set(self, types, advance=True): 5742 if not self._curr: 5743 return None 5744 5745 if self._curr.token_type in types: 5746 if advance: 5747 self._advance() 5748 return True 5749 5750 return None 5751 5752 def _match_pair(self, token_type_a, token_type_b, advance=True): 5753 if not self._curr or not self._next: 5754 return None 5755 5756 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5757 if advance: 5758 self._advance(2) 5759 return True 5760 5761 return None 5762 5763 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5764 if not self._match(TokenType.L_PAREN, expression=expression): 5765 self.raise_error("Expecting (") 5766 5767 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5768 if not self._match(TokenType.R_PAREN, expression=expression): 5769 self.raise_error("Expecting )") 5770 5771 def _match_texts(self, texts, advance=True): 5772 if self._curr and self._curr.text.upper() in texts: 5773 if advance: 5774 self._advance() 5775 return True 5776 return None 5777 5778 def _match_text_seq(self, *texts, advance=True): 5779 index = self._index 5780 for text in texts: 5781 if self._curr and self._curr.text.upper() == text: 5782 self._advance() 5783 else: 5784 self._retreat(index) 5785 return None 5786 5787 if not advance: 5788 self._retreat(index) 5789 5790 return True 5791 5792 @t.overload 5793 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5794 ... 5795 5796 @t.overload 5797 def _replace_columns_with_dots( 5798 self, this: t.Optional[exp.Expression] 5799 ) -> t.Optional[exp.Expression]: 5800 ... 5801 5802 def _replace_columns_with_dots(self, this): 5803 if isinstance(this, exp.Dot): 5804 exp.replace_children(this, self._replace_columns_with_dots) 5805 elif isinstance(this, exp.Column): 5806 exp.replace_children(this, self._replace_columns_with_dots) 5807 table = this.args.get("table") 5808 this = ( 5809 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5810 ) 5811 5812 return this 5813 5814 def _replace_lambda( 5815 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5816 ) -> t.Optional[exp.Expression]: 5817 if not node: 5818 return node 5819 5820 for column in node.find_all(exp.Column): 5821 if column.parts[0].name in lambda_variables: 5822 dot_or_id = column.to_dot() if column.table else column.this 5823 parent = column.parent 5824 5825 while isinstance(parent, exp.Dot): 5826 if not isinstance(parent.parent, exp.Dot): 5827 parent.replace(dot_or_id) 5828 break 5829 parent = parent.parent 5830 else: 5831 if column is node: 5832 node = dot_or_id 5833 else: 5834 column.replace(dot_or_id) 5835 return node
22def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 23 if len(args) == 1 and args[0].is_star: 24 return exp.StarMap(this=args[0]) 25 26 keys = [] 27 values = [] 28 for i in range(0, len(args), 2): 29 keys.append(args[i]) 30 values.append(args[i + 1]) 31 32 return exp.VarMap( 33 keys=exp.Array(expressions=keys), 34 values=exp.Array(expressions=values), 35 )
51def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
64def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _parser(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _parser
87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: Determines the amount of context to capture from a 95 query string when displaying the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": parse_like, 123 "LOG": parse_logarithm, 124 "TIME_TO_TIME_STR": lambda args: exp.Cast( 125 this=seq_get(args, 0), 126 to=exp.DataType(this=exp.DataType.Type.TEXT), 127 ), 128 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 129 this=exp.Cast( 130 this=seq_get(args, 0), 131 to=exp.DataType(this=exp.DataType.Type.TEXT), 132 ), 133 start=exp.Literal.number(1), 134 length=exp.Literal.number(10), 135 ), 136 "VAR_MAP": parse_var_map, 137 } 138 139 NO_PAREN_FUNCTIONS = { 140 TokenType.CURRENT_DATE: exp.CurrentDate, 141 TokenType.CURRENT_DATETIME: exp.CurrentDate, 142 TokenType.CURRENT_TIME: exp.CurrentTime, 143 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 144 TokenType.CURRENT_USER: exp.CurrentUser, 145 } 146 147 STRUCT_TYPE_TOKENS = { 148 TokenType.NESTED, 149 TokenType.STRUCT, 150 } 151 152 NESTED_TYPE_TOKENS = { 153 TokenType.ARRAY, 154 TokenType.LOWCARDINALITY, 155 TokenType.MAP, 156 TokenType.NULLABLE, 157 *STRUCT_TYPE_TOKENS, 158 } 159 160 ENUM_TYPE_TOKENS = { 161 TokenType.ENUM, 162 TokenType.ENUM8, 163 TokenType.ENUM16, 164 } 165 166 AGGREGATE_TYPE_TOKENS = { 167 TokenType.AGGREGATEFUNCTION, 168 TokenType.SIMPLEAGGREGATEFUNCTION, 169 } 170 171 TYPE_TOKENS = { 172 TokenType.BIT, 173 TokenType.BOOLEAN, 174 TokenType.TINYINT, 175 TokenType.UTINYINT, 176 TokenType.SMALLINT, 177 TokenType.USMALLINT, 178 TokenType.INT, 179 TokenType.UINT, 180 TokenType.BIGINT, 181 TokenType.UBIGINT, 182 TokenType.INT128, 183 TokenType.UINT128, 184 TokenType.INT256, 185 TokenType.UINT256, 186 TokenType.MEDIUMINT, 187 TokenType.UMEDIUMINT, 188 TokenType.FIXEDSTRING, 189 TokenType.FLOAT, 190 TokenType.DOUBLE, 191 TokenType.CHAR, 192 TokenType.NCHAR, 193 TokenType.VARCHAR, 194 TokenType.NVARCHAR, 195 TokenType.BPCHAR, 196 TokenType.TEXT, 197 TokenType.MEDIUMTEXT, 198 TokenType.LONGTEXT, 199 TokenType.MEDIUMBLOB, 200 TokenType.LONGBLOB, 201 TokenType.BINARY, 202 TokenType.VARBINARY, 203 TokenType.JSON, 204 TokenType.JSONB, 205 TokenType.INTERVAL, 206 TokenType.TINYBLOB, 207 TokenType.TINYTEXT, 208 TokenType.TIME, 209 TokenType.TIMETZ, 210 TokenType.TIMESTAMP, 211 TokenType.TIMESTAMP_S, 212 TokenType.TIMESTAMP_MS, 213 TokenType.TIMESTAMP_NS, 214 TokenType.TIMESTAMPTZ, 215 TokenType.TIMESTAMPLTZ, 216 TokenType.DATETIME, 217 TokenType.DATETIME64, 218 TokenType.DATE, 219 TokenType.DATE32, 220 TokenType.INT4RANGE, 221 TokenType.INT4MULTIRANGE, 222 TokenType.INT8RANGE, 223 TokenType.INT8MULTIRANGE, 224 TokenType.NUMRANGE, 225 TokenType.NUMMULTIRANGE, 226 TokenType.TSRANGE, 227 TokenType.TSMULTIRANGE, 228 TokenType.TSTZRANGE, 229 TokenType.TSTZMULTIRANGE, 230 TokenType.DATERANGE, 231 TokenType.DATEMULTIRANGE, 232 TokenType.DECIMAL, 233 TokenType.UDECIMAL, 234 TokenType.BIGDECIMAL, 235 TokenType.UUID, 236 TokenType.GEOGRAPHY, 237 TokenType.GEOMETRY, 238 TokenType.HLLSKETCH, 239 TokenType.HSTORE, 240 TokenType.PSEUDO_TYPE, 241 TokenType.SUPER, 242 TokenType.SERIAL, 243 TokenType.SMALLSERIAL, 244 TokenType.BIGSERIAL, 245 TokenType.XML, 246 TokenType.YEAR, 247 TokenType.UNIQUEIDENTIFIER, 248 TokenType.USERDEFINED, 249 TokenType.MONEY, 250 TokenType.SMALLMONEY, 251 TokenType.ROWVERSION, 252 TokenType.IMAGE, 253 TokenType.VARIANT, 254 TokenType.OBJECT, 255 TokenType.OBJECT_IDENTIFIER, 256 TokenType.INET, 257 TokenType.IPADDRESS, 258 TokenType.IPPREFIX, 259 TokenType.IPV4, 260 TokenType.IPV6, 261 TokenType.UNKNOWN, 262 TokenType.NULL, 263 *ENUM_TYPE_TOKENS, 264 *NESTED_TYPE_TOKENS, 265 *AGGREGATE_TYPE_TOKENS, 266 } 267 268 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 269 TokenType.BIGINT: TokenType.UBIGINT, 270 TokenType.INT: TokenType.UINT, 271 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 272 TokenType.SMALLINT: TokenType.USMALLINT, 273 TokenType.TINYINT: TokenType.UTINYINT, 274 TokenType.DECIMAL: TokenType.UDECIMAL, 275 } 276 277 SUBQUERY_PREDICATES = { 278 TokenType.ANY: exp.Any, 279 TokenType.ALL: exp.All, 280 TokenType.EXISTS: exp.Exists, 281 TokenType.SOME: exp.Any, 282 } 283 284 RESERVED_TOKENS = { 285 *Tokenizer.SINGLE_TOKENS.values(), 286 TokenType.SELECT, 287 } 288 289 DB_CREATABLES = { 290 TokenType.DATABASE, 291 TokenType.SCHEMA, 292 TokenType.TABLE, 293 TokenType.VIEW, 294 TokenType.MODEL, 295 TokenType.DICTIONARY, 296 } 297 298 CREATABLES = { 299 TokenType.COLUMN, 300 TokenType.CONSTRAINT, 301 TokenType.FUNCTION, 302 TokenType.INDEX, 303 TokenType.PROCEDURE, 304 TokenType.FOREIGN_KEY, 305 *DB_CREATABLES, 306 } 307 308 # Tokens that can represent identifiers 309 ID_VAR_TOKENS = { 310 TokenType.VAR, 311 TokenType.ANTI, 312 TokenType.APPLY, 313 TokenType.ASC, 314 TokenType.AUTO_INCREMENT, 315 TokenType.BEGIN, 316 TokenType.BPCHAR, 317 TokenType.CACHE, 318 TokenType.CASE, 319 TokenType.COLLATE, 320 TokenType.COMMAND, 321 TokenType.COMMENT, 322 TokenType.COMMIT, 323 TokenType.CONSTRAINT, 324 TokenType.DEFAULT, 325 TokenType.DELETE, 326 TokenType.DESC, 327 TokenType.DESCRIBE, 328 TokenType.DICTIONARY, 329 TokenType.DIV, 330 TokenType.END, 331 TokenType.EXECUTE, 332 TokenType.ESCAPE, 333 TokenType.FALSE, 334 TokenType.FIRST, 335 TokenType.FILTER, 336 TokenType.FINAL, 337 TokenType.FORMAT, 338 TokenType.FULL, 339 TokenType.IS, 340 TokenType.ISNULL, 341 TokenType.INTERVAL, 342 TokenType.KEEP, 343 TokenType.KILL, 344 TokenType.LEFT, 345 TokenType.LOAD, 346 TokenType.MERGE, 347 TokenType.NATURAL, 348 TokenType.NEXT, 349 TokenType.OFFSET, 350 TokenType.OPERATOR, 351 TokenType.ORDINALITY, 352 TokenType.OVERLAPS, 353 TokenType.OVERWRITE, 354 TokenType.PARTITION, 355 TokenType.PERCENT, 356 TokenType.PIVOT, 357 TokenType.PRAGMA, 358 TokenType.RANGE, 359 TokenType.RECURSIVE, 360 TokenType.REFERENCES, 361 TokenType.REFRESH, 362 TokenType.REPLACE, 363 TokenType.RIGHT, 364 TokenType.ROW, 365 TokenType.ROWS, 366 TokenType.SEMI, 367 TokenType.SET, 368 TokenType.SETTINGS, 369 TokenType.SHOW, 370 TokenType.TEMPORARY, 371 TokenType.TOP, 372 TokenType.TRUE, 373 TokenType.UNIQUE, 374 TokenType.UNPIVOT, 375 TokenType.UPDATE, 376 TokenType.USE, 377 TokenType.VOLATILE, 378 TokenType.WINDOW, 379 *CREATABLES, 380 *SUBQUERY_PREDICATES, 381 *TYPE_TOKENS, 382 *NO_PAREN_FUNCTIONS, 383 } 384 385 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 386 387 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 388 TokenType.ANTI, 389 TokenType.APPLY, 390 TokenType.ASOF, 391 TokenType.FULL, 392 TokenType.LEFT, 393 TokenType.LOCK, 394 TokenType.NATURAL, 395 TokenType.OFFSET, 396 TokenType.RIGHT, 397 TokenType.SEMI, 398 TokenType.WINDOW, 399 } 400 401 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 402 403 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 404 405 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 406 407 FUNC_TOKENS = { 408 TokenType.COLLATE, 409 TokenType.COMMAND, 410 TokenType.CURRENT_DATE, 411 TokenType.CURRENT_DATETIME, 412 TokenType.CURRENT_TIMESTAMP, 413 TokenType.CURRENT_TIME, 414 TokenType.CURRENT_USER, 415 TokenType.FILTER, 416 TokenType.FIRST, 417 TokenType.FORMAT, 418 TokenType.GLOB, 419 TokenType.IDENTIFIER, 420 TokenType.INDEX, 421 TokenType.ISNULL, 422 TokenType.ILIKE, 423 TokenType.INSERT, 424 TokenType.LIKE, 425 TokenType.MERGE, 426 TokenType.OFFSET, 427 TokenType.PRIMARY_KEY, 428 TokenType.RANGE, 429 TokenType.REPLACE, 430 TokenType.RLIKE, 431 TokenType.ROW, 432 TokenType.UNNEST, 433 TokenType.VAR, 434 TokenType.LEFT, 435 TokenType.RIGHT, 436 TokenType.DATE, 437 TokenType.DATETIME, 438 TokenType.TABLE, 439 TokenType.TIMESTAMP, 440 TokenType.TIMESTAMPTZ, 441 TokenType.WINDOW, 442 TokenType.XOR, 443 *TYPE_TOKENS, 444 *SUBQUERY_PREDICATES, 445 } 446 447 CONJUNCTION = { 448 TokenType.AND: exp.And, 449 TokenType.OR: exp.Or, 450 } 451 452 EQUALITY = { 453 TokenType.COLON_EQ: exp.PropertyEQ, 454 TokenType.EQ: exp.EQ, 455 TokenType.NEQ: exp.NEQ, 456 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 457 } 458 459 COMPARISON = { 460 TokenType.GT: exp.GT, 461 TokenType.GTE: exp.GTE, 462 TokenType.LT: exp.LT, 463 TokenType.LTE: exp.LTE, 464 } 465 466 BITWISE = { 467 TokenType.AMP: exp.BitwiseAnd, 468 TokenType.CARET: exp.BitwiseXor, 469 TokenType.PIPE: exp.BitwiseOr, 470 } 471 472 TERM = { 473 TokenType.DASH: exp.Sub, 474 TokenType.PLUS: exp.Add, 475 TokenType.MOD: exp.Mod, 476 TokenType.COLLATE: exp.Collate, 477 } 478 479 FACTOR = { 480 TokenType.DIV: exp.IntDiv, 481 TokenType.LR_ARROW: exp.Distance, 482 TokenType.SLASH: exp.Div, 483 TokenType.STAR: exp.Mul, 484 } 485 486 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 487 488 TIMES = { 489 TokenType.TIME, 490 TokenType.TIMETZ, 491 } 492 493 TIMESTAMPS = { 494 TokenType.TIMESTAMP, 495 TokenType.TIMESTAMPTZ, 496 TokenType.TIMESTAMPLTZ, 497 *TIMES, 498 } 499 500 SET_OPERATIONS = { 501 TokenType.UNION, 502 TokenType.INTERSECT, 503 TokenType.EXCEPT, 504 } 505 506 JOIN_METHODS = { 507 TokenType.NATURAL, 508 TokenType.ASOF, 509 } 510 511 JOIN_SIDES = { 512 TokenType.LEFT, 513 TokenType.RIGHT, 514 TokenType.FULL, 515 } 516 517 JOIN_KINDS = { 518 TokenType.INNER, 519 TokenType.OUTER, 520 TokenType.CROSS, 521 TokenType.SEMI, 522 TokenType.ANTI, 523 } 524 525 JOIN_HINTS: t.Set[str] = set() 526 527 LAMBDAS = { 528 TokenType.ARROW: lambda self, expressions: self.expression( 529 exp.Lambda, 530 this=self._replace_lambda( 531 self._parse_conjunction(), 532 {node.name for node in expressions}, 533 ), 534 expressions=expressions, 535 ), 536 TokenType.FARROW: lambda self, expressions: self.expression( 537 exp.Kwarg, 538 this=exp.var(expressions[0].name), 539 expression=self._parse_conjunction(), 540 ), 541 } 542 543 COLUMN_OPERATORS = { 544 TokenType.DOT: None, 545 TokenType.DCOLON: lambda self, this, to: self.expression( 546 exp.Cast if self.STRICT_CAST else exp.TryCast, 547 this=this, 548 to=to, 549 ), 550 TokenType.ARROW: lambda self, this, path: self.expression( 551 exp.JSONExtract, 552 this=this, 553 expression=self.dialect.to_json_path(path), 554 ), 555 TokenType.DARROW: lambda self, this, path: self.expression( 556 exp.JSONExtractScalar, 557 this=this, 558 expression=self.dialect.to_json_path(path), 559 ), 560 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 561 exp.JSONBExtract, 562 this=this, 563 expression=path, 564 ), 565 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 566 exp.JSONBExtractScalar, 567 this=this, 568 expression=path, 569 ), 570 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 571 exp.JSONBContains, 572 this=this, 573 expression=key, 574 ), 575 } 576 577 EXPRESSION_PARSERS = { 578 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 579 exp.Column: lambda self: self._parse_column(), 580 exp.Condition: lambda self: self._parse_conjunction(), 581 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 582 exp.Expression: lambda self: self._parse_statement(), 583 exp.From: lambda self: self._parse_from(), 584 exp.Group: lambda self: self._parse_group(), 585 exp.Having: lambda self: self._parse_having(), 586 exp.Identifier: lambda self: self._parse_id_var(), 587 exp.Join: lambda self: self._parse_join(), 588 exp.Lambda: lambda self: self._parse_lambda(), 589 exp.Lateral: lambda self: self._parse_lateral(), 590 exp.Limit: lambda self: self._parse_limit(), 591 exp.Offset: lambda self: self._parse_offset(), 592 exp.Order: lambda self: self._parse_order(), 593 exp.Ordered: lambda self: self._parse_ordered(), 594 exp.Properties: lambda self: self._parse_properties(), 595 exp.Qualify: lambda self: self._parse_qualify(), 596 exp.Returning: lambda self: self._parse_returning(), 597 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 598 exp.Table: lambda self: self._parse_table_parts(), 599 exp.TableAlias: lambda self: self._parse_table_alias(), 600 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 601 exp.Where: lambda self: self._parse_where(), 602 exp.Window: lambda self: self._parse_named_window(), 603 exp.With: lambda self: self._parse_with(), 604 "JOIN_TYPE": lambda self: self._parse_join_parts(), 605 } 606 607 STATEMENT_PARSERS = { 608 TokenType.ALTER: lambda self: self._parse_alter(), 609 TokenType.BEGIN: lambda self: self._parse_transaction(), 610 TokenType.CACHE: lambda self: self._parse_cache(), 611 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 612 TokenType.COMMENT: lambda self: self._parse_comment(), 613 TokenType.CREATE: lambda self: self._parse_create(), 614 TokenType.DELETE: lambda self: self._parse_delete(), 615 TokenType.DESC: lambda self: self._parse_describe(), 616 TokenType.DESCRIBE: lambda self: self._parse_describe(), 617 TokenType.DROP: lambda self: self._parse_drop(), 618 TokenType.INSERT: lambda self: self._parse_insert(), 619 TokenType.KILL: lambda self: self._parse_kill(), 620 TokenType.LOAD: lambda self: self._parse_load(), 621 TokenType.MERGE: lambda self: self._parse_merge(), 622 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 623 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 624 TokenType.REFRESH: lambda self: self._parse_refresh(), 625 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 626 TokenType.SET: lambda self: self._parse_set(), 627 TokenType.UNCACHE: lambda self: self._parse_uncache(), 628 TokenType.UPDATE: lambda self: self._parse_update(), 629 TokenType.USE: lambda self: self.expression( 630 exp.Use, 631 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 632 and exp.var(self._prev.text), 633 this=self._parse_table(schema=False), 634 ), 635 } 636 637 UNARY_PARSERS = { 638 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 639 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 640 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 641 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 642 } 643 644 PRIMARY_PARSERS = { 645 TokenType.STRING: lambda self, token: self.expression( 646 exp.Literal, this=token.text, is_string=True 647 ), 648 TokenType.NUMBER: lambda self, token: self.expression( 649 exp.Literal, this=token.text, is_string=False 650 ), 651 TokenType.STAR: lambda self, _: self.expression( 652 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 653 ), 654 TokenType.NULL: lambda self, _: self.expression(exp.Null), 655 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 656 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 657 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 658 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 659 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 660 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 661 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 662 exp.National, this=token.text 663 ), 664 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.UNICODE_STRING: lambda self, token: self.expression( 669 exp.UnicodeString, 670 this=token.text, 671 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 672 ), 673 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 674 } 675 676 PLACEHOLDER_PARSERS = { 677 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 678 TokenType.PARAMETER: lambda self: self._parse_parameter(), 679 TokenType.COLON: lambda self: ( 680 self.expression(exp.Placeholder, this=self._prev.text) 681 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 682 else None 683 ), 684 } 685 686 RANGE_PARSERS = { 687 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 688 TokenType.GLOB: binary_range_parser(exp.Glob), 689 TokenType.ILIKE: binary_range_parser(exp.ILike), 690 TokenType.IN: lambda self, this: self._parse_in(this), 691 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 692 TokenType.IS: lambda self, this: self._parse_is(this), 693 TokenType.LIKE: binary_range_parser(exp.Like), 694 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 695 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 696 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 697 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 698 } 699 700 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 701 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 702 "AUTO": lambda self: self._parse_auto_property(), 703 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 704 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 705 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 706 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 707 "CHECKSUM": lambda self: self._parse_checksum(), 708 "CLUSTER BY": lambda self: self._parse_cluster(), 709 "CLUSTERED": lambda self: self._parse_clustered_by(), 710 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 711 exp.CollateProperty, **kwargs 712 ), 713 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 714 "CONTAINS": lambda self: self._parse_contains_property(), 715 "COPY": lambda self: self._parse_copy_property(), 716 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 717 "DEFINER": lambda self: self._parse_definer(), 718 "DETERMINISTIC": lambda self: self.expression( 719 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 720 ), 721 "DISTKEY": lambda self: self._parse_distkey(), 722 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 723 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 724 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 725 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 726 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 727 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 728 "FREESPACE": lambda self: self._parse_freespace(), 729 "HEAP": lambda self: self.expression(exp.HeapProperty), 730 "IMMUTABLE": lambda self: self.expression( 731 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 732 ), 733 "INHERITS": lambda self: self.expression( 734 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 735 ), 736 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 737 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 738 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 739 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 740 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 741 "LIKE": lambda self: self._parse_create_like(), 742 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 743 "LOCK": lambda self: self._parse_locking(), 744 "LOCKING": lambda self: self._parse_locking(), 745 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 746 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 747 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 748 "MODIFIES": lambda self: self._parse_modifies_property(), 749 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 750 "NO": lambda self: self._parse_no_property(), 751 "ON": lambda self: self._parse_on_property(), 752 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 753 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 754 "PARTITION": lambda self: self._parse_partitioned_of(), 755 "PARTITION BY": lambda self: self._parse_partitioned_by(), 756 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 757 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 758 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 759 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 760 "READS": lambda self: self._parse_reads_property(), 761 "REMOTE": lambda self: self._parse_remote_with_connection(), 762 "RETURNS": lambda self: self._parse_returns(), 763 "ROW": lambda self: self._parse_row(), 764 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 765 "SAMPLE": lambda self: self.expression( 766 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 767 ), 768 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 769 "SETTINGS": lambda self: self.expression( 770 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 771 ), 772 "SORTKEY": lambda self: self._parse_sortkey(), 773 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 774 "STABLE": lambda self: self.expression( 775 exp.StabilityProperty, this=exp.Literal.string("STABLE") 776 ), 777 "STORED": lambda self: self._parse_stored(), 778 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 779 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 780 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 781 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 782 "TO": lambda self: self._parse_to_table(), 783 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 784 "TRANSFORM": lambda self: self.expression( 785 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 786 ), 787 "TTL": lambda self: self._parse_ttl(), 788 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 789 "VOLATILE": lambda self: self._parse_volatile_property(), 790 "WITH": lambda self: self._parse_with_property(), 791 } 792 793 CONSTRAINT_PARSERS = { 794 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 795 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 796 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 797 "CHARACTER SET": lambda self: self.expression( 798 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 799 ), 800 "CHECK": lambda self: self.expression( 801 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 802 ), 803 "COLLATE": lambda self: self.expression( 804 exp.CollateColumnConstraint, this=self._parse_var() 805 ), 806 "COMMENT": lambda self: self.expression( 807 exp.CommentColumnConstraint, this=self._parse_string() 808 ), 809 "COMPRESS": lambda self: self._parse_compress(), 810 "CLUSTERED": lambda self: self.expression( 811 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 812 ), 813 "NONCLUSTERED": lambda self: self.expression( 814 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 815 ), 816 "DEFAULT": lambda self: self.expression( 817 exp.DefaultColumnConstraint, this=self._parse_bitwise() 818 ), 819 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 820 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 821 "FORMAT": lambda self: self.expression( 822 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 823 ), 824 "GENERATED": lambda self: self._parse_generated_as_identity(), 825 "IDENTITY": lambda self: self._parse_auto_increment(), 826 "INLINE": lambda self: self._parse_inline(), 827 "LIKE": lambda self: self._parse_create_like(), 828 "NOT": lambda self: self._parse_not_constraint(), 829 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 830 "ON": lambda self: ( 831 self._match(TokenType.UPDATE) 832 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 833 ) 834 or self.expression(exp.OnProperty, this=self._parse_id_var()), 835 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 836 "PERIOD": lambda self: self._parse_period_for_system_time(), 837 "PRIMARY KEY": lambda self: self._parse_primary_key(), 838 "REFERENCES": lambda self: self._parse_references(match=False), 839 "TITLE": lambda self: self.expression( 840 exp.TitleColumnConstraint, this=self._parse_var_or_string() 841 ), 842 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 843 "UNIQUE": lambda self: self._parse_unique(), 844 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 845 "WITH": lambda self: self.expression( 846 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 847 ), 848 } 849 850 ALTER_PARSERS = { 851 "ADD": lambda self: self._parse_alter_table_add(), 852 "ALTER": lambda self: self._parse_alter_table_alter(), 853 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 854 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 855 "DROP": lambda self: self._parse_alter_table_drop(), 856 "RENAME": lambda self: self._parse_alter_table_rename(), 857 } 858 859 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 860 861 NO_PAREN_FUNCTION_PARSERS = { 862 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 863 "CASE": lambda self: self._parse_case(), 864 "IF": lambda self: self._parse_if(), 865 "NEXT": lambda self: self._parse_next_value_for(), 866 } 867 868 INVALID_FUNC_NAME_TOKENS = { 869 TokenType.IDENTIFIER, 870 TokenType.STRING, 871 } 872 873 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 874 875 FUNCTION_PARSERS = { 876 "ANY_VALUE": lambda self: self._parse_any_value(), 877 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 878 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 879 "DECODE": lambda self: self._parse_decode(), 880 "EXTRACT": lambda self: self._parse_extract(), 881 "JSON_OBJECT": lambda self: self._parse_json_object(), 882 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 883 "JSON_TABLE": lambda self: self._parse_json_table(), 884 "MATCH": lambda self: self._parse_match_against(), 885 "OPENJSON": lambda self: self._parse_open_json(), 886 "POSITION": lambda self: self._parse_position(), 887 "PREDICT": lambda self: self._parse_predict(), 888 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 889 "STRING_AGG": lambda self: self._parse_string_agg(), 890 "SUBSTRING": lambda self: self._parse_substring(), 891 "TRIM": lambda self: self._parse_trim(), 892 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 893 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 894 } 895 896 QUERY_MODIFIER_PARSERS = { 897 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 898 TokenType.WHERE: lambda self: ("where", self._parse_where()), 899 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 900 TokenType.HAVING: lambda self: ("having", self._parse_having()), 901 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 902 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 903 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 904 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 905 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 906 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 907 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 908 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 909 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 911 TokenType.CLUSTER_BY: lambda self: ( 912 "cluster", 913 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 914 ), 915 TokenType.DISTRIBUTE_BY: lambda self: ( 916 "distribute", 917 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 918 ), 919 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 920 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 921 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 922 } 923 924 SET_PARSERS = { 925 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 926 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 927 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 928 "TRANSACTION": lambda self: self._parse_set_transaction(), 929 } 930 931 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 932 933 TYPE_LITERAL_PARSERS = { 934 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 935 } 936 937 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 938 939 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 940 941 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 942 943 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 944 TRANSACTION_CHARACTERISTICS = { 945 "ISOLATION LEVEL REPEATABLE READ", 946 "ISOLATION LEVEL READ COMMITTED", 947 "ISOLATION LEVEL READ UNCOMMITTED", 948 "ISOLATION LEVEL SERIALIZABLE", 949 "READ WRITE", 950 "READ ONLY", 951 } 952 953 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 954 955 CLONE_KEYWORDS = {"CLONE", "COPY"} 956 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 957 958 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 959 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 960 961 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 962 963 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 964 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 965 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 966 967 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 968 969 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 970 971 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 972 973 DISTINCT_TOKENS = {TokenType.DISTINCT} 974 975 NULL_TOKENS = {TokenType.NULL} 976 977 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 978 979 STRICT_CAST = True 980 981 PREFIXED_PIVOT_COLUMNS = False 982 IDENTIFY_PIVOT_STRINGS = False 983 984 LOG_DEFAULTS_TO_LN = False 985 986 # Whether or not ADD is present for each column added by ALTER TABLE 987 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 988 989 # Whether or not the table sample clause expects CSV syntax 990 TABLESAMPLE_CSV = False 991 992 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 993 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 994 995 # Whether the TRIM function expects the characters to trim as its first argument 996 TRIM_PATTERN_FIRST = False 997 998 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 999 STRING_ALIASES = False 1000 1001 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1002 MODIFIERS_ATTACHED_TO_UNION = True 1003 UNION_MODIFIERS = {"order", "limit", "offset"} 1004 1005 # parses no parenthesis if statements as commands 1006 NO_PAREN_IF_COMMANDS = True 1007 1008 __slots__ = ( 1009 "error_level", 1010 "error_message_context", 1011 "max_errors", 1012 "dialect", 1013 "sql", 1014 "errors", 1015 "_tokens", 1016 "_index", 1017 "_curr", 1018 "_next", 1019 "_prev", 1020 "_prev_comments", 1021 ) 1022 1023 # Autofilled 1024 SHOW_TRIE: t.Dict = {} 1025 SET_TRIE: t.Dict = {} 1026 1027 def __init__( 1028 self, 1029 error_level: t.Optional[ErrorLevel] = None, 1030 error_message_context: int = 100, 1031 max_errors: int = 3, 1032 dialect: DialectType = None, 1033 ): 1034 from sqlglot.dialects import Dialect 1035 1036 self.error_level = error_level or ErrorLevel.IMMEDIATE 1037 self.error_message_context = error_message_context 1038 self.max_errors = max_errors 1039 self.dialect = Dialect.get_or_raise(dialect) 1040 self.reset() 1041 1042 def reset(self): 1043 self.sql = "" 1044 self.errors = [] 1045 self._tokens = [] 1046 self._index = 0 1047 self._curr = None 1048 self._next = None 1049 self._prev = None 1050 self._prev_comments = None 1051 1052 def parse( 1053 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 """ 1056 Parses a list of tokens and returns a list of syntax trees, one tree 1057 per parsed SQL statement. 1058 1059 Args: 1060 raw_tokens: The list of tokens. 1061 sql: The original SQL string, used to produce helpful debug messages. 1062 1063 Returns: 1064 The list of the produced syntax trees. 1065 """ 1066 return self._parse( 1067 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1068 ) 1069 1070 def parse_into( 1071 self, 1072 expression_types: exp.IntoType, 1073 raw_tokens: t.List[Token], 1074 sql: t.Optional[str] = None, 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens into a given Expression type. If a collection of Expression 1078 types is given instead, this method will try to parse the token list into each one 1079 of them, stopping at the first for which the parsing succeeds. 1080 1081 Args: 1082 expression_types: The expression type(s) to try and parse the token list into. 1083 raw_tokens: The list of tokens. 1084 sql: The original SQL string, used to produce helpful debug messages. 1085 1086 Returns: 1087 The target Expression. 1088 """ 1089 errors = [] 1090 for expression_type in ensure_list(expression_types): 1091 parser = self.EXPRESSION_PARSERS.get(expression_type) 1092 if not parser: 1093 raise TypeError(f"No parser registered for {expression_type}") 1094 1095 try: 1096 return self._parse(parser, raw_tokens, sql) 1097 except ParseError as e: 1098 e.errors[0]["into_expression"] = expression_type 1099 errors.append(e) 1100 1101 raise ParseError( 1102 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1103 errors=merge_errors(errors), 1104 ) from errors[-1] 1105 1106 def _parse( 1107 self, 1108 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1109 raw_tokens: t.List[Token], 1110 sql: t.Optional[str] = None, 1111 ) -> t.List[t.Optional[exp.Expression]]: 1112 self.reset() 1113 self.sql = sql or "" 1114 1115 total = len(raw_tokens) 1116 chunks: t.List[t.List[Token]] = [[]] 1117 1118 for i, token in enumerate(raw_tokens): 1119 if token.token_type == TokenType.SEMICOLON: 1120 if i < total - 1: 1121 chunks.append([]) 1122 else: 1123 chunks[-1].append(token) 1124 1125 expressions = [] 1126 1127 for tokens in chunks: 1128 self._index = -1 1129 self._tokens = tokens 1130 self._advance() 1131 1132 expressions.append(parse_method(self)) 1133 1134 if self._index < len(self._tokens): 1135 self.raise_error("Invalid expression / Unexpected token") 1136 1137 self.check_errors() 1138 1139 return expressions 1140 1141 def check_errors(self) -> None: 1142 """Logs or raises any found errors, depending on the chosen error level setting.""" 1143 if self.error_level == ErrorLevel.WARN: 1144 for error in self.errors: 1145 logger.error(str(error)) 1146 elif self.error_level == ErrorLevel.RAISE and self.errors: 1147 raise ParseError( 1148 concat_messages(self.errors, self.max_errors), 1149 errors=merge_errors(self.errors), 1150 ) 1151 1152 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1153 """ 1154 Appends an error in the list of recorded errors or raises it, depending on the chosen 1155 error level setting. 1156 """ 1157 token = token or self._curr or self._prev or Token.string("") 1158 start = token.start 1159 end = token.end + 1 1160 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1161 highlight = self.sql[start:end] 1162 end_context = self.sql[end : end + self.error_message_context] 1163 1164 error = ParseError.new( 1165 f"{message}. Line {token.line}, Col: {token.col}.\n" 1166 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1167 description=message, 1168 line=token.line, 1169 col=token.col, 1170 start_context=start_context, 1171 highlight=highlight, 1172 end_context=end_context, 1173 ) 1174 1175 if self.error_level == ErrorLevel.IMMEDIATE: 1176 raise error 1177 1178 self.errors.append(error) 1179 1180 def expression( 1181 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1182 ) -> E: 1183 """ 1184 Creates a new, validated Expression. 1185 1186 Args: 1187 exp_class: The expression class to instantiate. 1188 comments: An optional list of comments to attach to the expression. 1189 kwargs: The arguments to set for the expression along with their respective values. 1190 1191 Returns: 1192 The target expression. 1193 """ 1194 instance = exp_class(**kwargs) 1195 instance.add_comments(comments) if comments else self._add_comments(instance) 1196 return self.validate_expression(instance) 1197 1198 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1199 if expression and self._prev_comments: 1200 expression.add_comments(self._prev_comments) 1201 self._prev_comments = None 1202 1203 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1204 """ 1205 Validates an Expression, making sure that all its mandatory arguments are set. 1206 1207 Args: 1208 expression: The expression to validate. 1209 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1210 1211 Returns: 1212 The validated expression. 1213 """ 1214 if self.error_level != ErrorLevel.IGNORE: 1215 for error_message in expression.error_messages(args): 1216 self.raise_error(error_message) 1217 1218 return expression 1219 1220 def _find_sql(self, start: Token, end: Token) -> str: 1221 return self.sql[start.start : end.end + 1] 1222 1223 def _is_connected(self) -> bool: 1224 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1225 1226 def _advance(self, times: int = 1) -> None: 1227 self._index += times 1228 self._curr = seq_get(self._tokens, self._index) 1229 self._next = seq_get(self._tokens, self._index + 1) 1230 1231 if self._index > 0: 1232 self._prev = self._tokens[self._index - 1] 1233 self._prev_comments = self._prev.comments 1234 else: 1235 self._prev = None 1236 self._prev_comments = None 1237 1238 def _retreat(self, index: int) -> None: 1239 if index != self._index: 1240 self._advance(index - self._index) 1241 1242 def _warn_unsupported(self) -> None: 1243 if len(self._tokens) <= 1: 1244 return 1245 1246 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1247 # interested in emitting a warning for the one being currently processed. 1248 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1249 1250 logger.warning( 1251 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1252 ) 1253 1254 def _parse_command(self) -> exp.Command: 1255 self._warn_unsupported() 1256 return self.expression( 1257 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1258 ) 1259 1260 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1261 start = self._prev 1262 exists = self._parse_exists() if allow_exists else None 1263 1264 self._match(TokenType.ON) 1265 1266 kind = self._match_set(self.CREATABLES) and self._prev 1267 if not kind: 1268 return self._parse_as_command(start) 1269 1270 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1271 this = self._parse_user_defined_function(kind=kind.token_type) 1272 elif kind.token_type == TokenType.TABLE: 1273 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1274 elif kind.token_type == TokenType.COLUMN: 1275 this = self._parse_column() 1276 else: 1277 this = self._parse_id_var() 1278 1279 self._match(TokenType.IS) 1280 1281 return self.expression( 1282 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1283 ) 1284 1285 def _parse_to_table( 1286 self, 1287 ) -> exp.ToTableProperty: 1288 table = self._parse_table_parts(schema=True) 1289 return self.expression(exp.ToTableProperty, this=table) 1290 1291 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1292 def _parse_ttl(self) -> exp.Expression: 1293 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1294 this = self._parse_bitwise() 1295 1296 if self._match_text_seq("DELETE"): 1297 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1298 if self._match_text_seq("RECOMPRESS"): 1299 return self.expression( 1300 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1301 ) 1302 if self._match_text_seq("TO", "DISK"): 1303 return self.expression( 1304 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1305 ) 1306 if self._match_text_seq("TO", "VOLUME"): 1307 return self.expression( 1308 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1309 ) 1310 1311 return this 1312 1313 expressions = self._parse_csv(_parse_ttl_action) 1314 where = self._parse_where() 1315 group = self._parse_group() 1316 1317 aggregates = None 1318 if group and self._match(TokenType.SET): 1319 aggregates = self._parse_csv(self._parse_set_item) 1320 1321 return self.expression( 1322 exp.MergeTreeTTL, 1323 expressions=expressions, 1324 where=where, 1325 group=group, 1326 aggregates=aggregates, 1327 ) 1328 1329 def _parse_statement(self) -> t.Optional[exp.Expression]: 1330 if self._curr is None: 1331 return None 1332 1333 if self._match_set(self.STATEMENT_PARSERS): 1334 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1335 1336 if self._match_set(Tokenizer.COMMANDS): 1337 return self._parse_command() 1338 1339 expression = self._parse_expression() 1340 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1341 return self._parse_query_modifiers(expression) 1342 1343 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1344 start = self._prev 1345 temporary = self._match(TokenType.TEMPORARY) 1346 materialized = self._match_text_seq("MATERIALIZED") 1347 1348 kind = self._match_set(self.CREATABLES) and self._prev.text 1349 if not kind: 1350 return self._parse_as_command(start) 1351 1352 return self.expression( 1353 exp.Drop, 1354 comments=start.comments, 1355 exists=exists or self._parse_exists(), 1356 this=self._parse_table( 1357 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1358 ), 1359 kind=kind, 1360 temporary=temporary, 1361 materialized=materialized, 1362 cascade=self._match_text_seq("CASCADE"), 1363 constraints=self._match_text_seq("CONSTRAINTS"), 1364 purge=self._match_text_seq("PURGE"), 1365 ) 1366 1367 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1368 return ( 1369 self._match_text_seq("IF") 1370 and (not not_ or self._match(TokenType.NOT)) 1371 and self._match(TokenType.EXISTS) 1372 ) 1373 1374 def _parse_create(self) -> exp.Create | exp.Command: 1375 # Note: this can't be None because we've matched a statement parser 1376 start = self._prev 1377 comments = self._prev_comments 1378 1379 replace = ( 1380 start.token_type == TokenType.REPLACE 1381 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1382 or self._match_pair(TokenType.OR, TokenType.ALTER) 1383 ) 1384 unique = self._match(TokenType.UNIQUE) 1385 1386 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1387 self._advance() 1388 1389 properties = None 1390 create_token = self._match_set(self.CREATABLES) and self._prev 1391 1392 if not create_token: 1393 # exp.Properties.Location.POST_CREATE 1394 properties = self._parse_properties() 1395 create_token = self._match_set(self.CREATABLES) and self._prev 1396 1397 if not properties or not create_token: 1398 return self._parse_as_command(start) 1399 1400 exists = self._parse_exists(not_=True) 1401 this = None 1402 expression: t.Optional[exp.Expression] = None 1403 indexes = None 1404 no_schema_binding = None 1405 begin = None 1406 end = None 1407 clone = None 1408 1409 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1410 nonlocal properties 1411 if properties and temp_props: 1412 properties.expressions.extend(temp_props.expressions) 1413 elif temp_props: 1414 properties = temp_props 1415 1416 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1417 this = self._parse_user_defined_function(kind=create_token.token_type) 1418 1419 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1420 extend_props(self._parse_properties()) 1421 1422 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1423 1424 if not expression: 1425 if self._match(TokenType.COMMAND): 1426 expression = self._parse_as_command(self._prev) 1427 else: 1428 begin = self._match(TokenType.BEGIN) 1429 return_ = self._match_text_seq("RETURN") 1430 1431 if self._match(TokenType.STRING, advance=False): 1432 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1433 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1434 expression = self._parse_string() 1435 extend_props(self._parse_properties()) 1436 else: 1437 expression = self._parse_statement() 1438 1439 end = self._match_text_seq("END") 1440 1441 if return_: 1442 expression = self.expression(exp.Return, this=expression) 1443 elif create_token.token_type == TokenType.INDEX: 1444 this = self._parse_index(index=self._parse_id_var()) 1445 elif create_token.token_type in self.DB_CREATABLES: 1446 table_parts = self._parse_table_parts( 1447 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1448 ) 1449 1450 # exp.Properties.Location.POST_NAME 1451 self._match(TokenType.COMMA) 1452 extend_props(self._parse_properties(before=True)) 1453 1454 this = self._parse_schema(this=table_parts) 1455 1456 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1457 extend_props(self._parse_properties()) 1458 1459 self._match(TokenType.ALIAS) 1460 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1461 # exp.Properties.Location.POST_ALIAS 1462 extend_props(self._parse_properties()) 1463 1464 expression = self._parse_ddl_select() 1465 1466 if create_token.token_type == TokenType.TABLE: 1467 # exp.Properties.Location.POST_EXPRESSION 1468 extend_props(self._parse_properties()) 1469 1470 indexes = [] 1471 while True: 1472 index = self._parse_index() 1473 1474 # exp.Properties.Location.POST_INDEX 1475 extend_props(self._parse_properties()) 1476 1477 if not index: 1478 break 1479 else: 1480 self._match(TokenType.COMMA) 1481 indexes.append(index) 1482 elif create_token.token_type == TokenType.VIEW: 1483 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1484 no_schema_binding = True 1485 1486 shallow = self._match_text_seq("SHALLOW") 1487 1488 if self._match_texts(self.CLONE_KEYWORDS): 1489 copy = self._prev.text.lower() == "copy" 1490 clone = self.expression( 1491 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1492 ) 1493 1494 if self._curr: 1495 return self._parse_as_command(start) 1496 1497 return self.expression( 1498 exp.Create, 1499 comments=comments, 1500 this=this, 1501 kind=create_token.text.upper(), 1502 replace=replace, 1503 unique=unique, 1504 expression=expression, 1505 exists=exists, 1506 properties=properties, 1507 indexes=indexes, 1508 no_schema_binding=no_schema_binding, 1509 begin=begin, 1510 end=end, 1511 clone=clone, 1512 ) 1513 1514 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1515 # only used for teradata currently 1516 self._match(TokenType.COMMA) 1517 1518 kwargs = { 1519 "no": self._match_text_seq("NO"), 1520 "dual": self._match_text_seq("DUAL"), 1521 "before": self._match_text_seq("BEFORE"), 1522 "default": self._match_text_seq("DEFAULT"), 1523 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1524 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1525 "after": self._match_text_seq("AFTER"), 1526 "minimum": self._match_texts(("MIN", "MINIMUM")), 1527 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1528 } 1529 1530 if self._match_texts(self.PROPERTY_PARSERS): 1531 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1532 try: 1533 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1534 except TypeError: 1535 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1536 1537 return None 1538 1539 def _parse_property(self) -> t.Optional[exp.Expression]: 1540 if self._match_texts(self.PROPERTY_PARSERS): 1541 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1542 1543 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1544 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1545 1546 if self._match_text_seq("COMPOUND", "SORTKEY"): 1547 return self._parse_sortkey(compound=True) 1548 1549 if self._match_text_seq("SQL", "SECURITY"): 1550 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1551 1552 index = self._index 1553 key = self._parse_column() 1554 1555 if not self._match(TokenType.EQ): 1556 self._retreat(index) 1557 return None 1558 1559 return self.expression( 1560 exp.Property, 1561 this=key.to_dot() if isinstance(key, exp.Column) else key, 1562 value=self._parse_column() or self._parse_var(any_token=True), 1563 ) 1564 1565 def _parse_stored(self) -> exp.FileFormatProperty: 1566 self._match(TokenType.ALIAS) 1567 1568 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1569 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1570 1571 return self.expression( 1572 exp.FileFormatProperty, 1573 this=( 1574 self.expression( 1575 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1576 ) 1577 if input_format or output_format 1578 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1579 ), 1580 ) 1581 1582 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1583 self._match(TokenType.EQ) 1584 self._match(TokenType.ALIAS) 1585 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1586 1587 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1588 properties = [] 1589 while True: 1590 if before: 1591 prop = self._parse_property_before() 1592 else: 1593 prop = self._parse_property() 1594 1595 if not prop: 1596 break 1597 for p in ensure_list(prop): 1598 properties.append(p) 1599 1600 if properties: 1601 return self.expression(exp.Properties, expressions=properties) 1602 1603 return None 1604 1605 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1606 return self.expression( 1607 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1608 ) 1609 1610 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1611 if self._index >= 2: 1612 pre_volatile_token = self._tokens[self._index - 2] 1613 else: 1614 pre_volatile_token = None 1615 1616 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1617 return exp.VolatileProperty() 1618 1619 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1620 1621 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1622 self._match_pair(TokenType.EQ, TokenType.ON) 1623 1624 prop = self.expression(exp.WithSystemVersioningProperty) 1625 if self._match(TokenType.L_PAREN): 1626 self._match_text_seq("HISTORY_TABLE", "=") 1627 prop.set("this", self._parse_table_parts()) 1628 1629 if self._match(TokenType.COMMA): 1630 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1631 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1632 1633 self._match_r_paren() 1634 1635 return prop 1636 1637 def _parse_with_property( 1638 self, 1639 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1640 if self._match(TokenType.L_PAREN, advance=False): 1641 return self._parse_wrapped_csv(self._parse_property) 1642 1643 if self._match_text_seq("JOURNAL"): 1644 return self._parse_withjournaltable() 1645 1646 if self._match_text_seq("DATA"): 1647 return self._parse_withdata(no=False) 1648 elif self._match_text_seq("NO", "DATA"): 1649 return self._parse_withdata(no=True) 1650 1651 if not self._next: 1652 return None 1653 1654 return self._parse_withisolatedloading() 1655 1656 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1657 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1658 self._match(TokenType.EQ) 1659 1660 user = self._parse_id_var() 1661 self._match(TokenType.PARAMETER) 1662 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1663 1664 if not user or not host: 1665 return None 1666 1667 return exp.DefinerProperty(this=f"{user}@{host}") 1668 1669 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1670 self._match(TokenType.TABLE) 1671 self._match(TokenType.EQ) 1672 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1673 1674 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1675 return self.expression(exp.LogProperty, no=no) 1676 1677 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1678 return self.expression(exp.JournalProperty, **kwargs) 1679 1680 def _parse_checksum(self) -> exp.ChecksumProperty: 1681 self._match(TokenType.EQ) 1682 1683 on = None 1684 if self._match(TokenType.ON): 1685 on = True 1686 elif self._match_text_seq("OFF"): 1687 on = False 1688 1689 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1690 1691 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1692 return self.expression( 1693 exp.Cluster, 1694 expressions=( 1695 self._parse_wrapped_csv(self._parse_ordered) 1696 if wrapped 1697 else self._parse_csv(self._parse_ordered) 1698 ), 1699 ) 1700 1701 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1702 self._match_text_seq("BY") 1703 1704 self._match_l_paren() 1705 expressions = self._parse_csv(self._parse_column) 1706 self._match_r_paren() 1707 1708 if self._match_text_seq("SORTED", "BY"): 1709 self._match_l_paren() 1710 sorted_by = self._parse_csv(self._parse_ordered) 1711 self._match_r_paren() 1712 else: 1713 sorted_by = None 1714 1715 self._match(TokenType.INTO) 1716 buckets = self._parse_number() 1717 self._match_text_seq("BUCKETS") 1718 1719 return self.expression( 1720 exp.ClusteredByProperty, 1721 expressions=expressions, 1722 sorted_by=sorted_by, 1723 buckets=buckets, 1724 ) 1725 1726 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1727 if not self._match_text_seq("GRANTS"): 1728 self._retreat(self._index - 1) 1729 return None 1730 1731 return self.expression(exp.CopyGrantsProperty) 1732 1733 def _parse_freespace(self) -> exp.FreespaceProperty: 1734 self._match(TokenType.EQ) 1735 return self.expression( 1736 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1737 ) 1738 1739 def _parse_mergeblockratio( 1740 self, no: bool = False, default: bool = False 1741 ) -> exp.MergeBlockRatioProperty: 1742 if self._match(TokenType.EQ): 1743 return self.expression( 1744 exp.MergeBlockRatioProperty, 1745 this=self._parse_number(), 1746 percent=self._match(TokenType.PERCENT), 1747 ) 1748 1749 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1750 1751 def _parse_datablocksize( 1752 self, 1753 default: t.Optional[bool] = None, 1754 minimum: t.Optional[bool] = None, 1755 maximum: t.Optional[bool] = None, 1756 ) -> exp.DataBlocksizeProperty: 1757 self._match(TokenType.EQ) 1758 size = self._parse_number() 1759 1760 units = None 1761 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1762 units = self._prev.text 1763 1764 return self.expression( 1765 exp.DataBlocksizeProperty, 1766 size=size, 1767 units=units, 1768 default=default, 1769 minimum=minimum, 1770 maximum=maximum, 1771 ) 1772 1773 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1774 self._match(TokenType.EQ) 1775 always = self._match_text_seq("ALWAYS") 1776 manual = self._match_text_seq("MANUAL") 1777 never = self._match_text_seq("NEVER") 1778 default = self._match_text_seq("DEFAULT") 1779 1780 autotemp = None 1781 if self._match_text_seq("AUTOTEMP"): 1782 autotemp = self._parse_schema() 1783 1784 return self.expression( 1785 exp.BlockCompressionProperty, 1786 always=always, 1787 manual=manual, 1788 never=never, 1789 default=default, 1790 autotemp=autotemp, 1791 ) 1792 1793 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1794 no = self._match_text_seq("NO") 1795 concurrent = self._match_text_seq("CONCURRENT") 1796 self._match_text_seq("ISOLATED", "LOADING") 1797 for_all = self._match_text_seq("FOR", "ALL") 1798 for_insert = self._match_text_seq("FOR", "INSERT") 1799 for_none = self._match_text_seq("FOR", "NONE") 1800 return self.expression( 1801 exp.IsolatedLoadingProperty, 1802 no=no, 1803 concurrent=concurrent, 1804 for_all=for_all, 1805 for_insert=for_insert, 1806 for_none=for_none, 1807 ) 1808 1809 def _parse_locking(self) -> exp.LockingProperty: 1810 if self._match(TokenType.TABLE): 1811 kind = "TABLE" 1812 elif self._match(TokenType.VIEW): 1813 kind = "VIEW" 1814 elif self._match(TokenType.ROW): 1815 kind = "ROW" 1816 elif self._match_text_seq("DATABASE"): 1817 kind = "DATABASE" 1818 else: 1819 kind = None 1820 1821 if kind in ("DATABASE", "TABLE", "VIEW"): 1822 this = self._parse_table_parts() 1823 else: 1824 this = None 1825 1826 if self._match(TokenType.FOR): 1827 for_or_in = "FOR" 1828 elif self._match(TokenType.IN): 1829 for_or_in = "IN" 1830 else: 1831 for_or_in = None 1832 1833 if self._match_text_seq("ACCESS"): 1834 lock_type = "ACCESS" 1835 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1836 lock_type = "EXCLUSIVE" 1837 elif self._match_text_seq("SHARE"): 1838 lock_type = "SHARE" 1839 elif self._match_text_seq("READ"): 1840 lock_type = "READ" 1841 elif self._match_text_seq("WRITE"): 1842 lock_type = "WRITE" 1843 elif self._match_text_seq("CHECKSUM"): 1844 lock_type = "CHECKSUM" 1845 else: 1846 lock_type = None 1847 1848 override = self._match_text_seq("OVERRIDE") 1849 1850 return self.expression( 1851 exp.LockingProperty, 1852 this=this, 1853 kind=kind, 1854 for_or_in=for_or_in, 1855 lock_type=lock_type, 1856 override=override, 1857 ) 1858 1859 def _parse_partition_by(self) -> t.List[exp.Expression]: 1860 if self._match(TokenType.PARTITION_BY): 1861 return self._parse_csv(self._parse_conjunction) 1862 return [] 1863 1864 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1865 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1866 if self._match_text_seq("MINVALUE"): 1867 return exp.var("MINVALUE") 1868 if self._match_text_seq("MAXVALUE"): 1869 return exp.var("MAXVALUE") 1870 return self._parse_bitwise() 1871 1872 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1873 expression = None 1874 from_expressions = None 1875 to_expressions = None 1876 1877 if self._match(TokenType.IN): 1878 this = self._parse_wrapped_csv(self._parse_bitwise) 1879 elif self._match(TokenType.FROM): 1880 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1881 self._match_text_seq("TO") 1882 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1883 elif self._match_text_seq("WITH", "(", "MODULUS"): 1884 this = self._parse_number() 1885 self._match_text_seq(",", "REMAINDER") 1886 expression = self._parse_number() 1887 self._match_r_paren() 1888 else: 1889 self.raise_error("Failed to parse partition bound spec.") 1890 1891 return self.expression( 1892 exp.PartitionBoundSpec, 1893 this=this, 1894 expression=expression, 1895 from_expressions=from_expressions, 1896 to_expressions=to_expressions, 1897 ) 1898 1899 # https://www.postgresql.org/docs/current/sql-createtable.html 1900 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1901 if not self._match_text_seq("OF"): 1902 self._retreat(self._index - 1) 1903 return None 1904 1905 this = self._parse_table(schema=True) 1906 1907 if self._match(TokenType.DEFAULT): 1908 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1909 elif self._match_text_seq("FOR", "VALUES"): 1910 expression = self._parse_partition_bound_spec() 1911 else: 1912 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1913 1914 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1915 1916 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1917 self._match(TokenType.EQ) 1918 return self.expression( 1919 exp.PartitionedByProperty, 1920 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1921 ) 1922 1923 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1924 if self._match_text_seq("AND", "STATISTICS"): 1925 statistics = True 1926 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1927 statistics = False 1928 else: 1929 statistics = None 1930 1931 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1932 1933 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1934 if self._match_text_seq("SQL"): 1935 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1936 return None 1937 1938 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1939 if self._match_text_seq("SQL", "DATA"): 1940 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1941 return None 1942 1943 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1944 if self._match_text_seq("PRIMARY", "INDEX"): 1945 return exp.NoPrimaryIndexProperty() 1946 if self._match_text_seq("SQL"): 1947 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1948 return None 1949 1950 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1951 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1952 return exp.OnCommitProperty() 1953 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1954 return exp.OnCommitProperty(delete=True) 1955 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1956 1957 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1958 if self._match_text_seq("SQL", "DATA"): 1959 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1960 return None 1961 1962 def _parse_distkey(self) -> exp.DistKeyProperty: 1963 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1964 1965 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1966 table = self._parse_table(schema=True) 1967 1968 options = [] 1969 while self._match_texts(("INCLUDING", "EXCLUDING")): 1970 this = self._prev.text.upper() 1971 1972 id_var = self._parse_id_var() 1973 if not id_var: 1974 return None 1975 1976 options.append( 1977 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1978 ) 1979 1980 return self.expression(exp.LikeProperty, this=table, expressions=options) 1981 1982 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1983 return self.expression( 1984 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1985 ) 1986 1987 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1988 self._match(TokenType.EQ) 1989 return self.expression( 1990 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1991 ) 1992 1993 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1994 self._match_text_seq("WITH", "CONNECTION") 1995 return self.expression( 1996 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1997 ) 1998 1999 def _parse_returns(self) -> exp.ReturnsProperty: 2000 value: t.Optional[exp.Expression] 2001 is_table = self._match(TokenType.TABLE) 2002 2003 if is_table: 2004 if self._match(TokenType.LT): 2005 value = self.expression( 2006 exp.Schema, 2007 this="TABLE", 2008 expressions=self._parse_csv(self._parse_struct_types), 2009 ) 2010 if not self._match(TokenType.GT): 2011 self.raise_error("Expecting >") 2012 else: 2013 value = self._parse_schema(exp.var("TABLE")) 2014 else: 2015 value = self._parse_types() 2016 2017 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2018 2019 def _parse_describe(self) -> exp.Describe: 2020 kind = self._match_set(self.CREATABLES) and self._prev.text 2021 extended = self._match_text_seq("EXTENDED") 2022 this = self._parse_table(schema=True) 2023 properties = self._parse_properties() 2024 expressions = properties.expressions if properties else None 2025 return self.expression( 2026 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2027 ) 2028 2029 def _parse_insert(self) -> exp.Insert: 2030 comments = ensure_list(self._prev_comments) 2031 overwrite = self._match(TokenType.OVERWRITE) 2032 ignore = self._match(TokenType.IGNORE) 2033 local = self._match_text_seq("LOCAL") 2034 alternative = None 2035 2036 if self._match_text_seq("DIRECTORY"): 2037 this: t.Optional[exp.Expression] = self.expression( 2038 exp.Directory, 2039 this=self._parse_var_or_string(), 2040 local=local, 2041 row_format=self._parse_row_format(match_row=True), 2042 ) 2043 else: 2044 if self._match(TokenType.OR): 2045 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2046 2047 self._match(TokenType.INTO) 2048 comments += ensure_list(self._prev_comments) 2049 self._match(TokenType.TABLE) 2050 this = self._parse_table(schema=True) 2051 2052 returning = self._parse_returning() 2053 2054 return self.expression( 2055 exp.Insert, 2056 comments=comments, 2057 this=this, 2058 by_name=self._match_text_seq("BY", "NAME"), 2059 exists=self._parse_exists(), 2060 partition=self._parse_partition(), 2061 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2062 and self._parse_conjunction(), 2063 expression=self._parse_ddl_select(), 2064 conflict=self._parse_on_conflict(), 2065 returning=returning or self._parse_returning(), 2066 overwrite=overwrite, 2067 alternative=alternative, 2068 ignore=ignore, 2069 ) 2070 2071 def _parse_kill(self) -> exp.Kill: 2072 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2073 2074 return self.expression( 2075 exp.Kill, 2076 this=self._parse_primary(), 2077 kind=kind, 2078 ) 2079 2080 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2081 conflict = self._match_text_seq("ON", "CONFLICT") 2082 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2083 2084 if not conflict and not duplicate: 2085 return None 2086 2087 nothing = None 2088 expressions = None 2089 key = None 2090 constraint = None 2091 2092 if conflict: 2093 if self._match_text_seq("ON", "CONSTRAINT"): 2094 constraint = self._parse_id_var() 2095 else: 2096 key = self._parse_csv(self._parse_value) 2097 2098 self._match_text_seq("DO") 2099 if self._match_text_seq("NOTHING"): 2100 nothing = True 2101 else: 2102 self._match(TokenType.UPDATE) 2103 self._match(TokenType.SET) 2104 expressions = self._parse_csv(self._parse_equality) 2105 2106 return self.expression( 2107 exp.OnConflict, 2108 duplicate=duplicate, 2109 expressions=expressions, 2110 nothing=nothing, 2111 key=key, 2112 constraint=constraint, 2113 ) 2114 2115 def _parse_returning(self) -> t.Optional[exp.Returning]: 2116 if not self._match(TokenType.RETURNING): 2117 return None 2118 return self.expression( 2119 exp.Returning, 2120 expressions=self._parse_csv(self._parse_expression), 2121 into=self._match(TokenType.INTO) and self._parse_table_part(), 2122 ) 2123 2124 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2125 if not self._match(TokenType.FORMAT): 2126 return None 2127 return self._parse_row_format() 2128 2129 def _parse_row_format( 2130 self, match_row: bool = False 2131 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2132 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2133 return None 2134 2135 if self._match_text_seq("SERDE"): 2136 this = self._parse_string() 2137 2138 serde_properties = None 2139 if self._match(TokenType.SERDE_PROPERTIES): 2140 serde_properties = self.expression( 2141 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2142 ) 2143 2144 return self.expression( 2145 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2146 ) 2147 2148 self._match_text_seq("DELIMITED") 2149 2150 kwargs = {} 2151 2152 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2153 kwargs["fields"] = self._parse_string() 2154 if self._match_text_seq("ESCAPED", "BY"): 2155 kwargs["escaped"] = self._parse_string() 2156 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2157 kwargs["collection_items"] = self._parse_string() 2158 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2159 kwargs["map_keys"] = self._parse_string() 2160 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2161 kwargs["lines"] = self._parse_string() 2162 if self._match_text_seq("NULL", "DEFINED", "AS"): 2163 kwargs["null"] = self._parse_string() 2164 2165 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2166 2167 def _parse_load(self) -> exp.LoadData | exp.Command: 2168 if self._match_text_seq("DATA"): 2169 local = self._match_text_seq("LOCAL") 2170 self._match_text_seq("INPATH") 2171 inpath = self._parse_string() 2172 overwrite = self._match(TokenType.OVERWRITE) 2173 self._match_pair(TokenType.INTO, TokenType.TABLE) 2174 2175 return self.expression( 2176 exp.LoadData, 2177 this=self._parse_table(schema=True), 2178 local=local, 2179 overwrite=overwrite, 2180 inpath=inpath, 2181 partition=self._parse_partition(), 2182 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2183 serde=self._match_text_seq("SERDE") and self._parse_string(), 2184 ) 2185 return self._parse_as_command(self._prev) 2186 2187 def _parse_delete(self) -> exp.Delete: 2188 # This handles MySQL's "Multiple-Table Syntax" 2189 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2190 tables = None 2191 comments = self._prev_comments 2192 if not self._match(TokenType.FROM, advance=False): 2193 tables = self._parse_csv(self._parse_table) or None 2194 2195 returning = self._parse_returning() 2196 2197 return self.expression( 2198 exp.Delete, 2199 comments=comments, 2200 tables=tables, 2201 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2202 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2203 where=self._parse_where(), 2204 returning=returning or self._parse_returning(), 2205 limit=self._parse_limit(), 2206 ) 2207 2208 def _parse_update(self) -> exp.Update: 2209 comments = self._prev_comments 2210 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2211 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2212 returning = self._parse_returning() 2213 return self.expression( 2214 exp.Update, 2215 comments=comments, 2216 **{ # type: ignore 2217 "this": this, 2218 "expressions": expressions, 2219 "from": self._parse_from(joins=True), 2220 "where": self._parse_where(), 2221 "returning": returning or self._parse_returning(), 2222 "order": self._parse_order(), 2223 "limit": self._parse_limit(), 2224 }, 2225 ) 2226 2227 def _parse_uncache(self) -> exp.Uncache: 2228 if not self._match(TokenType.TABLE): 2229 self.raise_error("Expecting TABLE after UNCACHE") 2230 2231 return self.expression( 2232 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2233 ) 2234 2235 def _parse_cache(self) -> exp.Cache: 2236 lazy = self._match_text_seq("LAZY") 2237 self._match(TokenType.TABLE) 2238 table = self._parse_table(schema=True) 2239 2240 options = [] 2241 if self._match_text_seq("OPTIONS"): 2242 self._match_l_paren() 2243 k = self._parse_string() 2244 self._match(TokenType.EQ) 2245 v = self._parse_string() 2246 options = [k, v] 2247 self._match_r_paren() 2248 2249 self._match(TokenType.ALIAS) 2250 return self.expression( 2251 exp.Cache, 2252 this=table, 2253 lazy=lazy, 2254 options=options, 2255 expression=self._parse_select(nested=True), 2256 ) 2257 2258 def _parse_partition(self) -> t.Optional[exp.Partition]: 2259 if not self._match(TokenType.PARTITION): 2260 return None 2261 2262 return self.expression( 2263 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2264 ) 2265 2266 def _parse_value(self) -> exp.Tuple: 2267 if self._match(TokenType.L_PAREN): 2268 expressions = self._parse_csv(self._parse_expression) 2269 self._match_r_paren() 2270 return self.expression(exp.Tuple, expressions=expressions) 2271 2272 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2273 # https://prestodb.io/docs/current/sql/values.html 2274 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2275 2276 def _parse_projections(self) -> t.List[exp.Expression]: 2277 return self._parse_expressions() 2278 2279 def _parse_select( 2280 self, 2281 nested: bool = False, 2282 table: bool = False, 2283 parse_subquery_alias: bool = True, 2284 parse_set_operation: bool = True, 2285 ) -> t.Optional[exp.Expression]: 2286 cte = self._parse_with() 2287 2288 if cte: 2289 this = self._parse_statement() 2290 2291 if not this: 2292 self.raise_error("Failed to parse any statement following CTE") 2293 return cte 2294 2295 if "with" in this.arg_types: 2296 this.set("with", cte) 2297 else: 2298 self.raise_error(f"{this.key} does not support CTE") 2299 this = cte 2300 2301 return this 2302 2303 # duckdb supports leading with FROM x 2304 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2305 2306 if self._match(TokenType.SELECT): 2307 comments = self._prev_comments 2308 2309 hint = self._parse_hint() 2310 all_ = self._match(TokenType.ALL) 2311 distinct = self._match_set(self.DISTINCT_TOKENS) 2312 2313 kind = ( 2314 self._match(TokenType.ALIAS) 2315 and self._match_texts(("STRUCT", "VALUE")) 2316 and self._prev.text.upper() 2317 ) 2318 2319 if distinct: 2320 distinct = self.expression( 2321 exp.Distinct, 2322 on=self._parse_value() if self._match(TokenType.ON) else None, 2323 ) 2324 2325 if all_ and distinct: 2326 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2327 2328 limit = self._parse_limit(top=True) 2329 projections = self._parse_projections() 2330 2331 this = self.expression( 2332 exp.Select, 2333 kind=kind, 2334 hint=hint, 2335 distinct=distinct, 2336 expressions=projections, 2337 limit=limit, 2338 ) 2339 this.comments = comments 2340 2341 into = self._parse_into() 2342 if into: 2343 this.set("into", into) 2344 2345 if not from_: 2346 from_ = self._parse_from() 2347 2348 if from_: 2349 this.set("from", from_) 2350 2351 this = self._parse_query_modifiers(this) 2352 elif (table or nested) and self._match(TokenType.L_PAREN): 2353 if self._match(TokenType.PIVOT): 2354 this = self._parse_simplified_pivot() 2355 elif self._match(TokenType.FROM): 2356 this = exp.select("*").from_( 2357 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2358 ) 2359 else: 2360 this = ( 2361 self._parse_table() 2362 if table 2363 else self._parse_select(nested=True, parse_set_operation=False) 2364 ) 2365 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2366 2367 self._match_r_paren() 2368 2369 # We return early here so that the UNION isn't attached to the subquery by the 2370 # following call to _parse_set_operations, but instead becomes the parent node 2371 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2372 elif self._match(TokenType.VALUES): 2373 this = self.expression( 2374 exp.Values, 2375 expressions=self._parse_csv(self._parse_value), 2376 alias=self._parse_table_alias(), 2377 ) 2378 elif from_: 2379 this = exp.select("*").from_(from_.this, copy=False) 2380 else: 2381 this = None 2382 2383 if parse_set_operation: 2384 return self._parse_set_operations(this) 2385 return this 2386 2387 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2388 if not skip_with_token and not self._match(TokenType.WITH): 2389 return None 2390 2391 comments = self._prev_comments 2392 recursive = self._match(TokenType.RECURSIVE) 2393 2394 expressions = [] 2395 while True: 2396 expressions.append(self._parse_cte()) 2397 2398 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2399 break 2400 else: 2401 self._match(TokenType.WITH) 2402 2403 return self.expression( 2404 exp.With, comments=comments, expressions=expressions, recursive=recursive 2405 ) 2406 2407 def _parse_cte(self) -> exp.CTE: 2408 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2409 if not alias or not alias.this: 2410 self.raise_error("Expected CTE to have alias") 2411 2412 self._match(TokenType.ALIAS) 2413 return self.expression( 2414 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2415 ) 2416 2417 def _parse_table_alias( 2418 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2419 ) -> t.Optional[exp.TableAlias]: 2420 any_token = self._match(TokenType.ALIAS) 2421 alias = ( 2422 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2423 or self._parse_string_as_identifier() 2424 ) 2425 2426 index = self._index 2427 if self._match(TokenType.L_PAREN): 2428 columns = self._parse_csv(self._parse_function_parameter) 2429 self._match_r_paren() if columns else self._retreat(index) 2430 else: 2431 columns = None 2432 2433 if not alias and not columns: 2434 return None 2435 2436 return self.expression(exp.TableAlias, this=alias, columns=columns) 2437 2438 def _parse_subquery( 2439 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2440 ) -> t.Optional[exp.Subquery]: 2441 if not this: 2442 return None 2443 2444 return self.expression( 2445 exp.Subquery, 2446 this=this, 2447 pivots=self._parse_pivots(), 2448 alias=self._parse_table_alias() if parse_alias else None, 2449 ) 2450 2451 def _parse_query_modifiers( 2452 self, this: t.Optional[exp.Expression] 2453 ) -> t.Optional[exp.Expression]: 2454 if isinstance(this, self.MODIFIABLES): 2455 for join in iter(self._parse_join, None): 2456 this.append("joins", join) 2457 for lateral in iter(self._parse_lateral, None): 2458 this.append("laterals", lateral) 2459 2460 while True: 2461 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2462 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2463 key, expression = parser(self) 2464 2465 if expression: 2466 this.set(key, expression) 2467 if key == "limit": 2468 offset = expression.args.pop("offset", None) 2469 2470 if offset: 2471 offset = exp.Offset(expression=offset) 2472 this.set("offset", offset) 2473 2474 limit_by_expressions = expression.expressions 2475 expression.set("expressions", None) 2476 offset.set("expressions", limit_by_expressions) 2477 continue 2478 break 2479 return this 2480 2481 def _parse_hint(self) -> t.Optional[exp.Hint]: 2482 if self._match(TokenType.HINT): 2483 hints = [] 2484 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2485 hints.extend(hint) 2486 2487 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2488 self.raise_error("Expected */ after HINT") 2489 2490 return self.expression(exp.Hint, expressions=hints) 2491 2492 return None 2493 2494 def _parse_into(self) -> t.Optional[exp.Into]: 2495 if not self._match(TokenType.INTO): 2496 return None 2497 2498 temp = self._match(TokenType.TEMPORARY) 2499 unlogged = self._match_text_seq("UNLOGGED") 2500 self._match(TokenType.TABLE) 2501 2502 return self.expression( 2503 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2504 ) 2505 2506 def _parse_from( 2507 self, joins: bool = False, skip_from_token: bool = False 2508 ) -> t.Optional[exp.From]: 2509 if not skip_from_token and not self._match(TokenType.FROM): 2510 return None 2511 2512 return self.expression( 2513 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2514 ) 2515 2516 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2517 if not self._match(TokenType.MATCH_RECOGNIZE): 2518 return None 2519 2520 self._match_l_paren() 2521 2522 partition = self._parse_partition_by() 2523 order = self._parse_order() 2524 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2525 2526 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2527 rows = exp.var("ONE ROW PER MATCH") 2528 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2529 text = "ALL ROWS PER MATCH" 2530 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2531 text += " SHOW EMPTY MATCHES" 2532 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2533 text += " OMIT EMPTY MATCHES" 2534 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2535 text += " WITH UNMATCHED ROWS" 2536 rows = exp.var(text) 2537 else: 2538 rows = None 2539 2540 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2541 text = "AFTER MATCH SKIP" 2542 if self._match_text_seq("PAST", "LAST", "ROW"): 2543 text += " PAST LAST ROW" 2544 elif self._match_text_seq("TO", "NEXT", "ROW"): 2545 text += " TO NEXT ROW" 2546 elif self._match_text_seq("TO", "FIRST"): 2547 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2548 elif self._match_text_seq("TO", "LAST"): 2549 text += f" TO LAST {self._advance_any().text}" # type: ignore 2550 after = exp.var(text) 2551 else: 2552 after = None 2553 2554 if self._match_text_seq("PATTERN"): 2555 self._match_l_paren() 2556 2557 if not self._curr: 2558 self.raise_error("Expecting )", self._curr) 2559 2560 paren = 1 2561 start = self._curr 2562 2563 while self._curr and paren > 0: 2564 if self._curr.token_type == TokenType.L_PAREN: 2565 paren += 1 2566 if self._curr.token_type == TokenType.R_PAREN: 2567 paren -= 1 2568 2569 end = self._prev 2570 self._advance() 2571 2572 if paren > 0: 2573 self.raise_error("Expecting )", self._curr) 2574 2575 pattern = exp.var(self._find_sql(start, end)) 2576 else: 2577 pattern = None 2578 2579 define = ( 2580 self._parse_csv(self._parse_name_as_expression) 2581 if self._match_text_seq("DEFINE") 2582 else None 2583 ) 2584 2585 self._match_r_paren() 2586 2587 return self.expression( 2588 exp.MatchRecognize, 2589 partition_by=partition, 2590 order=order, 2591 measures=measures, 2592 rows=rows, 2593 after=after, 2594 pattern=pattern, 2595 define=define, 2596 alias=self._parse_table_alias(), 2597 ) 2598 2599 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2600 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2601 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2602 cross_apply = False 2603 2604 if cross_apply is not None: 2605 this = self._parse_select(table=True) 2606 view = None 2607 outer = None 2608 elif self._match(TokenType.LATERAL): 2609 this = self._parse_select(table=True) 2610 view = self._match(TokenType.VIEW) 2611 outer = self._match(TokenType.OUTER) 2612 else: 2613 return None 2614 2615 if not this: 2616 this = ( 2617 self._parse_unnest() 2618 or self._parse_function() 2619 or self._parse_id_var(any_token=False) 2620 ) 2621 2622 while self._match(TokenType.DOT): 2623 this = exp.Dot( 2624 this=this, 2625 expression=self._parse_function() or self._parse_id_var(any_token=False), 2626 ) 2627 2628 if view: 2629 table = self._parse_id_var(any_token=False) 2630 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2631 table_alias: t.Optional[exp.TableAlias] = self.expression( 2632 exp.TableAlias, this=table, columns=columns 2633 ) 2634 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2635 # We move the alias from the lateral's child node to the lateral itself 2636 table_alias = this.args["alias"].pop() 2637 else: 2638 table_alias = self._parse_table_alias() 2639 2640 return self.expression( 2641 exp.Lateral, 2642 this=this, 2643 view=view, 2644 outer=outer, 2645 alias=table_alias, 2646 cross_apply=cross_apply, 2647 ) 2648 2649 def _parse_join_parts( 2650 self, 2651 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2652 return ( 2653 self._match_set(self.JOIN_METHODS) and self._prev, 2654 self._match_set(self.JOIN_SIDES) and self._prev, 2655 self._match_set(self.JOIN_KINDS) and self._prev, 2656 ) 2657 2658 def _parse_join( 2659 self, skip_join_token: bool = False, parse_bracket: bool = False 2660 ) -> t.Optional[exp.Join]: 2661 if self._match(TokenType.COMMA): 2662 return self.expression(exp.Join, this=self._parse_table()) 2663 2664 index = self._index 2665 method, side, kind = self._parse_join_parts() 2666 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2667 join = self._match(TokenType.JOIN) 2668 2669 if not skip_join_token and not join: 2670 self._retreat(index) 2671 kind = None 2672 method = None 2673 side = None 2674 2675 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2676 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2677 2678 if not skip_join_token and not join and not outer_apply and not cross_apply: 2679 return None 2680 2681 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2682 2683 if method: 2684 kwargs["method"] = method.text 2685 if side: 2686 kwargs["side"] = side.text 2687 if kind: 2688 kwargs["kind"] = kind.text 2689 if hint: 2690 kwargs["hint"] = hint 2691 2692 if self._match(TokenType.ON): 2693 kwargs["on"] = self._parse_conjunction() 2694 elif self._match(TokenType.USING): 2695 kwargs["using"] = self._parse_wrapped_id_vars() 2696 elif not (kind and kind.token_type == TokenType.CROSS): 2697 index = self._index 2698 join = self._parse_join() 2699 2700 if join and self._match(TokenType.ON): 2701 kwargs["on"] = self._parse_conjunction() 2702 elif join and self._match(TokenType.USING): 2703 kwargs["using"] = self._parse_wrapped_id_vars() 2704 else: 2705 join = None 2706 self._retreat(index) 2707 2708 kwargs["this"].set("joins", [join] if join else None) 2709 2710 comments = [c for token in (method, side, kind) if token for c in token.comments] 2711 return self.expression(exp.Join, comments=comments, **kwargs) 2712 2713 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2714 this = self._parse_conjunction() 2715 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2716 return this 2717 2718 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2719 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2720 2721 return this 2722 2723 def _parse_index( 2724 self, 2725 index: t.Optional[exp.Expression] = None, 2726 ) -> t.Optional[exp.Index]: 2727 if index: 2728 unique = None 2729 primary = None 2730 amp = None 2731 2732 self._match(TokenType.ON) 2733 self._match(TokenType.TABLE) # hive 2734 table = self._parse_table_parts(schema=True) 2735 else: 2736 unique = self._match(TokenType.UNIQUE) 2737 primary = self._match_text_seq("PRIMARY") 2738 amp = self._match_text_seq("AMP") 2739 2740 if not self._match(TokenType.INDEX): 2741 return None 2742 2743 index = self._parse_id_var() 2744 table = None 2745 2746 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2747 2748 if self._match(TokenType.L_PAREN, advance=False): 2749 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2750 else: 2751 columns = None 2752 2753 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2754 2755 return self.expression( 2756 exp.Index, 2757 this=index, 2758 table=table, 2759 using=using, 2760 columns=columns, 2761 unique=unique, 2762 primary=primary, 2763 amp=amp, 2764 include=include, 2765 partition_by=self._parse_partition_by(), 2766 where=self._parse_where(), 2767 ) 2768 2769 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2770 hints: t.List[exp.Expression] = [] 2771 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2772 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2773 hints.append( 2774 self.expression( 2775 exp.WithTableHint, 2776 expressions=self._parse_csv( 2777 lambda: self._parse_function() or self._parse_var(any_token=True) 2778 ), 2779 ) 2780 ) 2781 self._match_r_paren() 2782 else: 2783 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2784 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2785 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2786 2787 self._match_texts(("INDEX", "KEY")) 2788 if self._match(TokenType.FOR): 2789 hint.set("target", self._advance_any() and self._prev.text.upper()) 2790 2791 hint.set("expressions", self._parse_wrapped_id_vars()) 2792 hints.append(hint) 2793 2794 return hints or None 2795 2796 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2797 return ( 2798 (not schema and self._parse_function(optional_parens=False)) 2799 or self._parse_id_var(any_token=False) 2800 or self._parse_string_as_identifier() 2801 or self._parse_placeholder() 2802 ) 2803 2804 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2805 catalog = None 2806 db = None 2807 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2808 2809 while self._match(TokenType.DOT): 2810 if catalog: 2811 # This allows nesting the table in arbitrarily many dot expressions if needed 2812 table = self.expression( 2813 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2814 ) 2815 else: 2816 catalog = db 2817 db = table 2818 table = self._parse_table_part(schema=schema) or "" 2819 2820 if is_db_reference: 2821 catalog = db 2822 db = table 2823 table = None 2824 2825 if not table and not is_db_reference: 2826 self.raise_error(f"Expected table name but got {self._curr}") 2827 if not db and is_db_reference: 2828 self.raise_error(f"Expected database name but got {self._curr}") 2829 2830 return self.expression( 2831 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2832 ) 2833 2834 def _parse_table( 2835 self, 2836 schema: bool = False, 2837 joins: bool = False, 2838 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2839 parse_bracket: bool = False, 2840 is_db_reference: bool = False, 2841 ) -> t.Optional[exp.Expression]: 2842 lateral = self._parse_lateral() 2843 if lateral: 2844 return lateral 2845 2846 unnest = self._parse_unnest() 2847 if unnest: 2848 return unnest 2849 2850 values = self._parse_derived_table_values() 2851 if values: 2852 return values 2853 2854 subquery = self._parse_select(table=True) 2855 if subquery: 2856 if not subquery.args.get("pivots"): 2857 subquery.set("pivots", self._parse_pivots()) 2858 return subquery 2859 2860 bracket = parse_bracket and self._parse_bracket(None) 2861 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2862 this = t.cast( 2863 exp.Expression, 2864 bracket 2865 or self._parse_bracket( 2866 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2867 ), 2868 ) 2869 2870 if schema: 2871 return self._parse_schema(this=this) 2872 2873 version = self._parse_version() 2874 2875 if version: 2876 this.set("version", version) 2877 2878 if self.dialect.ALIAS_POST_TABLESAMPLE: 2879 table_sample = self._parse_table_sample() 2880 2881 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2882 if alias: 2883 this.set("alias", alias) 2884 2885 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2886 return self.expression( 2887 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2888 ) 2889 2890 this.set("hints", self._parse_table_hints()) 2891 2892 if not this.args.get("pivots"): 2893 this.set("pivots", self._parse_pivots()) 2894 2895 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2896 table_sample = self._parse_table_sample() 2897 2898 if table_sample: 2899 table_sample.set("this", this) 2900 this = table_sample 2901 2902 if joins: 2903 for join in iter(self._parse_join, None): 2904 this.append("joins", join) 2905 2906 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2907 this.set("ordinality", True) 2908 this.set("alias", self._parse_table_alias()) 2909 2910 return this 2911 2912 def _parse_version(self) -> t.Optional[exp.Version]: 2913 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2914 this = "TIMESTAMP" 2915 elif self._match(TokenType.VERSION_SNAPSHOT): 2916 this = "VERSION" 2917 else: 2918 return None 2919 2920 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2921 kind = self._prev.text.upper() 2922 start = self._parse_bitwise() 2923 self._match_texts(("TO", "AND")) 2924 end = self._parse_bitwise() 2925 expression: t.Optional[exp.Expression] = self.expression( 2926 exp.Tuple, expressions=[start, end] 2927 ) 2928 elif self._match_text_seq("CONTAINED", "IN"): 2929 kind = "CONTAINED IN" 2930 expression = self.expression( 2931 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2932 ) 2933 elif self._match(TokenType.ALL): 2934 kind = "ALL" 2935 expression = None 2936 else: 2937 self._match_text_seq("AS", "OF") 2938 kind = "AS OF" 2939 expression = self._parse_type() 2940 2941 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2942 2943 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2944 if not self._match(TokenType.UNNEST): 2945 return None 2946 2947 expressions = self._parse_wrapped_csv(self._parse_equality) 2948 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2949 2950 alias = self._parse_table_alias() if with_alias else None 2951 2952 if alias: 2953 if self.dialect.UNNEST_COLUMN_ONLY: 2954 if alias.args.get("columns"): 2955 self.raise_error("Unexpected extra column alias in unnest.") 2956 2957 alias.set("columns", [alias.this]) 2958 alias.set("this", None) 2959 2960 columns = alias.args.get("columns") or [] 2961 if offset and len(expressions) < len(columns): 2962 offset = columns.pop() 2963 2964 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2965 self._match(TokenType.ALIAS) 2966 offset = self._parse_id_var( 2967 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2968 ) or exp.to_identifier("offset") 2969 2970 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2971 2972 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2973 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2974 if not is_derived and not self._match(TokenType.VALUES): 2975 return None 2976 2977 expressions = self._parse_csv(self._parse_value) 2978 alias = self._parse_table_alias() 2979 2980 if is_derived: 2981 self._match_r_paren() 2982 2983 return self.expression( 2984 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2985 ) 2986 2987 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2988 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2989 as_modifier and self._match_text_seq("USING", "SAMPLE") 2990 ): 2991 return None 2992 2993 bucket_numerator = None 2994 bucket_denominator = None 2995 bucket_field = None 2996 percent = None 2997 size = None 2998 seed = None 2999 3000 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3001 matched_l_paren = self._match(TokenType.L_PAREN) 3002 3003 if self.TABLESAMPLE_CSV: 3004 num = None 3005 expressions = self._parse_csv(self._parse_primary) 3006 else: 3007 expressions = None 3008 num = ( 3009 self._parse_factor() 3010 if self._match(TokenType.NUMBER, advance=False) 3011 else self._parse_primary() or self._parse_placeholder() 3012 ) 3013 3014 if self._match_text_seq("BUCKET"): 3015 bucket_numerator = self._parse_number() 3016 self._match_text_seq("OUT", "OF") 3017 bucket_denominator = bucket_denominator = self._parse_number() 3018 self._match(TokenType.ON) 3019 bucket_field = self._parse_field() 3020 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3021 percent = num 3022 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3023 size = num 3024 else: 3025 percent = num 3026 3027 if matched_l_paren: 3028 self._match_r_paren() 3029 3030 if self._match(TokenType.L_PAREN): 3031 method = self._parse_var(upper=True) 3032 seed = self._match(TokenType.COMMA) and self._parse_number() 3033 self._match_r_paren() 3034 elif self._match_texts(("SEED", "REPEATABLE")): 3035 seed = self._parse_wrapped(self._parse_number) 3036 3037 return self.expression( 3038 exp.TableSample, 3039 expressions=expressions, 3040 method=method, 3041 bucket_numerator=bucket_numerator, 3042 bucket_denominator=bucket_denominator, 3043 bucket_field=bucket_field, 3044 percent=percent, 3045 size=size, 3046 seed=seed, 3047 ) 3048 3049 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3050 return list(iter(self._parse_pivot, None)) or None 3051 3052 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3053 return list(iter(self._parse_join, None)) or None 3054 3055 # https://duckdb.org/docs/sql/statements/pivot 3056 def _parse_simplified_pivot(self) -> exp.Pivot: 3057 def _parse_on() -> t.Optional[exp.Expression]: 3058 this = self._parse_bitwise() 3059 return self._parse_in(this) if self._match(TokenType.IN) else this 3060 3061 this = self._parse_table() 3062 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3063 using = self._match(TokenType.USING) and self._parse_csv( 3064 lambda: self._parse_alias(self._parse_function()) 3065 ) 3066 group = self._parse_group() 3067 return self.expression( 3068 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3069 ) 3070 3071 def _parse_pivot_in(self) -> exp.In: 3072 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3073 this = self._parse_conjunction() 3074 3075 self._match(TokenType.ALIAS) 3076 alias = self._parse_field() 3077 if alias: 3078 return self.expression(exp.PivotAlias, this=this, alias=alias) 3079 3080 return this 3081 3082 value = self._parse_column() 3083 3084 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3085 self.raise_error("Expecting IN (") 3086 3087 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3088 3089 self._match_r_paren() 3090 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3091 3092 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3093 index = self._index 3094 include_nulls = None 3095 3096 if self._match(TokenType.PIVOT): 3097 unpivot = False 3098 elif self._match(TokenType.UNPIVOT): 3099 unpivot = True 3100 3101 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3102 if self._match_text_seq("INCLUDE", "NULLS"): 3103 include_nulls = True 3104 elif self._match_text_seq("EXCLUDE", "NULLS"): 3105 include_nulls = False 3106 else: 3107 return None 3108 3109 expressions = [] 3110 3111 if not self._match(TokenType.L_PAREN): 3112 self._retreat(index) 3113 return None 3114 3115 if unpivot: 3116 expressions = self._parse_csv(self._parse_column) 3117 else: 3118 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3119 3120 if not expressions: 3121 self.raise_error("Failed to parse PIVOT's aggregation list") 3122 3123 if not self._match(TokenType.FOR): 3124 self.raise_error("Expecting FOR") 3125 3126 field = self._parse_pivot_in() 3127 3128 self._match_r_paren() 3129 3130 pivot = self.expression( 3131 exp.Pivot, 3132 expressions=expressions, 3133 field=field, 3134 unpivot=unpivot, 3135 include_nulls=include_nulls, 3136 ) 3137 3138 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3139 pivot.set("alias", self._parse_table_alias()) 3140 3141 if not unpivot: 3142 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3143 3144 columns: t.List[exp.Expression] = [] 3145 for fld in pivot.args["field"].expressions: 3146 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3147 for name in names: 3148 if self.PREFIXED_PIVOT_COLUMNS: 3149 name = f"{name}_{field_name}" if name else field_name 3150 else: 3151 name = f"{field_name}_{name}" if name else field_name 3152 3153 columns.append(exp.to_identifier(name)) 3154 3155 pivot.set("columns", columns) 3156 3157 return pivot 3158 3159 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3160 return [agg.alias for agg in aggregations] 3161 3162 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3163 if not skip_where_token and not self._match(TokenType.WHERE): 3164 return None 3165 3166 return self.expression( 3167 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3168 ) 3169 3170 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3171 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3172 return None 3173 3174 elements = defaultdict(list) 3175 3176 if self._match(TokenType.ALL): 3177 return self.expression(exp.Group, all=True) 3178 3179 while True: 3180 expressions = self._parse_csv(self._parse_conjunction) 3181 if expressions: 3182 elements["expressions"].extend(expressions) 3183 3184 grouping_sets = self._parse_grouping_sets() 3185 if grouping_sets: 3186 elements["grouping_sets"].extend(grouping_sets) 3187 3188 rollup = None 3189 cube = None 3190 totals = None 3191 3192 index = self._index 3193 with_ = self._match(TokenType.WITH) 3194 if self._match(TokenType.ROLLUP): 3195 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3196 elements["rollup"].extend(ensure_list(rollup)) 3197 3198 if self._match(TokenType.CUBE): 3199 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3200 elements["cube"].extend(ensure_list(cube)) 3201 3202 if self._match_text_seq("TOTALS"): 3203 totals = True 3204 elements["totals"] = True # type: ignore 3205 3206 if not (grouping_sets or rollup or cube or totals): 3207 if with_: 3208 self._retreat(index) 3209 break 3210 3211 return self.expression(exp.Group, **elements) # type: ignore 3212 3213 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3214 if not self._match(TokenType.GROUPING_SETS): 3215 return None 3216 3217 return self._parse_wrapped_csv(self._parse_grouping_set) 3218 3219 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3220 if self._match(TokenType.L_PAREN): 3221 grouping_set = self._parse_csv(self._parse_column) 3222 self._match_r_paren() 3223 return self.expression(exp.Tuple, expressions=grouping_set) 3224 3225 return self._parse_column() 3226 3227 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3228 if not skip_having_token and not self._match(TokenType.HAVING): 3229 return None 3230 return self.expression(exp.Having, this=self._parse_conjunction()) 3231 3232 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3233 if not self._match(TokenType.QUALIFY): 3234 return None 3235 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3236 3237 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3238 if skip_start_token: 3239 start = None 3240 elif self._match(TokenType.START_WITH): 3241 start = self._parse_conjunction() 3242 else: 3243 return None 3244 3245 self._match(TokenType.CONNECT_BY) 3246 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3247 exp.Prior, this=self._parse_bitwise() 3248 ) 3249 connect = self._parse_conjunction() 3250 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3251 3252 if not start and self._match(TokenType.START_WITH): 3253 start = self._parse_conjunction() 3254 3255 return self.expression(exp.Connect, start=start, connect=connect) 3256 3257 def _parse_name_as_expression(self) -> exp.Alias: 3258 return self.expression( 3259 exp.Alias, 3260 alias=self._parse_id_var(any_token=True), 3261 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3262 ) 3263 3264 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3265 if self._match_text_seq("INTERPOLATE"): 3266 return self._parse_wrapped_csv(self._parse_name_as_expression) 3267 return None 3268 3269 def _parse_order( 3270 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3271 ) -> t.Optional[exp.Expression]: 3272 siblings = None 3273 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3274 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3275 return this 3276 3277 siblings = True 3278 3279 return self.expression( 3280 exp.Order, 3281 this=this, 3282 expressions=self._parse_csv(self._parse_ordered), 3283 interpolate=self._parse_interpolate(), 3284 siblings=siblings, 3285 ) 3286 3287 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3288 if not self._match(token): 3289 return None 3290 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3291 3292 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3293 this = parse_method() if parse_method else self._parse_conjunction() 3294 3295 asc = self._match(TokenType.ASC) 3296 desc = self._match(TokenType.DESC) or (asc and False) 3297 3298 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3299 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3300 3301 nulls_first = is_nulls_first or False 3302 explicitly_null_ordered = is_nulls_first or is_nulls_last 3303 3304 if ( 3305 not explicitly_null_ordered 3306 and ( 3307 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3308 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3309 ) 3310 and self.dialect.NULL_ORDERING != "nulls_are_last" 3311 ): 3312 nulls_first = True 3313 3314 if self._match_text_seq("WITH", "FILL"): 3315 with_fill = self.expression( 3316 exp.WithFill, 3317 **{ # type: ignore 3318 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3319 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3320 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3321 }, 3322 ) 3323 else: 3324 with_fill = None 3325 3326 return self.expression( 3327 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3328 ) 3329 3330 def _parse_limit( 3331 self, this: t.Optional[exp.Expression] = None, top: bool = False 3332 ) -> t.Optional[exp.Expression]: 3333 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3334 comments = self._prev_comments 3335 if top: 3336 limit_paren = self._match(TokenType.L_PAREN) 3337 expression = self._parse_term() if limit_paren else self._parse_number() 3338 3339 if limit_paren: 3340 self._match_r_paren() 3341 else: 3342 expression = self._parse_term() 3343 3344 if self._match(TokenType.COMMA): 3345 offset = expression 3346 expression = self._parse_term() 3347 else: 3348 offset = None 3349 3350 limit_exp = self.expression( 3351 exp.Limit, 3352 this=this, 3353 expression=expression, 3354 offset=offset, 3355 comments=comments, 3356 expressions=self._parse_limit_by(), 3357 ) 3358 3359 return limit_exp 3360 3361 if self._match(TokenType.FETCH): 3362 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3363 direction = self._prev.text.upper() if direction else "FIRST" 3364 3365 count = self._parse_field(tokens=self.FETCH_TOKENS) 3366 percent = self._match(TokenType.PERCENT) 3367 3368 self._match_set((TokenType.ROW, TokenType.ROWS)) 3369 3370 only = self._match_text_seq("ONLY") 3371 with_ties = self._match_text_seq("WITH", "TIES") 3372 3373 if only and with_ties: 3374 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3375 3376 return self.expression( 3377 exp.Fetch, 3378 direction=direction, 3379 count=count, 3380 percent=percent, 3381 with_ties=with_ties, 3382 ) 3383 3384 return this 3385 3386 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3387 if not self._match(TokenType.OFFSET): 3388 return this 3389 3390 count = self._parse_term() 3391 self._match_set((TokenType.ROW, TokenType.ROWS)) 3392 3393 return self.expression( 3394 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3395 ) 3396 3397 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3398 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3399 3400 def _parse_locks(self) -> t.List[exp.Lock]: 3401 locks = [] 3402 while True: 3403 if self._match_text_seq("FOR", "UPDATE"): 3404 update = True 3405 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3406 "LOCK", "IN", "SHARE", "MODE" 3407 ): 3408 update = False 3409 else: 3410 break 3411 3412 expressions = None 3413 if self._match_text_seq("OF"): 3414 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3415 3416 wait: t.Optional[bool | exp.Expression] = None 3417 if self._match_text_seq("NOWAIT"): 3418 wait = True 3419 elif self._match_text_seq("WAIT"): 3420 wait = self._parse_primary() 3421 elif self._match_text_seq("SKIP", "LOCKED"): 3422 wait = False 3423 3424 locks.append( 3425 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3426 ) 3427 3428 return locks 3429 3430 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3431 while this and self._match_set(self.SET_OPERATIONS): 3432 token_type = self._prev.token_type 3433 3434 if token_type == TokenType.UNION: 3435 operation = exp.Union 3436 elif token_type == TokenType.EXCEPT: 3437 operation = exp.Except 3438 else: 3439 operation = exp.Intersect 3440 3441 comments = self._prev.comments 3442 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3443 by_name = self._match_text_seq("BY", "NAME") 3444 expression = self._parse_select(nested=True, parse_set_operation=False) 3445 3446 this = self.expression( 3447 operation, 3448 comments=comments, 3449 this=this, 3450 distinct=distinct, 3451 by_name=by_name, 3452 expression=expression, 3453 ) 3454 3455 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3456 expression = this.expression 3457 3458 if expression: 3459 for arg in self.UNION_MODIFIERS: 3460 expr = expression.args.get(arg) 3461 if expr: 3462 this.set(arg, expr.pop()) 3463 3464 return this 3465 3466 def _parse_expression(self) -> t.Optional[exp.Expression]: 3467 return self._parse_alias(self._parse_conjunction()) 3468 3469 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3470 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3471 3472 def _parse_equality(self) -> t.Optional[exp.Expression]: 3473 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3474 3475 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3476 return self._parse_tokens(self._parse_range, self.COMPARISON) 3477 3478 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3479 this = this or self._parse_bitwise() 3480 negate = self._match(TokenType.NOT) 3481 3482 if self._match_set(self.RANGE_PARSERS): 3483 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3484 if not expression: 3485 return this 3486 3487 this = expression 3488 elif self._match(TokenType.ISNULL): 3489 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3490 3491 # Postgres supports ISNULL and NOTNULL for conditions. 3492 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3493 if self._match(TokenType.NOTNULL): 3494 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3495 this = self.expression(exp.Not, this=this) 3496 3497 if negate: 3498 this = self.expression(exp.Not, this=this) 3499 3500 if self._match(TokenType.IS): 3501 this = self._parse_is(this) 3502 3503 return this 3504 3505 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3506 index = self._index - 1 3507 negate = self._match(TokenType.NOT) 3508 3509 if self._match_text_seq("DISTINCT", "FROM"): 3510 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3511 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3512 3513 expression = self._parse_null() or self._parse_boolean() 3514 if not expression: 3515 self._retreat(index) 3516 return None 3517 3518 this = self.expression(exp.Is, this=this, expression=expression) 3519 return self.expression(exp.Not, this=this) if negate else this 3520 3521 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3522 unnest = self._parse_unnest(with_alias=False) 3523 if unnest: 3524 this = self.expression(exp.In, this=this, unnest=unnest) 3525 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3526 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3527 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3528 3529 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3530 this = self.expression(exp.In, this=this, query=expressions[0]) 3531 else: 3532 this = self.expression(exp.In, this=this, expressions=expressions) 3533 3534 if matched_l_paren: 3535 self._match_r_paren(this) 3536 elif not self._match(TokenType.R_BRACKET, expression=this): 3537 self.raise_error("Expecting ]") 3538 else: 3539 this = self.expression(exp.In, this=this, field=self._parse_field()) 3540 3541 return this 3542 3543 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3544 low = self._parse_bitwise() 3545 self._match(TokenType.AND) 3546 high = self._parse_bitwise() 3547 return self.expression(exp.Between, this=this, low=low, high=high) 3548 3549 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3550 if not self._match(TokenType.ESCAPE): 3551 return this 3552 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3553 3554 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3555 index = self._index 3556 3557 if not self._match(TokenType.INTERVAL) and match_interval: 3558 return None 3559 3560 if self._match(TokenType.STRING, advance=False): 3561 this = self._parse_primary() 3562 else: 3563 this = self._parse_term() 3564 3565 if not this or ( 3566 isinstance(this, exp.Column) 3567 and not this.table 3568 and not this.this.quoted 3569 and this.name.upper() == "IS" 3570 ): 3571 self._retreat(index) 3572 return None 3573 3574 unit = self._parse_function() or ( 3575 not self._match(TokenType.ALIAS, advance=False) 3576 and self._parse_var(any_token=True, upper=True) 3577 ) 3578 3579 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3580 # each INTERVAL expression into this canonical form so it's easy to transpile 3581 if this and this.is_number: 3582 this = exp.Literal.string(this.name) 3583 elif this and this.is_string: 3584 parts = this.name.split() 3585 3586 if len(parts) == 2: 3587 if unit: 3588 # This is not actually a unit, it's something else (e.g. a "window side") 3589 unit = None 3590 self._retreat(self._index - 1) 3591 3592 this = exp.Literal.string(parts[0]) 3593 unit = self.expression(exp.Var, this=parts[1].upper()) 3594 3595 return self.expression(exp.Interval, this=this, unit=unit) 3596 3597 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3598 this = self._parse_term() 3599 3600 while True: 3601 if self._match_set(self.BITWISE): 3602 this = self.expression( 3603 self.BITWISE[self._prev.token_type], 3604 this=this, 3605 expression=self._parse_term(), 3606 ) 3607 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3608 this = self.expression( 3609 exp.DPipe, 3610 this=this, 3611 expression=self._parse_term(), 3612 safe=not self.dialect.STRICT_STRING_CONCAT, 3613 ) 3614 elif self._match(TokenType.DQMARK): 3615 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3616 elif self._match_pair(TokenType.LT, TokenType.LT): 3617 this = self.expression( 3618 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3619 ) 3620 elif self._match_pair(TokenType.GT, TokenType.GT): 3621 this = self.expression( 3622 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3623 ) 3624 else: 3625 break 3626 3627 return this 3628 3629 def _parse_term(self) -> t.Optional[exp.Expression]: 3630 return self._parse_tokens(self._parse_factor, self.TERM) 3631 3632 def _parse_factor(self) -> t.Optional[exp.Expression]: 3633 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3634 this = parse_method() 3635 3636 while self._match_set(self.FACTOR): 3637 this = self.expression( 3638 self.FACTOR[self._prev.token_type], 3639 this=this, 3640 comments=self._prev_comments, 3641 expression=parse_method(), 3642 ) 3643 if isinstance(this, exp.Div): 3644 this.args["typed"] = self.dialect.TYPED_DIVISION 3645 this.args["safe"] = self.dialect.SAFE_DIVISION 3646 3647 return this 3648 3649 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3650 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3651 3652 def _parse_unary(self) -> t.Optional[exp.Expression]: 3653 if self._match_set(self.UNARY_PARSERS): 3654 return self.UNARY_PARSERS[self._prev.token_type](self) 3655 return self._parse_at_time_zone(self._parse_type()) 3656 3657 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3658 interval = parse_interval and self._parse_interval() 3659 if interval: 3660 # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals 3661 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3662 interval = self.expression( # type: ignore 3663 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3664 ) 3665 3666 return interval 3667 3668 index = self._index 3669 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3670 this = self._parse_column() 3671 3672 if data_type: 3673 if isinstance(this, exp.Literal): 3674 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3675 if parser: 3676 return parser(self, this, data_type) 3677 return self.expression(exp.Cast, this=this, to=data_type) 3678 if not data_type.expressions: 3679 self._retreat(index) 3680 return self._parse_column() 3681 return self._parse_column_ops(data_type) 3682 3683 return this and self._parse_column_ops(this) 3684 3685 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3686 this = self._parse_type() 3687 if not this: 3688 return None 3689 3690 return self.expression( 3691 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3692 ) 3693 3694 def _parse_types( 3695 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3696 ) -> t.Optional[exp.Expression]: 3697 index = self._index 3698 3699 prefix = self._match_text_seq("SYSUDTLIB", ".") 3700 3701 if not self._match_set(self.TYPE_TOKENS): 3702 identifier = allow_identifiers and self._parse_id_var( 3703 any_token=False, tokens=(TokenType.VAR,) 3704 ) 3705 if identifier: 3706 tokens = self.dialect.tokenize(identifier.name) 3707 3708 if len(tokens) != 1: 3709 self.raise_error("Unexpected identifier", self._prev) 3710 3711 if tokens[0].token_type in self.TYPE_TOKENS: 3712 self._prev = tokens[0] 3713 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3714 type_name = identifier.name 3715 3716 while self._match(TokenType.DOT): 3717 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3718 3719 return exp.DataType.build(type_name, udt=True) 3720 else: 3721 self._retreat(self._index - 1) 3722 return None 3723 else: 3724 return None 3725 3726 type_token = self._prev.token_type 3727 3728 if type_token == TokenType.PSEUDO_TYPE: 3729 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3730 3731 if type_token == TokenType.OBJECT_IDENTIFIER: 3732 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3733 3734 nested = type_token in self.NESTED_TYPE_TOKENS 3735 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3736 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3737 expressions = None 3738 maybe_func = False 3739 3740 if self._match(TokenType.L_PAREN): 3741 if is_struct: 3742 expressions = self._parse_csv(self._parse_struct_types) 3743 elif nested: 3744 expressions = self._parse_csv( 3745 lambda: self._parse_types( 3746 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3747 ) 3748 ) 3749 elif type_token in self.ENUM_TYPE_TOKENS: 3750 expressions = self._parse_csv(self._parse_equality) 3751 elif is_aggregate: 3752 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3753 any_token=False, tokens=(TokenType.VAR,) 3754 ) 3755 if not func_or_ident or not self._match(TokenType.COMMA): 3756 return None 3757 expressions = self._parse_csv( 3758 lambda: self._parse_types( 3759 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3760 ) 3761 ) 3762 expressions.insert(0, func_or_ident) 3763 else: 3764 expressions = self._parse_csv(self._parse_type_size) 3765 3766 if not expressions or not self._match(TokenType.R_PAREN): 3767 self._retreat(index) 3768 return None 3769 3770 maybe_func = True 3771 3772 this: t.Optional[exp.Expression] = None 3773 values: t.Optional[t.List[exp.Expression]] = None 3774 3775 if nested and self._match(TokenType.LT): 3776 if is_struct: 3777 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3778 else: 3779 expressions = self._parse_csv( 3780 lambda: self._parse_types( 3781 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3782 ) 3783 ) 3784 3785 if not self._match(TokenType.GT): 3786 self.raise_error("Expecting >") 3787 3788 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3789 values = self._parse_csv(self._parse_conjunction) 3790 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3791 3792 if type_token in self.TIMESTAMPS: 3793 if self._match_text_seq("WITH", "TIME", "ZONE"): 3794 maybe_func = False 3795 tz_type = ( 3796 exp.DataType.Type.TIMETZ 3797 if type_token in self.TIMES 3798 else exp.DataType.Type.TIMESTAMPTZ 3799 ) 3800 this = exp.DataType(this=tz_type, expressions=expressions) 3801 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3802 maybe_func = False 3803 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3804 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3805 maybe_func = False 3806 elif type_token == TokenType.INTERVAL: 3807 unit = self._parse_var() 3808 3809 if self._match_text_seq("TO"): 3810 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3811 else: 3812 span = None 3813 3814 if span or not unit: 3815 this = self.expression( 3816 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3817 ) 3818 else: 3819 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3820 3821 if maybe_func and check_func: 3822 index2 = self._index 3823 peek = self._parse_string() 3824 3825 if not peek: 3826 self._retreat(index) 3827 return None 3828 3829 self._retreat(index2) 3830 3831 if not this: 3832 if self._match_text_seq("UNSIGNED"): 3833 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3834 if not unsigned_type_token: 3835 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3836 3837 type_token = unsigned_type_token or type_token 3838 3839 this = exp.DataType( 3840 this=exp.DataType.Type[type_token.value], 3841 expressions=expressions, 3842 nested=nested, 3843 values=values, 3844 prefix=prefix, 3845 ) 3846 3847 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3848 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3849 3850 return this 3851 3852 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3853 index = self._index 3854 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3855 self._match(TokenType.COLON) 3856 column_def = self._parse_column_def(this) 3857 3858 if type_required and ( 3859 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3860 ): 3861 self._retreat(index) 3862 return self._parse_types() 3863 3864 return column_def 3865 3866 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3867 if not self._match_text_seq("AT", "TIME", "ZONE"): 3868 return this 3869 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3870 3871 def _parse_column(self) -> t.Optional[exp.Expression]: 3872 this = self._parse_column_reference() 3873 return self._parse_column_ops(this) if this else self._parse_bracket(this) 3874 3875 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 3876 this = self._parse_field() 3877 if isinstance(this, exp.Identifier): 3878 this = self.expression(exp.Column, this=this) 3879 return this 3880 3881 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3882 this = self._parse_bracket(this) 3883 3884 while self._match_set(self.COLUMN_OPERATORS): 3885 op_token = self._prev.token_type 3886 op = self.COLUMN_OPERATORS.get(op_token) 3887 3888 if op_token == TokenType.DCOLON: 3889 field = self._parse_types() 3890 if not field: 3891 self.raise_error("Expected type") 3892 elif op and self._curr: 3893 field = self._parse_column_reference() 3894 else: 3895 field = self._parse_field(anonymous_func=True, any_token=True) 3896 3897 if isinstance(field, exp.Func): 3898 # bigquery allows function calls like x.y.count(...) 3899 # SAFE.SUBSTR(...) 3900 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3901 this = self._replace_columns_with_dots(this) 3902 3903 if op: 3904 this = op(self, this, field) 3905 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3906 this = self.expression( 3907 exp.Column, 3908 this=field, 3909 table=this.this, 3910 db=this.args.get("table"), 3911 catalog=this.args.get("db"), 3912 ) 3913 else: 3914 this = self.expression(exp.Dot, this=this, expression=field) 3915 this = self._parse_bracket(this) 3916 return this 3917 3918 def _parse_primary(self) -> t.Optional[exp.Expression]: 3919 if self._match_set(self.PRIMARY_PARSERS): 3920 token_type = self._prev.token_type 3921 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3922 3923 if token_type == TokenType.STRING: 3924 expressions = [primary] 3925 while self._match(TokenType.STRING): 3926 expressions.append(exp.Literal.string(self._prev.text)) 3927 3928 if len(expressions) > 1: 3929 return self.expression(exp.Concat, expressions=expressions) 3930 3931 return primary 3932 3933 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3934 return exp.Literal.number(f"0.{self._prev.text}") 3935 3936 if self._match(TokenType.L_PAREN): 3937 comments = self._prev_comments 3938 query = self._parse_select() 3939 3940 if query: 3941 expressions = [query] 3942 else: 3943 expressions = self._parse_expressions() 3944 3945 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3946 3947 if isinstance(this, exp.Subqueryable): 3948 this = self._parse_set_operations( 3949 self._parse_subquery(this=this, parse_alias=False) 3950 ) 3951 elif len(expressions) > 1: 3952 this = self.expression(exp.Tuple, expressions=expressions) 3953 else: 3954 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3955 3956 if this: 3957 this.add_comments(comments) 3958 3959 self._match_r_paren(expression=this) 3960 return this 3961 3962 return None 3963 3964 def _parse_field( 3965 self, 3966 any_token: bool = False, 3967 tokens: t.Optional[t.Collection[TokenType]] = None, 3968 anonymous_func: bool = False, 3969 ) -> t.Optional[exp.Expression]: 3970 return ( 3971 self._parse_primary() 3972 or self._parse_function(anonymous=anonymous_func) 3973 or self._parse_id_var(any_token=any_token, tokens=tokens) 3974 ) 3975 3976 def _parse_function( 3977 self, 3978 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3979 anonymous: bool = False, 3980 optional_parens: bool = True, 3981 ) -> t.Optional[exp.Expression]: 3982 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3983 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3984 fn_syntax = False 3985 if ( 3986 self._match(TokenType.L_BRACE, advance=False) 3987 and self._next 3988 and self._next.text.upper() == "FN" 3989 ): 3990 self._advance(2) 3991 fn_syntax = True 3992 3993 func = self._parse_function_call( 3994 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3995 ) 3996 3997 if fn_syntax: 3998 self._match(TokenType.R_BRACE) 3999 4000 return func 4001 4002 def _parse_function_call( 4003 self, 4004 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4005 anonymous: bool = False, 4006 optional_parens: bool = True, 4007 ) -> t.Optional[exp.Expression]: 4008 if not self._curr: 4009 return None 4010 4011 comments = self._curr.comments 4012 token_type = self._curr.token_type 4013 this = self._curr.text 4014 upper = this.upper() 4015 4016 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4017 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4018 self._advance() 4019 return parser(self) 4020 4021 if not self._next or self._next.token_type != TokenType.L_PAREN: 4022 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4023 self._advance() 4024 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4025 4026 return None 4027 4028 if token_type not in self.FUNC_TOKENS: 4029 return None 4030 4031 self._advance(2) 4032 4033 parser = self.FUNCTION_PARSERS.get(upper) 4034 if parser and not anonymous: 4035 this = parser(self) 4036 else: 4037 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4038 4039 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4040 this = self.expression(subquery_predicate, this=self._parse_select()) 4041 self._match_r_paren() 4042 return this 4043 4044 if functions is None: 4045 functions = self.FUNCTIONS 4046 4047 function = functions.get(upper) 4048 4049 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4050 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4051 4052 if function and not anonymous: 4053 if "dialect" in function.__code__.co_varnames: 4054 func = function(args, dialect=self.dialect) 4055 else: 4056 func = function(args) 4057 4058 func = self.validate_expression(func, args) 4059 if not self.dialect.NORMALIZE_FUNCTIONS: 4060 func.meta["name"] = this 4061 4062 this = func 4063 else: 4064 this = self.expression(exp.Anonymous, this=this, expressions=args) 4065 4066 if isinstance(this, exp.Expression): 4067 this.add_comments(comments) 4068 4069 self._match_r_paren(this) 4070 return self._parse_window(this) 4071 4072 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4073 return self._parse_column_def(self._parse_id_var()) 4074 4075 def _parse_user_defined_function( 4076 self, kind: t.Optional[TokenType] = None 4077 ) -> t.Optional[exp.Expression]: 4078 this = self._parse_id_var() 4079 4080 while self._match(TokenType.DOT): 4081 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4082 4083 if not self._match(TokenType.L_PAREN): 4084 return this 4085 4086 expressions = self._parse_csv(self._parse_function_parameter) 4087 self._match_r_paren() 4088 return self.expression( 4089 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4090 ) 4091 4092 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4093 literal = self._parse_primary() 4094 if literal: 4095 return self.expression(exp.Introducer, this=token.text, expression=literal) 4096 4097 return self.expression(exp.Identifier, this=token.text) 4098 4099 def _parse_session_parameter(self) -> exp.SessionParameter: 4100 kind = None 4101 this = self._parse_id_var() or self._parse_primary() 4102 4103 if this and self._match(TokenType.DOT): 4104 kind = this.name 4105 this = self._parse_var() or self._parse_primary() 4106 4107 return self.expression(exp.SessionParameter, this=this, kind=kind) 4108 4109 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4110 index = self._index 4111 4112 if self._match(TokenType.L_PAREN): 4113 expressions = t.cast( 4114 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4115 ) 4116 4117 if not self._match(TokenType.R_PAREN): 4118 self._retreat(index) 4119 else: 4120 expressions = [self._parse_id_var()] 4121 4122 if self._match_set(self.LAMBDAS): 4123 return self.LAMBDAS[self._prev.token_type](self, expressions) 4124 4125 self._retreat(index) 4126 4127 this: t.Optional[exp.Expression] 4128 4129 if self._match(TokenType.DISTINCT): 4130 this = self.expression( 4131 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4132 ) 4133 else: 4134 this = self._parse_select_or_expression(alias=alias) 4135 4136 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4137 4138 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4139 index = self._index 4140 4141 if not self.errors: 4142 try: 4143 if self._parse_select(nested=True): 4144 return this 4145 except ParseError: 4146 pass 4147 finally: 4148 self.errors.clear() 4149 self._retreat(index) 4150 4151 if not self._match(TokenType.L_PAREN): 4152 return this 4153 4154 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4155 4156 self._match_r_paren() 4157 return self.expression(exp.Schema, this=this, expressions=args) 4158 4159 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4160 return self._parse_column_def(self._parse_field(any_token=True)) 4161 4162 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4163 # column defs are not really columns, they're identifiers 4164 if isinstance(this, exp.Column): 4165 this = this.this 4166 4167 kind = self._parse_types(schema=True) 4168 4169 if self._match_text_seq("FOR", "ORDINALITY"): 4170 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4171 4172 constraints: t.List[exp.Expression] = [] 4173 4174 if not kind and self._match(TokenType.ALIAS): 4175 constraints.append( 4176 self.expression( 4177 exp.ComputedColumnConstraint, 4178 this=self._parse_conjunction(), 4179 persisted=self._match_text_seq("PERSISTED"), 4180 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4181 ) 4182 ) 4183 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4184 self._match(TokenType.ALIAS) 4185 constraints.append( 4186 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4187 ) 4188 4189 while True: 4190 constraint = self._parse_column_constraint() 4191 if not constraint: 4192 break 4193 constraints.append(constraint) 4194 4195 if not kind and not constraints: 4196 return this 4197 4198 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4199 4200 def _parse_auto_increment( 4201 self, 4202 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4203 start = None 4204 increment = None 4205 4206 if self._match(TokenType.L_PAREN, advance=False): 4207 args = self._parse_wrapped_csv(self._parse_bitwise) 4208 start = seq_get(args, 0) 4209 increment = seq_get(args, 1) 4210 elif self._match_text_seq("START"): 4211 start = self._parse_bitwise() 4212 self._match_text_seq("INCREMENT") 4213 increment = self._parse_bitwise() 4214 4215 if start and increment: 4216 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4217 4218 return exp.AutoIncrementColumnConstraint() 4219 4220 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4221 if not self._match_text_seq("REFRESH"): 4222 self._retreat(self._index - 1) 4223 return None 4224 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4225 4226 def _parse_compress(self) -> exp.CompressColumnConstraint: 4227 if self._match(TokenType.L_PAREN, advance=False): 4228 return self.expression( 4229 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4230 ) 4231 4232 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4233 4234 def _parse_generated_as_identity( 4235 self, 4236 ) -> ( 4237 exp.GeneratedAsIdentityColumnConstraint 4238 | exp.ComputedColumnConstraint 4239 | exp.GeneratedAsRowColumnConstraint 4240 ): 4241 if self._match_text_seq("BY", "DEFAULT"): 4242 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4243 this = self.expression( 4244 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4245 ) 4246 else: 4247 self._match_text_seq("ALWAYS") 4248 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4249 4250 self._match(TokenType.ALIAS) 4251 4252 if self._match_text_seq("ROW"): 4253 start = self._match_text_seq("START") 4254 if not start: 4255 self._match(TokenType.END) 4256 hidden = self._match_text_seq("HIDDEN") 4257 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4258 4259 identity = self._match_text_seq("IDENTITY") 4260 4261 if self._match(TokenType.L_PAREN): 4262 if self._match(TokenType.START_WITH): 4263 this.set("start", self._parse_bitwise()) 4264 if self._match_text_seq("INCREMENT", "BY"): 4265 this.set("increment", self._parse_bitwise()) 4266 if self._match_text_seq("MINVALUE"): 4267 this.set("minvalue", self._parse_bitwise()) 4268 if self._match_text_seq("MAXVALUE"): 4269 this.set("maxvalue", self._parse_bitwise()) 4270 4271 if self._match_text_seq("CYCLE"): 4272 this.set("cycle", True) 4273 elif self._match_text_seq("NO", "CYCLE"): 4274 this.set("cycle", False) 4275 4276 if not identity: 4277 this.set("expression", self._parse_bitwise()) 4278 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4279 args = self._parse_csv(self._parse_bitwise) 4280 this.set("start", seq_get(args, 0)) 4281 this.set("increment", seq_get(args, 1)) 4282 4283 self._match_r_paren() 4284 4285 return this 4286 4287 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4288 self._match_text_seq("LENGTH") 4289 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4290 4291 def _parse_not_constraint( 4292 self, 4293 ) -> t.Optional[exp.Expression]: 4294 if self._match_text_seq("NULL"): 4295 return self.expression(exp.NotNullColumnConstraint) 4296 if self._match_text_seq("CASESPECIFIC"): 4297 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4298 if self._match_text_seq("FOR", "REPLICATION"): 4299 return self.expression(exp.NotForReplicationColumnConstraint) 4300 return None 4301 4302 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4303 if self._match(TokenType.CONSTRAINT): 4304 this = self._parse_id_var() 4305 else: 4306 this = None 4307 4308 if self._match_texts(self.CONSTRAINT_PARSERS): 4309 return self.expression( 4310 exp.ColumnConstraint, 4311 this=this, 4312 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4313 ) 4314 4315 return this 4316 4317 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4318 if not self._match(TokenType.CONSTRAINT): 4319 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4320 4321 this = self._parse_id_var() 4322 expressions = [] 4323 4324 while True: 4325 constraint = self._parse_unnamed_constraint() or self._parse_function() 4326 if not constraint: 4327 break 4328 expressions.append(constraint) 4329 4330 return self.expression(exp.Constraint, this=this, expressions=expressions) 4331 4332 def _parse_unnamed_constraint( 4333 self, constraints: t.Optional[t.Collection[str]] = None 4334 ) -> t.Optional[exp.Expression]: 4335 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4336 constraints or self.CONSTRAINT_PARSERS 4337 ): 4338 return None 4339 4340 constraint = self._prev.text.upper() 4341 if constraint not in self.CONSTRAINT_PARSERS: 4342 self.raise_error(f"No parser found for schema constraint {constraint}.") 4343 4344 return self.CONSTRAINT_PARSERS[constraint](self) 4345 4346 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4347 self._match_text_seq("KEY") 4348 return self.expression( 4349 exp.UniqueColumnConstraint, 4350 this=self._parse_schema(self._parse_id_var(any_token=False)), 4351 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4352 ) 4353 4354 def _parse_key_constraint_options(self) -> t.List[str]: 4355 options = [] 4356 while True: 4357 if not self._curr: 4358 break 4359 4360 if self._match(TokenType.ON): 4361 action = None 4362 on = self._advance_any() and self._prev.text 4363 4364 if self._match_text_seq("NO", "ACTION"): 4365 action = "NO ACTION" 4366 elif self._match_text_seq("CASCADE"): 4367 action = "CASCADE" 4368 elif self._match_text_seq("RESTRICT"): 4369 action = "RESTRICT" 4370 elif self._match_pair(TokenType.SET, TokenType.NULL): 4371 action = "SET NULL" 4372 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4373 action = "SET DEFAULT" 4374 else: 4375 self.raise_error("Invalid key constraint") 4376 4377 options.append(f"ON {on} {action}") 4378 elif self._match_text_seq("NOT", "ENFORCED"): 4379 options.append("NOT ENFORCED") 4380 elif self._match_text_seq("DEFERRABLE"): 4381 options.append("DEFERRABLE") 4382 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4383 options.append("INITIALLY DEFERRED") 4384 elif self._match_text_seq("NORELY"): 4385 options.append("NORELY") 4386 elif self._match_text_seq("MATCH", "FULL"): 4387 options.append("MATCH FULL") 4388 else: 4389 break 4390 4391 return options 4392 4393 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4394 if match and not self._match(TokenType.REFERENCES): 4395 return None 4396 4397 expressions = None 4398 this = self._parse_table(schema=True) 4399 options = self._parse_key_constraint_options() 4400 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4401 4402 def _parse_foreign_key(self) -> exp.ForeignKey: 4403 expressions = self._parse_wrapped_id_vars() 4404 reference = self._parse_references() 4405 options = {} 4406 4407 while self._match(TokenType.ON): 4408 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4409 self.raise_error("Expected DELETE or UPDATE") 4410 4411 kind = self._prev.text.lower() 4412 4413 if self._match_text_seq("NO", "ACTION"): 4414 action = "NO ACTION" 4415 elif self._match(TokenType.SET): 4416 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4417 action = "SET " + self._prev.text.upper() 4418 else: 4419 self._advance() 4420 action = self._prev.text.upper() 4421 4422 options[kind] = action 4423 4424 return self.expression( 4425 exp.ForeignKey, 4426 expressions=expressions, 4427 reference=reference, 4428 **options, # type: ignore 4429 ) 4430 4431 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4432 return self._parse_field() 4433 4434 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4435 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4436 self._retreat(self._index - 1) 4437 return None 4438 4439 id_vars = self._parse_wrapped_id_vars() 4440 return self.expression( 4441 exp.PeriodForSystemTimeConstraint, 4442 this=seq_get(id_vars, 0), 4443 expression=seq_get(id_vars, 1), 4444 ) 4445 4446 def _parse_primary_key( 4447 self, wrapped_optional: bool = False, in_props: bool = False 4448 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4449 desc = ( 4450 self._match_set((TokenType.ASC, TokenType.DESC)) 4451 and self._prev.token_type == TokenType.DESC 4452 ) 4453 4454 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4455 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4456 4457 expressions = self._parse_wrapped_csv( 4458 self._parse_primary_key_part, optional=wrapped_optional 4459 ) 4460 options = self._parse_key_constraint_options() 4461 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4462 4463 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4464 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4465 4466 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4467 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4468 return this 4469 4470 bracket_kind = self._prev.token_type 4471 expressions = self._parse_csv( 4472 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4473 ) 4474 4475 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4476 self.raise_error("Expected ]") 4477 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4478 self.raise_error("Expected }") 4479 4480 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4481 if bracket_kind == TokenType.L_BRACE: 4482 this = self.expression(exp.Struct, expressions=expressions) 4483 elif not this or this.name.upper() == "ARRAY": 4484 this = self.expression(exp.Array, expressions=expressions) 4485 else: 4486 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4487 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4488 4489 self._add_comments(this) 4490 return self._parse_bracket(this) 4491 4492 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4493 if self._match(TokenType.COLON): 4494 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4495 return this 4496 4497 def _parse_case(self) -> t.Optional[exp.Expression]: 4498 ifs = [] 4499 default = None 4500 4501 comments = self._prev_comments 4502 expression = self._parse_conjunction() 4503 4504 while self._match(TokenType.WHEN): 4505 this = self._parse_conjunction() 4506 self._match(TokenType.THEN) 4507 then = self._parse_conjunction() 4508 ifs.append(self.expression(exp.If, this=this, true=then)) 4509 4510 if self._match(TokenType.ELSE): 4511 default = self._parse_conjunction() 4512 4513 if not self._match(TokenType.END): 4514 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4515 default = exp.column("interval") 4516 else: 4517 self.raise_error("Expected END after CASE", self._prev) 4518 4519 return self._parse_window( 4520 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4521 ) 4522 4523 def _parse_if(self) -> t.Optional[exp.Expression]: 4524 if self._match(TokenType.L_PAREN): 4525 args = self._parse_csv(self._parse_conjunction) 4526 this = self.validate_expression(exp.If.from_arg_list(args), args) 4527 self._match_r_paren() 4528 else: 4529 index = self._index - 1 4530 4531 if self.NO_PAREN_IF_COMMANDS and index == 0: 4532 return self._parse_as_command(self._prev) 4533 4534 condition = self._parse_conjunction() 4535 4536 if not condition: 4537 self._retreat(index) 4538 return None 4539 4540 self._match(TokenType.THEN) 4541 true = self._parse_conjunction() 4542 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4543 self._match(TokenType.END) 4544 this = self.expression(exp.If, this=condition, true=true, false=false) 4545 4546 return self._parse_window(this) 4547 4548 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4549 if not self._match_text_seq("VALUE", "FOR"): 4550 self._retreat(self._index - 1) 4551 return None 4552 4553 return self.expression( 4554 exp.NextValueFor, 4555 this=self._parse_column(), 4556 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4557 ) 4558 4559 def _parse_extract(self) -> exp.Extract: 4560 this = self._parse_function() or self._parse_var() or self._parse_type() 4561 4562 if self._match(TokenType.FROM): 4563 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4564 4565 if not self._match(TokenType.COMMA): 4566 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4567 4568 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4569 4570 def _parse_any_value(self) -> exp.AnyValue: 4571 this = self._parse_lambda() 4572 is_max = None 4573 having = None 4574 4575 if self._match(TokenType.HAVING): 4576 self._match_texts(("MAX", "MIN")) 4577 is_max = self._prev.text == "MAX" 4578 having = self._parse_column() 4579 4580 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4581 4582 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4583 this = self._parse_conjunction() 4584 4585 if not self._match(TokenType.ALIAS): 4586 if self._match(TokenType.COMMA): 4587 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4588 4589 self.raise_error("Expected AS after CAST") 4590 4591 fmt = None 4592 to = self._parse_types() 4593 4594 if self._match(TokenType.FORMAT): 4595 fmt_string = self._parse_string() 4596 fmt = self._parse_at_time_zone(fmt_string) 4597 4598 if not to: 4599 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4600 if to.this in exp.DataType.TEMPORAL_TYPES: 4601 this = self.expression( 4602 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4603 this=this, 4604 format=exp.Literal.string( 4605 format_time( 4606 fmt_string.this if fmt_string else "", 4607 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4608 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4609 ) 4610 ), 4611 ) 4612 4613 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4614 this.set("zone", fmt.args["zone"]) 4615 return this 4616 elif not to: 4617 self.raise_error("Expected TYPE after CAST") 4618 elif isinstance(to, exp.Identifier): 4619 to = exp.DataType.build(to.name, udt=True) 4620 elif to.this == exp.DataType.Type.CHAR: 4621 if self._match(TokenType.CHARACTER_SET): 4622 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4623 4624 return self.expression( 4625 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4626 ) 4627 4628 def _parse_string_agg(self) -> exp.Expression: 4629 if self._match(TokenType.DISTINCT): 4630 args: t.List[t.Optional[exp.Expression]] = [ 4631 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4632 ] 4633 if self._match(TokenType.COMMA): 4634 args.extend(self._parse_csv(self._parse_conjunction)) 4635 else: 4636 args = self._parse_csv(self._parse_conjunction) # type: ignore 4637 4638 index = self._index 4639 if not self._match(TokenType.R_PAREN) and args: 4640 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4641 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4642 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4643 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4644 4645 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4646 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4647 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4648 if not self._match_text_seq("WITHIN", "GROUP"): 4649 self._retreat(index) 4650 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4651 4652 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4653 order = self._parse_order(this=seq_get(args, 0)) 4654 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4655 4656 def _parse_convert( 4657 self, strict: bool, safe: t.Optional[bool] = None 4658 ) -> t.Optional[exp.Expression]: 4659 this = self._parse_bitwise() 4660 4661 if self._match(TokenType.USING): 4662 to: t.Optional[exp.Expression] = self.expression( 4663 exp.CharacterSet, this=self._parse_var() 4664 ) 4665 elif self._match(TokenType.COMMA): 4666 to = self._parse_types() 4667 else: 4668 to = None 4669 4670 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4671 4672 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4673 """ 4674 There are generally two variants of the DECODE function: 4675 4676 - DECODE(bin, charset) 4677 - DECODE(expression, search, result [, search, result] ... [, default]) 4678 4679 The second variant will always be parsed into a CASE expression. Note that NULL 4680 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4681 instead of relying on pattern matching. 4682 """ 4683 args = self._parse_csv(self._parse_conjunction) 4684 4685 if len(args) < 3: 4686 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4687 4688 expression, *expressions = args 4689 if not expression: 4690 return None 4691 4692 ifs = [] 4693 for search, result in zip(expressions[::2], expressions[1::2]): 4694 if not search or not result: 4695 return None 4696 4697 if isinstance(search, exp.Literal): 4698 ifs.append( 4699 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4700 ) 4701 elif isinstance(search, exp.Null): 4702 ifs.append( 4703 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4704 ) 4705 else: 4706 cond = exp.or_( 4707 exp.EQ(this=expression.copy(), expression=search), 4708 exp.and_( 4709 exp.Is(this=expression.copy(), expression=exp.Null()), 4710 exp.Is(this=search.copy(), expression=exp.Null()), 4711 copy=False, 4712 ), 4713 copy=False, 4714 ) 4715 ifs.append(exp.If(this=cond, true=result)) 4716 4717 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4718 4719 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4720 self._match_text_seq("KEY") 4721 key = self._parse_column() 4722 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4723 self._match_text_seq("VALUE") 4724 value = self._parse_bitwise() 4725 4726 if not key and not value: 4727 return None 4728 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4729 4730 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4731 if not this or not self._match_text_seq("FORMAT", "JSON"): 4732 return this 4733 4734 return self.expression(exp.FormatJson, this=this) 4735 4736 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4737 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4738 for value in values: 4739 if self._match_text_seq(value, "ON", on): 4740 return f"{value} ON {on}" 4741 4742 return None 4743 4744 @t.overload 4745 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4746 ... 4747 4748 @t.overload 4749 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4750 ... 4751 4752 def _parse_json_object(self, agg=False): 4753 star = self._parse_star() 4754 expressions = ( 4755 [star] 4756 if star 4757 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4758 ) 4759 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4760 4761 unique_keys = None 4762 if self._match_text_seq("WITH", "UNIQUE"): 4763 unique_keys = True 4764 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4765 unique_keys = False 4766 4767 self._match_text_seq("KEYS") 4768 4769 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4770 self._parse_type() 4771 ) 4772 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4773 4774 return self.expression( 4775 exp.JSONObjectAgg if agg else exp.JSONObject, 4776 expressions=expressions, 4777 null_handling=null_handling, 4778 unique_keys=unique_keys, 4779 return_type=return_type, 4780 encoding=encoding, 4781 ) 4782 4783 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4784 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4785 if not self._match_text_seq("NESTED"): 4786 this = self._parse_id_var() 4787 kind = self._parse_types(allow_identifiers=False) 4788 nested = None 4789 else: 4790 this = None 4791 kind = None 4792 nested = True 4793 4794 path = self._match_text_seq("PATH") and self._parse_string() 4795 nested_schema = nested and self._parse_json_schema() 4796 4797 return self.expression( 4798 exp.JSONColumnDef, 4799 this=this, 4800 kind=kind, 4801 path=path, 4802 nested_schema=nested_schema, 4803 ) 4804 4805 def _parse_json_schema(self) -> exp.JSONSchema: 4806 self._match_text_seq("COLUMNS") 4807 return self.expression( 4808 exp.JSONSchema, 4809 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4810 ) 4811 4812 def _parse_json_table(self) -> exp.JSONTable: 4813 this = self._parse_format_json(self._parse_bitwise()) 4814 path = self._match(TokenType.COMMA) and self._parse_string() 4815 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4816 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4817 schema = self._parse_json_schema() 4818 4819 return exp.JSONTable( 4820 this=this, 4821 schema=schema, 4822 path=path, 4823 error_handling=error_handling, 4824 empty_handling=empty_handling, 4825 ) 4826 4827 def _parse_match_against(self) -> exp.MatchAgainst: 4828 expressions = self._parse_csv(self._parse_column) 4829 4830 self._match_text_seq(")", "AGAINST", "(") 4831 4832 this = self._parse_string() 4833 4834 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4835 modifier = "IN NATURAL LANGUAGE MODE" 4836 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4837 modifier = f"{modifier} WITH QUERY EXPANSION" 4838 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4839 modifier = "IN BOOLEAN MODE" 4840 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4841 modifier = "WITH QUERY EXPANSION" 4842 else: 4843 modifier = None 4844 4845 return self.expression( 4846 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4847 ) 4848 4849 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4850 def _parse_open_json(self) -> exp.OpenJSON: 4851 this = self._parse_bitwise() 4852 path = self._match(TokenType.COMMA) and self._parse_string() 4853 4854 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4855 this = self._parse_field(any_token=True) 4856 kind = self._parse_types() 4857 path = self._parse_string() 4858 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4859 4860 return self.expression( 4861 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4862 ) 4863 4864 expressions = None 4865 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4866 self._match_l_paren() 4867 expressions = self._parse_csv(_parse_open_json_column_def) 4868 4869 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4870 4871 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4872 args = self._parse_csv(self._parse_bitwise) 4873 4874 if self._match(TokenType.IN): 4875 return self.expression( 4876 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4877 ) 4878 4879 if haystack_first: 4880 haystack = seq_get(args, 0) 4881 needle = seq_get(args, 1) 4882 else: 4883 needle = seq_get(args, 0) 4884 haystack = seq_get(args, 1) 4885 4886 return self.expression( 4887 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4888 ) 4889 4890 def _parse_predict(self) -> exp.Predict: 4891 self._match_text_seq("MODEL") 4892 this = self._parse_table() 4893 4894 self._match(TokenType.COMMA) 4895 self._match_text_seq("TABLE") 4896 4897 return self.expression( 4898 exp.Predict, 4899 this=this, 4900 expression=self._parse_table(), 4901 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4902 ) 4903 4904 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4905 args = self._parse_csv(self._parse_table) 4906 return exp.JoinHint(this=func_name.upper(), expressions=args) 4907 4908 def _parse_substring(self) -> exp.Substring: 4909 # Postgres supports the form: substring(string [from int] [for int]) 4910 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4911 4912 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4913 4914 if self._match(TokenType.FROM): 4915 args.append(self._parse_bitwise()) 4916 if self._match(TokenType.FOR): 4917 args.append(self._parse_bitwise()) 4918 4919 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4920 4921 def _parse_trim(self) -> exp.Trim: 4922 # https://www.w3resource.com/sql/character-functions/trim.php 4923 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4924 4925 position = None 4926 collation = None 4927 expression = None 4928 4929 if self._match_texts(self.TRIM_TYPES): 4930 position = self._prev.text.upper() 4931 4932 this = self._parse_bitwise() 4933 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4934 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4935 expression = self._parse_bitwise() 4936 4937 if invert_order: 4938 this, expression = expression, this 4939 4940 if self._match(TokenType.COLLATE): 4941 collation = self._parse_bitwise() 4942 4943 return self.expression( 4944 exp.Trim, this=this, position=position, expression=expression, collation=collation 4945 ) 4946 4947 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4948 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4949 4950 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4951 return self._parse_window(self._parse_id_var(), alias=True) 4952 4953 def _parse_respect_or_ignore_nulls( 4954 self, this: t.Optional[exp.Expression] 4955 ) -> t.Optional[exp.Expression]: 4956 if self._match_text_seq("IGNORE", "NULLS"): 4957 return self.expression(exp.IgnoreNulls, this=this) 4958 if self._match_text_seq("RESPECT", "NULLS"): 4959 return self.expression(exp.RespectNulls, this=this) 4960 return this 4961 4962 def _parse_window( 4963 self, this: t.Optional[exp.Expression], alias: bool = False 4964 ) -> t.Optional[exp.Expression]: 4965 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4966 self._match(TokenType.WHERE) 4967 this = self.expression( 4968 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4969 ) 4970 self._match_r_paren() 4971 4972 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4973 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4974 if self._match_text_seq("WITHIN", "GROUP"): 4975 order = self._parse_wrapped(self._parse_order) 4976 this = self.expression(exp.WithinGroup, this=this, expression=order) 4977 4978 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4979 # Some dialects choose to implement and some do not. 4980 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4981 4982 # There is some code above in _parse_lambda that handles 4983 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4984 4985 # The below changes handle 4986 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4987 4988 # Oracle allows both formats 4989 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4990 # and Snowflake chose to do the same for familiarity 4991 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4992 if isinstance(this, exp.AggFunc): 4993 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 4994 4995 if ignore_respect and ignore_respect is not this: 4996 ignore_respect.replace(ignore_respect.this) 4997 this = self.expression(ignore_respect.__class__, this=this) 4998 4999 this = self._parse_respect_or_ignore_nulls(this) 5000 5001 # bigquery select from window x AS (partition by ...) 5002 if alias: 5003 over = None 5004 self._match(TokenType.ALIAS) 5005 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5006 return this 5007 else: 5008 over = self._prev.text.upper() 5009 5010 if not self._match(TokenType.L_PAREN): 5011 return self.expression( 5012 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5013 ) 5014 5015 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5016 5017 first = self._match(TokenType.FIRST) 5018 if self._match_text_seq("LAST"): 5019 first = False 5020 5021 partition, order = self._parse_partition_and_order() 5022 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5023 5024 if kind: 5025 self._match(TokenType.BETWEEN) 5026 start = self._parse_window_spec() 5027 self._match(TokenType.AND) 5028 end = self._parse_window_spec() 5029 5030 spec = self.expression( 5031 exp.WindowSpec, 5032 kind=kind, 5033 start=start["value"], 5034 start_side=start["side"], 5035 end=end["value"], 5036 end_side=end["side"], 5037 ) 5038 else: 5039 spec = None 5040 5041 self._match_r_paren() 5042 5043 window = self.expression( 5044 exp.Window, 5045 this=this, 5046 partition_by=partition, 5047 order=order, 5048 spec=spec, 5049 alias=window_alias, 5050 over=over, 5051 first=first, 5052 ) 5053 5054 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5055 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5056 return self._parse_window(window, alias=alias) 5057 5058 return window 5059 5060 def _parse_partition_and_order( 5061 self, 5062 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5063 return self._parse_partition_by(), self._parse_order() 5064 5065 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5066 self._match(TokenType.BETWEEN) 5067 5068 return { 5069 "value": ( 5070 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5071 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5072 or self._parse_bitwise() 5073 ), 5074 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5075 } 5076 5077 def _parse_alias( 5078 self, this: t.Optional[exp.Expression], explicit: bool = False 5079 ) -> t.Optional[exp.Expression]: 5080 any_token = self._match(TokenType.ALIAS) 5081 comments = self._prev_comments 5082 5083 if explicit and not any_token: 5084 return this 5085 5086 if self._match(TokenType.L_PAREN): 5087 aliases = self.expression( 5088 exp.Aliases, 5089 comments=comments, 5090 this=this, 5091 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5092 ) 5093 self._match_r_paren(aliases) 5094 return aliases 5095 5096 alias = self._parse_id_var(any_token) or ( 5097 self.STRING_ALIASES and self._parse_string_as_identifier() 5098 ) 5099 5100 if alias: 5101 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5102 column = this.this 5103 5104 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5105 if not this.comments and column and column.comments: 5106 this.comments = column.comments 5107 column.comments = None 5108 5109 return this 5110 5111 def _parse_id_var( 5112 self, 5113 any_token: bool = True, 5114 tokens: t.Optional[t.Collection[TokenType]] = None, 5115 ) -> t.Optional[exp.Expression]: 5116 identifier = self._parse_identifier() 5117 5118 if identifier: 5119 return identifier 5120 5121 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5122 quoted = self._prev.token_type == TokenType.STRING 5123 return exp.Identifier(this=self._prev.text, quoted=quoted) 5124 5125 return None 5126 5127 def _parse_string(self) -> t.Optional[exp.Expression]: 5128 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5129 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5130 return self._parse_placeholder() 5131 5132 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5133 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5134 5135 def _parse_number(self) -> t.Optional[exp.Expression]: 5136 if self._match(TokenType.NUMBER): 5137 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5138 return self._parse_placeholder() 5139 5140 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5141 if self._match(TokenType.IDENTIFIER): 5142 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5143 return self._parse_placeholder() 5144 5145 def _parse_var( 5146 self, 5147 any_token: bool = False, 5148 tokens: t.Optional[t.Collection[TokenType]] = None, 5149 upper: bool = False, 5150 ) -> t.Optional[exp.Expression]: 5151 if ( 5152 (any_token and self._advance_any()) 5153 or self._match(TokenType.VAR) 5154 or (self._match_set(tokens) if tokens else False) 5155 ): 5156 return self.expression( 5157 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5158 ) 5159 return self._parse_placeholder() 5160 5161 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5162 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5163 self._advance() 5164 return self._prev 5165 return None 5166 5167 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5168 return self._parse_var() or self._parse_string() 5169 5170 def _parse_null(self) -> t.Optional[exp.Expression]: 5171 if self._match_set(self.NULL_TOKENS): 5172 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5173 return self._parse_placeholder() 5174 5175 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5176 if self._match(TokenType.TRUE): 5177 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5178 if self._match(TokenType.FALSE): 5179 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5180 return self._parse_placeholder() 5181 5182 def _parse_star(self) -> t.Optional[exp.Expression]: 5183 if self._match(TokenType.STAR): 5184 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5185 return self._parse_placeholder() 5186 5187 def _parse_parameter(self) -> exp.Parameter: 5188 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5189 return ( 5190 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5191 ) 5192 5193 self._match(TokenType.L_BRACE) 5194 this = _parse_parameter_part() 5195 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5196 self._match(TokenType.R_BRACE) 5197 5198 return self.expression(exp.Parameter, this=this, expression=expression) 5199 5200 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5201 if self._match_set(self.PLACEHOLDER_PARSERS): 5202 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5203 if placeholder: 5204 return placeholder 5205 self._advance(-1) 5206 return None 5207 5208 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5209 if not self._match(TokenType.EXCEPT): 5210 return None 5211 if self._match(TokenType.L_PAREN, advance=False): 5212 return self._parse_wrapped_csv(self._parse_column) 5213 5214 except_column = self._parse_column() 5215 return [except_column] if except_column else None 5216 5217 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5218 if not self._match(TokenType.REPLACE): 5219 return None 5220 if self._match(TokenType.L_PAREN, advance=False): 5221 return self._parse_wrapped_csv(self._parse_expression) 5222 5223 replace_expression = self._parse_expression() 5224 return [replace_expression] if replace_expression else None 5225 5226 def _parse_csv( 5227 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5228 ) -> t.List[exp.Expression]: 5229 parse_result = parse_method() 5230 items = [parse_result] if parse_result is not None else [] 5231 5232 while self._match(sep): 5233 self._add_comments(parse_result) 5234 parse_result = parse_method() 5235 if parse_result is not None: 5236 items.append(parse_result) 5237 5238 return items 5239 5240 def _parse_tokens( 5241 self, parse_method: t.Callable, expressions: t.Dict 5242 ) -> t.Optional[exp.Expression]: 5243 this = parse_method() 5244 5245 while self._match_set(expressions): 5246 this = self.expression( 5247 expressions[self._prev.token_type], 5248 this=this, 5249 comments=self._prev_comments, 5250 expression=parse_method(), 5251 ) 5252 5253 return this 5254 5255 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5256 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5257 5258 def _parse_wrapped_csv( 5259 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5260 ) -> t.List[exp.Expression]: 5261 return self._parse_wrapped( 5262 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5263 ) 5264 5265 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5266 wrapped = self._match(TokenType.L_PAREN) 5267 if not wrapped and not optional: 5268 self.raise_error("Expecting (") 5269 parse_result = parse_method() 5270 if wrapped: 5271 self._match_r_paren() 5272 return parse_result 5273 5274 def _parse_expressions(self) -> t.List[exp.Expression]: 5275 return self._parse_csv(self._parse_expression) 5276 5277 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5278 return self._parse_select() or self._parse_set_operations( 5279 self._parse_expression() if alias else self._parse_conjunction() 5280 ) 5281 5282 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5283 return self._parse_query_modifiers( 5284 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5285 ) 5286 5287 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5288 this = None 5289 if self._match_texts(self.TRANSACTION_KIND): 5290 this = self._prev.text 5291 5292 self._match_texts(("TRANSACTION", "WORK")) 5293 5294 modes = [] 5295 while True: 5296 mode = [] 5297 while self._match(TokenType.VAR): 5298 mode.append(self._prev.text) 5299 5300 if mode: 5301 modes.append(" ".join(mode)) 5302 if not self._match(TokenType.COMMA): 5303 break 5304 5305 return self.expression(exp.Transaction, this=this, modes=modes) 5306 5307 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5308 chain = None 5309 savepoint = None 5310 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5311 5312 self._match_texts(("TRANSACTION", "WORK")) 5313 5314 if self._match_text_seq("TO"): 5315 self._match_text_seq("SAVEPOINT") 5316 savepoint = self._parse_id_var() 5317 5318 if self._match(TokenType.AND): 5319 chain = not self._match_text_seq("NO") 5320 self._match_text_seq("CHAIN") 5321 5322 if is_rollback: 5323 return self.expression(exp.Rollback, savepoint=savepoint) 5324 5325 return self.expression(exp.Commit, chain=chain) 5326 5327 def _parse_refresh(self) -> exp.Refresh: 5328 self._match(TokenType.TABLE) 5329 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5330 5331 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5332 if not self._match_text_seq("ADD"): 5333 return None 5334 5335 self._match(TokenType.COLUMN) 5336 exists_column = self._parse_exists(not_=True) 5337 expression = self._parse_field_def() 5338 5339 if expression: 5340 expression.set("exists", exists_column) 5341 5342 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5343 if self._match_texts(("FIRST", "AFTER")): 5344 position = self._prev.text 5345 column_position = self.expression( 5346 exp.ColumnPosition, this=self._parse_column(), position=position 5347 ) 5348 expression.set("position", column_position) 5349 5350 return expression 5351 5352 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5353 drop = self._match(TokenType.DROP) and self._parse_drop() 5354 if drop and not isinstance(drop, exp.Command): 5355 drop.set("kind", drop.args.get("kind", "COLUMN")) 5356 return drop 5357 5358 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5359 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5360 return self.expression( 5361 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5362 ) 5363 5364 def _parse_add_constraint(self) -> exp.AddConstraint: 5365 this = None 5366 kind = self._prev.token_type 5367 5368 if kind == TokenType.CONSTRAINT: 5369 this = self._parse_id_var() 5370 5371 if self._match_text_seq("CHECK"): 5372 expression = self._parse_wrapped(self._parse_conjunction) 5373 enforced = self._match_text_seq("ENFORCED") or False 5374 5375 return self.expression( 5376 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5377 ) 5378 5379 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5380 expression = self._parse_foreign_key() 5381 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5382 expression = self._parse_primary_key() 5383 else: 5384 expression = None 5385 5386 return self.expression(exp.AddConstraint, this=this, expression=expression) 5387 5388 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5389 index = self._index - 1 5390 5391 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5392 return self._parse_csv(self._parse_add_constraint) 5393 5394 self._retreat(index) 5395 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5396 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5397 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5398 5399 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5400 self._match(TokenType.COLUMN) 5401 column = self._parse_field(any_token=True) 5402 5403 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5404 return self.expression(exp.AlterColumn, this=column, drop=True) 5405 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5406 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5407 if self._match(TokenType.COMMENT): 5408 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5409 5410 self._match_text_seq("SET", "DATA") 5411 return self.expression( 5412 exp.AlterColumn, 5413 this=column, 5414 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5415 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5416 using=self._match(TokenType.USING) and self._parse_conjunction(), 5417 ) 5418 5419 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5420 index = self._index - 1 5421 5422 partition_exists = self._parse_exists() 5423 if self._match(TokenType.PARTITION, advance=False): 5424 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5425 5426 self._retreat(index) 5427 return self._parse_csv(self._parse_drop_column) 5428 5429 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5430 if self._match(TokenType.COLUMN): 5431 exists = self._parse_exists() 5432 old_column = self._parse_column() 5433 to = self._match_text_seq("TO") 5434 new_column = self._parse_column() 5435 5436 if old_column is None or to is None or new_column is None: 5437 return None 5438 5439 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5440 5441 self._match_text_seq("TO") 5442 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5443 5444 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5445 start = self._prev 5446 5447 if not self._match(TokenType.TABLE): 5448 return self._parse_as_command(start) 5449 5450 exists = self._parse_exists() 5451 only = self._match_text_seq("ONLY") 5452 this = self._parse_table(schema=True) 5453 5454 if self._next: 5455 self._advance() 5456 5457 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5458 if parser: 5459 actions = ensure_list(parser(self)) 5460 5461 if not self._curr and actions: 5462 return self.expression( 5463 exp.AlterTable, 5464 this=this, 5465 exists=exists, 5466 actions=actions, 5467 only=only, 5468 ) 5469 5470 return self._parse_as_command(start) 5471 5472 def _parse_merge(self) -> exp.Merge: 5473 self._match(TokenType.INTO) 5474 target = self._parse_table() 5475 5476 if target and self._match(TokenType.ALIAS, advance=False): 5477 target.set("alias", self._parse_table_alias()) 5478 5479 self._match(TokenType.USING) 5480 using = self._parse_table() 5481 5482 self._match(TokenType.ON) 5483 on = self._parse_conjunction() 5484 5485 return self.expression( 5486 exp.Merge, 5487 this=target, 5488 using=using, 5489 on=on, 5490 expressions=self._parse_when_matched(), 5491 ) 5492 5493 def _parse_when_matched(self) -> t.List[exp.When]: 5494 whens = [] 5495 5496 while self._match(TokenType.WHEN): 5497 matched = not self._match(TokenType.NOT) 5498 self._match_text_seq("MATCHED") 5499 source = ( 5500 False 5501 if self._match_text_seq("BY", "TARGET") 5502 else self._match_text_seq("BY", "SOURCE") 5503 ) 5504 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5505 5506 self._match(TokenType.THEN) 5507 5508 if self._match(TokenType.INSERT): 5509 _this = self._parse_star() 5510 if _this: 5511 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5512 else: 5513 then = self.expression( 5514 exp.Insert, 5515 this=self._parse_value(), 5516 expression=self._match(TokenType.VALUES) and self._parse_value(), 5517 ) 5518 elif self._match(TokenType.UPDATE): 5519 expressions = self._parse_star() 5520 if expressions: 5521 then = self.expression(exp.Update, expressions=expressions) 5522 else: 5523 then = self.expression( 5524 exp.Update, 5525 expressions=self._match(TokenType.SET) 5526 and self._parse_csv(self._parse_equality), 5527 ) 5528 elif self._match(TokenType.DELETE): 5529 then = self.expression(exp.Var, this=self._prev.text) 5530 else: 5531 then = None 5532 5533 whens.append( 5534 self.expression( 5535 exp.When, 5536 matched=matched, 5537 source=source, 5538 condition=condition, 5539 then=then, 5540 ) 5541 ) 5542 return whens 5543 5544 def _parse_show(self) -> t.Optional[exp.Expression]: 5545 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5546 if parser: 5547 return parser(self) 5548 return self._parse_as_command(self._prev) 5549 5550 def _parse_set_item_assignment( 5551 self, kind: t.Optional[str] = None 5552 ) -> t.Optional[exp.Expression]: 5553 index = self._index 5554 5555 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5556 return self._parse_set_transaction(global_=kind == "GLOBAL") 5557 5558 left = self._parse_primary() or self._parse_id_var() 5559 assignment_delimiter = self._match_texts(("=", "TO")) 5560 5561 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5562 self._retreat(index) 5563 return None 5564 5565 right = self._parse_statement() or self._parse_id_var() 5566 this = self.expression(exp.EQ, this=left, expression=right) 5567 5568 return self.expression(exp.SetItem, this=this, kind=kind) 5569 5570 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5571 self._match_text_seq("TRANSACTION") 5572 characteristics = self._parse_csv( 5573 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5574 ) 5575 return self.expression( 5576 exp.SetItem, 5577 expressions=characteristics, 5578 kind="TRANSACTION", 5579 **{"global": global_}, # type: ignore 5580 ) 5581 5582 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5583 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5584 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5585 5586 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5587 index = self._index 5588 set_ = self.expression( 5589 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5590 ) 5591 5592 if self._curr: 5593 self._retreat(index) 5594 return self._parse_as_command(self._prev) 5595 5596 return set_ 5597 5598 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5599 for option in options: 5600 if self._match_text_seq(*option.split(" ")): 5601 return exp.var(option) 5602 return None 5603 5604 def _parse_as_command(self, start: Token) -> exp.Command: 5605 while self._curr: 5606 self._advance() 5607 text = self._find_sql(start, self._prev) 5608 size = len(start.text) 5609 self._warn_unsupported() 5610 return exp.Command(this=text[:size], expression=text[size:]) 5611 5612 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5613 settings = [] 5614 5615 self._match_l_paren() 5616 kind = self._parse_id_var() 5617 5618 if self._match(TokenType.L_PAREN): 5619 while True: 5620 key = self._parse_id_var() 5621 value = self._parse_primary() 5622 5623 if not key and value is None: 5624 break 5625 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5626 self._match(TokenType.R_PAREN) 5627 5628 self._match_r_paren() 5629 5630 return self.expression( 5631 exp.DictProperty, 5632 this=this, 5633 kind=kind.this if kind else None, 5634 settings=settings, 5635 ) 5636 5637 def _parse_dict_range(self, this: str) -> exp.DictRange: 5638 self._match_l_paren() 5639 has_min = self._match_text_seq("MIN") 5640 if has_min: 5641 min = self._parse_var() or self._parse_primary() 5642 self._match_text_seq("MAX") 5643 max = self._parse_var() or self._parse_primary() 5644 else: 5645 max = self._parse_var() or self._parse_primary() 5646 min = exp.Literal.number(0) 5647 self._match_r_paren() 5648 return self.expression(exp.DictRange, this=this, min=min, max=max) 5649 5650 def _parse_comprehension( 5651 self, this: t.Optional[exp.Expression] 5652 ) -> t.Optional[exp.Comprehension]: 5653 index = self._index 5654 expression = self._parse_column() 5655 if not self._match(TokenType.IN): 5656 self._retreat(index - 1) 5657 return None 5658 iterator = self._parse_column() 5659 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5660 return self.expression( 5661 exp.Comprehension, 5662 this=this, 5663 expression=expression, 5664 iterator=iterator, 5665 condition=condition, 5666 ) 5667 5668 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5669 if self._match(TokenType.HEREDOC_STRING): 5670 return self.expression(exp.Heredoc, this=self._prev.text) 5671 5672 if not self._match_text_seq("$"): 5673 return None 5674 5675 tags = ["$"] 5676 tag_text = None 5677 5678 if self._is_connected(): 5679 self._advance() 5680 tags.append(self._prev.text.upper()) 5681 else: 5682 self.raise_error("No closing $ found") 5683 5684 if tags[-1] != "$": 5685 if self._is_connected() and self._match_text_seq("$"): 5686 tag_text = tags[-1] 5687 tags.append("$") 5688 else: 5689 self.raise_error("No closing $ found") 5690 5691 heredoc_start = self._curr 5692 5693 while self._curr: 5694 if self._match_text_seq(*tags, advance=False): 5695 this = self._find_sql(heredoc_start, self._prev) 5696 self._advance(len(tags)) 5697 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5698 5699 self._advance() 5700 5701 self.raise_error(f"No closing {''.join(tags)} found") 5702 return None 5703 5704 def _find_parser( 5705 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5706 ) -> t.Optional[t.Callable]: 5707 if not self._curr: 5708 return None 5709 5710 index = self._index 5711 this = [] 5712 while True: 5713 # The current token might be multiple words 5714 curr = self._curr.text.upper() 5715 key = curr.split(" ") 5716 this.append(curr) 5717 5718 self._advance() 5719 result, trie = in_trie(trie, key) 5720 if result == TrieResult.FAILED: 5721 break 5722 5723 if result == TrieResult.EXISTS: 5724 subparser = parsers[" ".join(this)] 5725 return subparser 5726 5727 self._retreat(index) 5728 return None 5729 5730 def _match(self, token_type, advance=True, expression=None): 5731 if not self._curr: 5732 return None 5733 5734 if self._curr.token_type == token_type: 5735 if advance: 5736 self._advance() 5737 self._add_comments(expression) 5738 return True 5739 5740 return None 5741 5742 def _match_set(self, types, advance=True): 5743 if not self._curr: 5744 return None 5745 5746 if self._curr.token_type in types: 5747 if advance: 5748 self._advance() 5749 return True 5750 5751 return None 5752 5753 def _match_pair(self, token_type_a, token_type_b, advance=True): 5754 if not self._curr or not self._next: 5755 return None 5756 5757 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5758 if advance: 5759 self._advance(2) 5760 return True 5761 5762 return None 5763 5764 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5765 if not self._match(TokenType.L_PAREN, expression=expression): 5766 self.raise_error("Expecting (") 5767 5768 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5769 if not self._match(TokenType.R_PAREN, expression=expression): 5770 self.raise_error("Expecting )") 5771 5772 def _match_texts(self, texts, advance=True): 5773 if self._curr and self._curr.text.upper() in texts: 5774 if advance: 5775 self._advance() 5776 return True 5777 return None 5778 5779 def _match_text_seq(self, *texts, advance=True): 5780 index = self._index 5781 for text in texts: 5782 if self._curr and self._curr.text.upper() == text: 5783 self._advance() 5784 else: 5785 self._retreat(index) 5786 return None 5787 5788 if not advance: 5789 self._retreat(index) 5790 5791 return True 5792 5793 @t.overload 5794 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5795 ... 5796 5797 @t.overload 5798 def _replace_columns_with_dots( 5799 self, this: t.Optional[exp.Expression] 5800 ) -> t.Optional[exp.Expression]: 5801 ... 5802 5803 def _replace_columns_with_dots(self, this): 5804 if isinstance(this, exp.Dot): 5805 exp.replace_children(this, self._replace_columns_with_dots) 5806 elif isinstance(this, exp.Column): 5807 exp.replace_children(this, self._replace_columns_with_dots) 5808 table = this.args.get("table") 5809 this = ( 5810 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5811 ) 5812 5813 return this 5814 5815 def _replace_lambda( 5816 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5817 ) -> t.Optional[exp.Expression]: 5818 if not node: 5819 return node 5820 5821 for column in node.find_all(exp.Column): 5822 if column.parts[0].name in lambda_variables: 5823 dot_or_id = column.to_dot() if column.table else column.this 5824 parent = column.parent 5825 5826 while isinstance(parent, exp.Dot): 5827 if not isinstance(parent.parent, exp.Dot): 5828 parent.replace(dot_or_id) 5829 break 5830 parent = parent.parent 5831 else: 5832 if column is node: 5833 node = dot_or_id 5834 else: 5835 column.replace(dot_or_id) 5836 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1027 def __init__( 1028 self, 1029 error_level: t.Optional[ErrorLevel] = None, 1030 error_message_context: int = 100, 1031 max_errors: int = 3, 1032 dialect: DialectType = None, 1033 ): 1034 from sqlglot.dialects import Dialect 1035 1036 self.error_level = error_level or ErrorLevel.IMMEDIATE 1037 self.error_message_context = error_message_context 1038 self.max_errors = max_errors 1039 self.dialect = Dialect.get_or_raise(dialect) 1040 self.reset()
1052 def parse( 1053 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 """ 1056 Parses a list of tokens and returns a list of syntax trees, one tree 1057 per parsed SQL statement. 1058 1059 Args: 1060 raw_tokens: The list of tokens. 1061 sql: The original SQL string, used to produce helpful debug messages. 1062 1063 Returns: 1064 The list of the produced syntax trees. 1065 """ 1066 return self._parse( 1067 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1068 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1070 def parse_into( 1071 self, 1072 expression_types: exp.IntoType, 1073 raw_tokens: t.List[Token], 1074 sql: t.Optional[str] = None, 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens into a given Expression type. If a collection of Expression 1078 types is given instead, this method will try to parse the token list into each one 1079 of them, stopping at the first for which the parsing succeeds. 1080 1081 Args: 1082 expression_types: The expression type(s) to try and parse the token list into. 1083 raw_tokens: The list of tokens. 1084 sql: The original SQL string, used to produce helpful debug messages. 1085 1086 Returns: 1087 The target Expression. 1088 """ 1089 errors = [] 1090 for expression_type in ensure_list(expression_types): 1091 parser = self.EXPRESSION_PARSERS.get(expression_type) 1092 if not parser: 1093 raise TypeError(f"No parser registered for {expression_type}") 1094 1095 try: 1096 return self._parse(parser, raw_tokens, sql) 1097 except ParseError as e: 1098 e.errors[0]["into_expression"] = expression_type 1099 errors.append(e) 1100 1101 raise ParseError( 1102 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1103 errors=merge_errors(errors), 1104 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1141 def check_errors(self) -> None: 1142 """Logs or raises any found errors, depending on the chosen error level setting.""" 1143 if self.error_level == ErrorLevel.WARN: 1144 for error in self.errors: 1145 logger.error(str(error)) 1146 elif self.error_level == ErrorLevel.RAISE and self.errors: 1147 raise ParseError( 1148 concat_messages(self.errors, self.max_errors), 1149 errors=merge_errors(self.errors), 1150 )
Logs or raises any found errors, depending on the chosen error level setting.
1152 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1153 """ 1154 Appends an error in the list of recorded errors or raises it, depending on the chosen 1155 error level setting. 1156 """ 1157 token = token or self._curr or self._prev or Token.string("") 1158 start = token.start 1159 end = token.end + 1 1160 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1161 highlight = self.sql[start:end] 1162 end_context = self.sql[end : end + self.error_message_context] 1163 1164 error = ParseError.new( 1165 f"{message}. Line {token.line}, Col: {token.col}.\n" 1166 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1167 description=message, 1168 line=token.line, 1169 col=token.col, 1170 start_context=start_context, 1171 highlight=highlight, 1172 end_context=end_context, 1173 ) 1174 1175 if self.error_level == ErrorLevel.IMMEDIATE: 1176 raise error 1177 1178 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1180 def expression( 1181 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1182 ) -> E: 1183 """ 1184 Creates a new, validated Expression. 1185 1186 Args: 1187 exp_class: The expression class to instantiate. 1188 comments: An optional list of comments to attach to the expression. 1189 kwargs: The arguments to set for the expression along with their respective values. 1190 1191 Returns: 1192 The target expression. 1193 """ 1194 instance = exp_class(**kwargs) 1195 instance.add_comments(comments) if comments else self._add_comments(instance) 1196 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1203 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1204 """ 1205 Validates an Expression, making sure that all its mandatory arguments are set. 1206 1207 Args: 1208 expression: The expression to validate. 1209 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1210 1211 Returns: 1212 The validated expression. 1213 """ 1214 if self.error_level != ErrorLevel.IGNORE: 1215 for error_message in expression.error_messages(args): 1216 self.raise_error(error_message) 1217 1218 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.