# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63class _Parser(type): 64 def __new__(cls, clsname, bases, attrs): 65 klass = 
super().__new__(cls, clsname, bases, attrs) 66 67 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 68 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 69 70 return klass 71 72 73class Parser(metaclass=_Parser): 74 """ 75 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 76 77 Args: 78 error_level: The desired error level. 79 Default: ErrorLevel.IMMEDIATE 80 error_message_context: Determines the amount of context to capture from a 81 query string when displaying the error message (in number of characters). 82 Default: 100 83 max_errors: Maximum number of error messages to include in a raised ParseError. 84 This is only relevant if error_level is ErrorLevel.RAISE. 85 Default: 3 86 """ 87 88 FUNCTIONS: t.Dict[str, t.Callable] = { 89 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 90 "CONCAT": lambda args, dialect: exp.Concat( 91 expressions=args, 92 safe=not dialect.STRICT_STRING_CONCAT, 93 coalesce=dialect.CONCAT_COALESCE, 94 ), 95 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 96 expressions=args, 97 safe=not dialect.STRICT_STRING_CONCAT, 98 coalesce=dialect.CONCAT_COALESCE, 99 ), 100 "DATE_TO_DATE_STR": lambda args: exp.Cast( 101 this=seq_get(args, 0), 102 to=exp.DataType(this=exp.DataType.Type.TEXT), 103 ), 104 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 105 "LIKE": parse_like, 106 "LOG": parse_logarithm, 107 "TIME_TO_TIME_STR": lambda args: exp.Cast( 108 this=seq_get(args, 0), 109 to=exp.DataType(this=exp.DataType.Type.TEXT), 110 ), 111 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 112 this=exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 start=exp.Literal.number(1), 117 length=exp.Literal.number(10), 118 ), 119 "VAR_MAP": parse_var_map, 120 } 121 122 NO_PAREN_FUNCTIONS = { 123 TokenType.CURRENT_DATE: exp.CurrentDate, 124 TokenType.CURRENT_DATETIME: 
exp.CurrentDate, 125 TokenType.CURRENT_TIME: exp.CurrentTime, 126 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 127 TokenType.CURRENT_USER: exp.CurrentUser, 128 } 129 130 STRUCT_TYPE_TOKENS = { 131 TokenType.NESTED, 132 TokenType.STRUCT, 133 } 134 135 NESTED_TYPE_TOKENS = { 136 TokenType.ARRAY, 137 TokenType.LOWCARDINALITY, 138 TokenType.MAP, 139 TokenType.NULLABLE, 140 *STRUCT_TYPE_TOKENS, 141 } 142 143 ENUM_TYPE_TOKENS = { 144 TokenType.ENUM, 145 TokenType.ENUM8, 146 TokenType.ENUM16, 147 } 148 149 TYPE_TOKENS = { 150 TokenType.BIT, 151 TokenType.BOOLEAN, 152 TokenType.TINYINT, 153 TokenType.UTINYINT, 154 TokenType.SMALLINT, 155 TokenType.USMALLINT, 156 TokenType.INT, 157 TokenType.UINT, 158 TokenType.BIGINT, 159 TokenType.UBIGINT, 160 TokenType.INT128, 161 TokenType.UINT128, 162 TokenType.INT256, 163 TokenType.UINT256, 164 TokenType.MEDIUMINT, 165 TokenType.UMEDIUMINT, 166 TokenType.FIXEDSTRING, 167 TokenType.FLOAT, 168 TokenType.DOUBLE, 169 TokenType.CHAR, 170 TokenType.NCHAR, 171 TokenType.VARCHAR, 172 TokenType.NVARCHAR, 173 TokenType.TEXT, 174 TokenType.MEDIUMTEXT, 175 TokenType.LONGTEXT, 176 TokenType.MEDIUMBLOB, 177 TokenType.LONGBLOB, 178 TokenType.BINARY, 179 TokenType.VARBINARY, 180 TokenType.JSON, 181 TokenType.JSONB, 182 TokenType.INTERVAL, 183 TokenType.TINYBLOB, 184 TokenType.TINYTEXT, 185 TokenType.TIME, 186 TokenType.TIMETZ, 187 TokenType.TIMESTAMP, 188 TokenType.TIMESTAMP_S, 189 TokenType.TIMESTAMP_MS, 190 TokenType.TIMESTAMP_NS, 191 TokenType.TIMESTAMPTZ, 192 TokenType.TIMESTAMPLTZ, 193 TokenType.DATETIME, 194 TokenType.DATETIME64, 195 TokenType.DATE, 196 TokenType.INT4RANGE, 197 TokenType.INT4MULTIRANGE, 198 TokenType.INT8RANGE, 199 TokenType.INT8MULTIRANGE, 200 TokenType.NUMRANGE, 201 TokenType.NUMMULTIRANGE, 202 TokenType.TSRANGE, 203 TokenType.TSMULTIRANGE, 204 TokenType.TSTZRANGE, 205 TokenType.TSTZMULTIRANGE, 206 TokenType.DATERANGE, 207 TokenType.DATEMULTIRANGE, 208 TokenType.DECIMAL, 209 TokenType.UDECIMAL, 210 TokenType.BIGDECIMAL, 
211 TokenType.UUID, 212 TokenType.GEOGRAPHY, 213 TokenType.GEOMETRY, 214 TokenType.HLLSKETCH, 215 TokenType.HSTORE, 216 TokenType.PSEUDO_TYPE, 217 TokenType.SUPER, 218 TokenType.SERIAL, 219 TokenType.SMALLSERIAL, 220 TokenType.BIGSERIAL, 221 TokenType.XML, 222 TokenType.YEAR, 223 TokenType.UNIQUEIDENTIFIER, 224 TokenType.USERDEFINED, 225 TokenType.MONEY, 226 TokenType.SMALLMONEY, 227 TokenType.ROWVERSION, 228 TokenType.IMAGE, 229 TokenType.VARIANT, 230 TokenType.OBJECT, 231 TokenType.OBJECT_IDENTIFIER, 232 TokenType.INET, 233 TokenType.IPADDRESS, 234 TokenType.IPPREFIX, 235 TokenType.UNKNOWN, 236 TokenType.NULL, 237 *ENUM_TYPE_TOKENS, 238 *NESTED_TYPE_TOKENS, 239 } 240 241 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 242 TokenType.BIGINT: TokenType.UBIGINT, 243 TokenType.INT: TokenType.UINT, 244 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 245 TokenType.SMALLINT: TokenType.USMALLINT, 246 TokenType.TINYINT: TokenType.UTINYINT, 247 TokenType.DECIMAL: TokenType.UDECIMAL, 248 } 249 250 SUBQUERY_PREDICATES = { 251 TokenType.ANY: exp.Any, 252 TokenType.ALL: exp.All, 253 TokenType.EXISTS: exp.Exists, 254 TokenType.SOME: exp.Any, 255 } 256 257 RESERVED_TOKENS = { 258 *Tokenizer.SINGLE_TOKENS.values(), 259 TokenType.SELECT, 260 } 261 262 DB_CREATABLES = { 263 TokenType.DATABASE, 264 TokenType.SCHEMA, 265 TokenType.TABLE, 266 TokenType.VIEW, 267 TokenType.MODEL, 268 TokenType.DICTIONARY, 269 } 270 271 CREATABLES = { 272 TokenType.COLUMN, 273 TokenType.CONSTRAINT, 274 TokenType.FUNCTION, 275 TokenType.INDEX, 276 TokenType.PROCEDURE, 277 TokenType.FOREIGN_KEY, 278 *DB_CREATABLES, 279 } 280 281 # Tokens that can represent identifiers 282 ID_VAR_TOKENS = { 283 TokenType.VAR, 284 TokenType.ANTI, 285 TokenType.APPLY, 286 TokenType.ASC, 287 TokenType.AUTO_INCREMENT, 288 TokenType.BEGIN, 289 TokenType.CACHE, 290 TokenType.CASE, 291 TokenType.COLLATE, 292 TokenType.COMMAND, 293 TokenType.COMMENT, 294 TokenType.COMMIT, 295 TokenType.CONSTRAINT, 296 TokenType.DEFAULT, 297 TokenType.DELETE, 298 
TokenType.DESC, 299 TokenType.DESCRIBE, 300 TokenType.DICTIONARY, 301 TokenType.DIV, 302 TokenType.END, 303 TokenType.EXECUTE, 304 TokenType.ESCAPE, 305 TokenType.FALSE, 306 TokenType.FIRST, 307 TokenType.FILTER, 308 TokenType.FINAL, 309 TokenType.FORMAT, 310 TokenType.FULL, 311 TokenType.IS, 312 TokenType.ISNULL, 313 TokenType.INTERVAL, 314 TokenType.KEEP, 315 TokenType.KILL, 316 TokenType.LEFT, 317 TokenType.LOAD, 318 TokenType.MERGE, 319 TokenType.NATURAL, 320 TokenType.NEXT, 321 TokenType.OFFSET, 322 TokenType.OPERATOR, 323 TokenType.ORDINALITY, 324 TokenType.OVERLAPS, 325 TokenType.OVERWRITE, 326 TokenType.PARTITION, 327 TokenType.PERCENT, 328 TokenType.PIVOT, 329 TokenType.PRAGMA, 330 TokenType.RANGE, 331 TokenType.RECURSIVE, 332 TokenType.REFERENCES, 333 TokenType.REFRESH, 334 TokenType.REPLACE, 335 TokenType.RIGHT, 336 TokenType.ROW, 337 TokenType.ROWS, 338 TokenType.SEMI, 339 TokenType.SET, 340 TokenType.SETTINGS, 341 TokenType.SHOW, 342 TokenType.TEMPORARY, 343 TokenType.TOP, 344 TokenType.TRUE, 345 TokenType.UNIQUE, 346 TokenType.UNPIVOT, 347 TokenType.UPDATE, 348 TokenType.USE, 349 TokenType.VOLATILE, 350 TokenType.WINDOW, 351 *CREATABLES, 352 *SUBQUERY_PREDICATES, 353 *TYPE_TOKENS, 354 *NO_PAREN_FUNCTIONS, 355 } 356 357 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 358 359 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 360 TokenType.ANTI, 361 TokenType.APPLY, 362 TokenType.ASOF, 363 TokenType.FULL, 364 TokenType.LEFT, 365 TokenType.LOCK, 366 TokenType.NATURAL, 367 TokenType.OFFSET, 368 TokenType.RIGHT, 369 TokenType.SEMI, 370 TokenType.WINDOW, 371 } 372 373 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 374 375 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 376 377 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 378 379 FUNC_TOKENS = { 380 TokenType.COLLATE, 381 TokenType.COMMAND, 382 TokenType.CURRENT_DATE, 383 TokenType.CURRENT_DATETIME, 384 TokenType.CURRENT_TIMESTAMP, 385 TokenType.CURRENT_TIME, 386 TokenType.CURRENT_USER, 387 
TokenType.FILTER, 388 TokenType.FIRST, 389 TokenType.FORMAT, 390 TokenType.GLOB, 391 TokenType.IDENTIFIER, 392 TokenType.INDEX, 393 TokenType.ISNULL, 394 TokenType.ILIKE, 395 TokenType.INSERT, 396 TokenType.LIKE, 397 TokenType.MERGE, 398 TokenType.OFFSET, 399 TokenType.PRIMARY_KEY, 400 TokenType.RANGE, 401 TokenType.REPLACE, 402 TokenType.RLIKE, 403 TokenType.ROW, 404 TokenType.UNNEST, 405 TokenType.VAR, 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.DATE, 409 TokenType.DATETIME, 410 TokenType.TABLE, 411 TokenType.TIMESTAMP, 412 TokenType.TIMESTAMPTZ, 413 TokenType.WINDOW, 414 TokenType.XOR, 415 *TYPE_TOKENS, 416 *SUBQUERY_PREDICATES, 417 } 418 419 CONJUNCTION = { 420 TokenType.AND: exp.And, 421 TokenType.OR: exp.Or, 422 } 423 424 EQUALITY = { 425 TokenType.COLON_EQ: exp.PropertyEQ, 426 TokenType.EQ: exp.EQ, 427 TokenType.NEQ: exp.NEQ, 428 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 429 } 430 431 COMPARISON = { 432 TokenType.GT: exp.GT, 433 TokenType.GTE: exp.GTE, 434 TokenType.LT: exp.LT, 435 TokenType.LTE: exp.LTE, 436 } 437 438 BITWISE = { 439 TokenType.AMP: exp.BitwiseAnd, 440 TokenType.CARET: exp.BitwiseXor, 441 TokenType.PIPE: exp.BitwiseOr, 442 } 443 444 TERM = { 445 TokenType.DASH: exp.Sub, 446 TokenType.PLUS: exp.Add, 447 TokenType.MOD: exp.Mod, 448 TokenType.COLLATE: exp.Collate, 449 } 450 451 FACTOR = { 452 TokenType.DIV: exp.IntDiv, 453 TokenType.LR_ARROW: exp.Distance, 454 TokenType.SLASH: exp.Div, 455 TokenType.STAR: exp.Mul, 456 } 457 458 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 459 460 TIMES = { 461 TokenType.TIME, 462 TokenType.TIMETZ, 463 } 464 465 TIMESTAMPS = { 466 TokenType.TIMESTAMP, 467 TokenType.TIMESTAMPTZ, 468 TokenType.TIMESTAMPLTZ, 469 *TIMES, 470 } 471 472 SET_OPERATIONS = { 473 TokenType.UNION, 474 TokenType.INTERSECT, 475 TokenType.EXCEPT, 476 } 477 478 JOIN_METHODS = { 479 TokenType.NATURAL, 480 TokenType.ASOF, 481 } 482 483 JOIN_SIDES = { 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.FULL, 487 } 488 489 
JOIN_KINDS = { 490 TokenType.INNER, 491 TokenType.OUTER, 492 TokenType.CROSS, 493 TokenType.SEMI, 494 TokenType.ANTI, 495 } 496 497 JOIN_HINTS: t.Set[str] = set() 498 499 LAMBDAS = { 500 TokenType.ARROW: lambda self, expressions: self.expression( 501 exp.Lambda, 502 this=self._replace_lambda( 503 self._parse_conjunction(), 504 {node.name for node in expressions}, 505 ), 506 expressions=expressions, 507 ), 508 TokenType.FARROW: lambda self, expressions: self.expression( 509 exp.Kwarg, 510 this=exp.var(expressions[0].name), 511 expression=self._parse_conjunction(), 512 ), 513 } 514 515 COLUMN_OPERATORS = { 516 TokenType.DOT: None, 517 TokenType.DCOLON: lambda self, this, to: self.expression( 518 exp.Cast if self.STRICT_CAST else exp.TryCast, 519 this=this, 520 to=to, 521 ), 522 TokenType.ARROW: lambda self, this, path: self.expression( 523 exp.JSONExtract, 524 this=this, 525 expression=path, 526 ), 527 TokenType.DARROW: lambda self, this, path: self.expression( 528 exp.JSONExtractScalar, 529 this=this, 530 expression=path, 531 ), 532 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 533 exp.JSONBExtract, 534 this=this, 535 expression=path, 536 ), 537 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 538 exp.JSONBExtractScalar, 539 this=this, 540 expression=path, 541 ), 542 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 543 exp.JSONBContains, 544 this=this, 545 expression=key, 546 ), 547 } 548 549 EXPRESSION_PARSERS = { 550 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 551 exp.Column: lambda self: self._parse_column(), 552 exp.Condition: lambda self: self._parse_conjunction(), 553 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 554 exp.Expression: lambda self: self._parse_statement(), 555 exp.From: lambda self: self._parse_from(), 556 exp.Group: lambda self: self._parse_group(), 557 exp.Having: lambda self: self._parse_having(), 558 exp.Identifier: lambda self: 
self._parse_id_var(), 559 exp.Join: lambda self: self._parse_join(), 560 exp.Lambda: lambda self: self._parse_lambda(), 561 exp.Lateral: lambda self: self._parse_lateral(), 562 exp.Limit: lambda self: self._parse_limit(), 563 exp.Offset: lambda self: self._parse_offset(), 564 exp.Order: lambda self: self._parse_order(), 565 exp.Ordered: lambda self: self._parse_ordered(), 566 exp.Properties: lambda self: self._parse_properties(), 567 exp.Qualify: lambda self: self._parse_qualify(), 568 exp.Returning: lambda self: self._parse_returning(), 569 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 570 exp.Table: lambda self: self._parse_table_parts(), 571 exp.TableAlias: lambda self: self._parse_table_alias(), 572 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 573 exp.Where: lambda self: self._parse_where(), 574 exp.Window: lambda self: self._parse_named_window(), 575 exp.With: lambda self: self._parse_with(), 576 "JOIN_TYPE": lambda self: self._parse_join_parts(), 577 } 578 579 STATEMENT_PARSERS = { 580 TokenType.ALTER: lambda self: self._parse_alter(), 581 TokenType.BEGIN: lambda self: self._parse_transaction(), 582 TokenType.CACHE: lambda self: self._parse_cache(), 583 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 584 TokenType.COMMENT: lambda self: self._parse_comment(), 585 TokenType.CREATE: lambda self: self._parse_create(), 586 TokenType.DELETE: lambda self: self._parse_delete(), 587 TokenType.DESC: lambda self: self._parse_describe(), 588 TokenType.DESCRIBE: lambda self: self._parse_describe(), 589 TokenType.DROP: lambda self: self._parse_drop(), 590 TokenType.INSERT: lambda self: self._parse_insert(), 591 TokenType.KILL: lambda self: self._parse_kill(), 592 TokenType.LOAD: lambda self: self._parse_load(), 593 TokenType.MERGE: lambda self: self._parse_merge(), 594 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 595 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, 
this=self._parse_expression()), 596 TokenType.REFRESH: lambda self: self._parse_refresh(), 597 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 598 TokenType.SET: lambda self: self._parse_set(), 599 TokenType.UNCACHE: lambda self: self._parse_uncache(), 600 TokenType.UPDATE: lambda self: self._parse_update(), 601 TokenType.USE: lambda self: self.expression( 602 exp.Use, 603 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 604 and exp.var(self._prev.text), 605 this=self._parse_table(schema=False), 606 ), 607 } 608 609 UNARY_PARSERS = { 610 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 611 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 612 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 613 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 614 } 615 616 PRIMARY_PARSERS = { 617 TokenType.STRING: lambda self, token: self.expression( 618 exp.Literal, this=token.text, is_string=True 619 ), 620 TokenType.NUMBER: lambda self, token: self.expression( 621 exp.Literal, this=token.text, is_string=False 622 ), 623 TokenType.STAR: lambda self, _: self.expression( 624 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 625 ), 626 TokenType.NULL: lambda self, _: self.expression(exp.Null), 627 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 628 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 629 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 630 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 631 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 632 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 633 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 634 
exp.National, this=token.text 635 ), 636 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 637 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 638 exp.RawString, this=token.text 639 ), 640 TokenType.UNICODE_STRING: lambda self, token: self.expression( 641 exp.UnicodeString, 642 this=token.text, 643 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 644 ), 645 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 646 } 647 648 PLACEHOLDER_PARSERS = { 649 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 650 TokenType.PARAMETER: lambda self: self._parse_parameter(), 651 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 652 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 653 else None, 654 } 655 656 RANGE_PARSERS = { 657 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 658 TokenType.GLOB: binary_range_parser(exp.Glob), 659 TokenType.ILIKE: binary_range_parser(exp.ILike), 660 TokenType.IN: lambda self, this: self._parse_in(this), 661 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 662 TokenType.IS: lambda self, this: self._parse_is(this), 663 TokenType.LIKE: binary_range_parser(exp.Like), 664 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 665 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 666 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 667 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 668 } 669 670 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 671 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 672 "AUTO": lambda self: self._parse_auto_property(), 673 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 674 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 675 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 676 
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 677 "CHECKSUM": lambda self: self._parse_checksum(), 678 "CLUSTER BY": lambda self: self._parse_cluster(), 679 "CLUSTERED": lambda self: self._parse_clustered_by(), 680 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 681 exp.CollateProperty, **kwargs 682 ), 683 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 684 "CONTAINS": lambda self: self._parse_contains_property(), 685 "COPY": lambda self: self._parse_copy_property(), 686 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 687 "DEFINER": lambda self: self._parse_definer(), 688 "DETERMINISTIC": lambda self: self.expression( 689 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 690 ), 691 "DISTKEY": lambda self: self._parse_distkey(), 692 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 693 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 694 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 695 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 696 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 697 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 698 "FREESPACE": lambda self: self._parse_freespace(), 699 "HEAP": lambda self: self.expression(exp.HeapProperty), 700 "IMMUTABLE": lambda self: self.expression( 701 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 702 ), 703 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 704 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 705 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 706 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 707 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 708 "LIKE": lambda self: 
self._parse_create_like(), 709 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 710 "LOCK": lambda self: self._parse_locking(), 711 "LOCKING": lambda self: self._parse_locking(), 712 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 713 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 714 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 715 "MODIFIES": lambda self: self._parse_modifies_property(), 716 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 717 "NO": lambda self: self._parse_no_property(), 718 "ON": lambda self: self._parse_on_property(), 719 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 720 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 721 "PARTITION": lambda self: self._parse_partitioned_of(), 722 "PARTITION BY": lambda self: self._parse_partitioned_by(), 723 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 724 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 725 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 726 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 727 "READS": lambda self: self._parse_reads_property(), 728 "REMOTE": lambda self: self._parse_remote_with_connection(), 729 "RETURNS": lambda self: self._parse_returns(), 730 "ROW": lambda self: self._parse_row(), 731 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 732 "SAMPLE": lambda self: self.expression( 733 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 734 ), 735 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 736 "SETTINGS": lambda self: self.expression( 737 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 738 ), 739 "SORTKEY": lambda self: self._parse_sortkey(), 740 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 741 "STABLE": 
lambda self: self.expression( 742 exp.StabilityProperty, this=exp.Literal.string("STABLE") 743 ), 744 "STORED": lambda self: self._parse_stored(), 745 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 746 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 747 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 748 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 749 "TO": lambda self: self._parse_to_table(), 750 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 751 "TRANSFORM": lambda self: self.expression( 752 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 753 ), 754 "TTL": lambda self: self._parse_ttl(), 755 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 756 "VOLATILE": lambda self: self._parse_volatile_property(), 757 "WITH": lambda self: self._parse_with_property(), 758 } 759 760 CONSTRAINT_PARSERS = { 761 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 762 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 763 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 764 "CHARACTER SET": lambda self: self.expression( 765 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 766 ), 767 "CHECK": lambda self: self.expression( 768 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 769 ), 770 "COLLATE": lambda self: self.expression( 771 exp.CollateColumnConstraint, this=self._parse_var() 772 ), 773 "COMMENT": lambda self: self.expression( 774 exp.CommentColumnConstraint, this=self._parse_string() 775 ), 776 "COMPRESS": lambda self: self._parse_compress(), 777 "CLUSTERED": lambda self: self.expression( 778 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 779 ), 780 "NONCLUSTERED": lambda self: self.expression( 781 exp.NonClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 782 ), 783 "DEFAULT": lambda self: self.expression( 784 exp.DefaultColumnConstraint, this=self._parse_bitwise() 785 ), 786 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 787 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 788 "FORMAT": lambda self: self.expression( 789 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 790 ), 791 "GENERATED": lambda self: self._parse_generated_as_identity(), 792 "IDENTITY": lambda self: self._parse_auto_increment(), 793 "INLINE": lambda self: self._parse_inline(), 794 "LIKE": lambda self: self._parse_create_like(), 795 "NOT": lambda self: self._parse_not_constraint(), 796 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 797 "ON": lambda self: ( 798 self._match(TokenType.UPDATE) 799 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 800 ) 801 or self.expression(exp.OnProperty, this=self._parse_id_var()), 802 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 803 "PERIOD": lambda self: self._parse_period_for_system_time(), 804 "PRIMARY KEY": lambda self: self._parse_primary_key(), 805 "REFERENCES": lambda self: self._parse_references(match=False), 806 "TITLE": lambda self: self.expression( 807 exp.TitleColumnConstraint, this=self._parse_var_or_string() 808 ), 809 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 810 "UNIQUE": lambda self: self._parse_unique(), 811 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 812 "WITH": lambda self: self.expression( 813 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 814 ), 815 } 816 817 ALTER_PARSERS = { 818 "ADD": lambda self: self._parse_alter_table_add(), 819 "ALTER": lambda self: self._parse_alter_table_alter(), 820 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 821 "DROP": lambda self: self._parse_alter_table_drop(), 822 "RENAME": lambda self: self._parse_alter_table_rename(), 823 } 824 825 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 826 827 NO_PAREN_FUNCTION_PARSERS = { 828 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 829 "CASE": lambda self: self._parse_case(), 830 "IF": lambda self: self._parse_if(), 831 "NEXT": lambda self: self._parse_next_value_for(), 832 } 833 834 INVALID_FUNC_NAME_TOKENS = { 835 TokenType.IDENTIFIER, 836 TokenType.STRING, 837 } 838 839 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 840 841 FUNCTION_PARSERS = { 842 "ANY_VALUE": lambda self: self._parse_any_value(), 843 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 844 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 845 "DECODE": lambda self: self._parse_decode(), 846 "EXTRACT": lambda self: self._parse_extract(), 847 "JSON_OBJECT": lambda self: self._parse_json_object(), 848 "JSON_TABLE": lambda self: self._parse_json_table(), 849 "MATCH": lambda self: self._parse_match_against(), 850 "OPENJSON": lambda self: self._parse_open_json(), 851 "POSITION": lambda self: self._parse_position(), 852 "PREDICT": lambda self: self._parse_predict(), 853 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 854 "STRING_AGG": lambda self: self._parse_string_agg(), 855 "SUBSTRING": lambda self: self._parse_substring(), 856 "TRIM": lambda self: self._parse_trim(), 857 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 858 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 859 } 860 861 QUERY_MODIFIER_PARSERS = { 862 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 863 TokenType.WHERE: lambda self: ("where", self._parse_where()), 864 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 865 TokenType.HAVING: lambda self: ("having", self._parse_having()), 866 
TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 867 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 868 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 869 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 870 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 871 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 872 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 873 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 874 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 875 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 876 TokenType.CLUSTER_BY: lambda self: ( 877 "cluster", 878 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 879 ), 880 TokenType.DISTRIBUTE_BY: lambda self: ( 881 "distribute", 882 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 883 ), 884 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 885 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 886 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 887 } 888 889 SET_PARSERS = { 890 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 891 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 892 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 893 "TRANSACTION": lambda self: self._parse_set_transaction(), 894 } 895 896 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 897 898 TYPE_LITERAL_PARSERS = { 899 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 900 } 901 902 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 903 904 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 905 906 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 907 908 TRANSACTION_KIND = 
{"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 909 TRANSACTION_CHARACTERISTICS = { 910 "ISOLATION LEVEL REPEATABLE READ", 911 "ISOLATION LEVEL READ COMMITTED", 912 "ISOLATION LEVEL READ UNCOMMITTED", 913 "ISOLATION LEVEL SERIALIZABLE", 914 "READ WRITE", 915 "READ ONLY", 916 } 917 918 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 919 920 CLONE_KEYWORDS = {"CLONE", "COPY"} 921 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 922 923 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 924 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 925 926 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 927 928 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 929 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 930 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 931 932 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 933 934 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 935 936 DISTINCT_TOKENS = {TokenType.DISTINCT} 937 938 NULL_TOKENS = {TokenType.NULL} 939 940 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 941 942 STRICT_CAST = True 943 944 PREFIXED_PIVOT_COLUMNS = False 945 IDENTIFY_PIVOT_STRINGS = False 946 947 LOG_DEFAULTS_TO_LN = False 948 949 # Whether or not ADD is present for each column added by ALTER TABLE 950 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 951 952 # Whether or not the table sample clause expects CSV syntax 953 TABLESAMPLE_CSV = False 954 955 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 956 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 957 958 # Whether the TRIM function expects the characters to trim as its first argument 959 TRIM_PATTERN_FIRST = False 960 961 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 962 STRING_ALIASES = False 963 964 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 965 MODIFIERS_ATTACHED_TO_UNION = True 966 UNION_MODIFIERS = {"order", "limit", "offset"} 967 968 __slots__ = ( 969 "error_level", 970 "error_message_context", 971 "max_errors", 972 "dialect", 973 "sql", 974 "errors", 975 "_tokens", 976 "_index", 977 "_curr", 978 "_next", 979 "_prev", 980 "_prev_comments", 981 ) 982 983 # Autofilled 984 SHOW_TRIE: t.Dict = {} 985 SET_TRIE: t.Dict = {} 986 987 def __init__( 988 self, 989 error_level: t.Optional[ErrorLevel] = None, 990 error_message_context: int = 100, 991 max_errors: int = 3, 992 dialect: DialectType = None, 993 ): 994 from sqlglot.dialects import Dialect 995 996 self.error_level = error_level or ErrorLevel.IMMEDIATE 997 self.error_message_context = error_message_context 998 self.max_errors = max_errors 999 self.dialect = Dialect.get_or_raise(dialect) 1000 self.reset() 1001 1002 def reset(self): 1003 self.sql = "" 1004 self.errors = [] 1005 self._tokens = [] 1006 self._index = 0 1007 self._curr = None 1008 self._next = None 1009 self._prev = None 1010 self._prev_comments = None 1011 1012 def parse( 1013 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1014 ) -> t.List[t.Optional[exp.Expression]]: 1015 """ 1016 Parses a list of tokens and returns a list of syntax trees, one tree 1017 per parsed SQL statement. 1018 1019 Args: 1020 raw_tokens: The list of tokens. 1021 sql: The original SQL string, used to produce helpful debug messages. 1022 1023 Returns: 1024 The list of the produced syntax trees. 
1025 """ 1026 return self._parse( 1027 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1028 ) 1029 1030 def parse_into( 1031 self, 1032 expression_types: exp.IntoType, 1033 raw_tokens: t.List[Token], 1034 sql: t.Optional[str] = None, 1035 ) -> t.List[t.Optional[exp.Expression]]: 1036 """ 1037 Parses a list of tokens into a given Expression type. If a collection of Expression 1038 types is given instead, this method will try to parse the token list into each one 1039 of them, stopping at the first for which the parsing succeeds. 1040 1041 Args: 1042 expression_types: The expression type(s) to try and parse the token list into. 1043 raw_tokens: The list of tokens. 1044 sql: The original SQL string, used to produce helpful debug messages. 1045 1046 Returns: 1047 The target Expression. 1048 """ 1049 errors = [] 1050 for expression_type in ensure_list(expression_types): 1051 parser = self.EXPRESSION_PARSERS.get(expression_type) 1052 if not parser: 1053 raise TypeError(f"No parser registered for {expression_type}") 1054 1055 try: 1056 return self._parse(parser, raw_tokens, sql) 1057 except ParseError as e: 1058 e.errors[0]["into_expression"] = expression_type 1059 errors.append(e) 1060 1061 raise ParseError( 1062 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1063 errors=merge_errors(errors), 1064 ) from errors[-1] 1065 1066 def _parse( 1067 self, 1068 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1069 raw_tokens: t.List[Token], 1070 sql: t.Optional[str] = None, 1071 ) -> t.List[t.Optional[exp.Expression]]: 1072 self.reset() 1073 self.sql = sql or "" 1074 1075 total = len(raw_tokens) 1076 chunks: t.List[t.List[Token]] = [[]] 1077 1078 for i, token in enumerate(raw_tokens): 1079 if token.token_type == TokenType.SEMICOLON: 1080 if i < total - 1: 1081 chunks.append([]) 1082 else: 1083 chunks[-1].append(token) 1084 1085 expressions = [] 1086 1087 for tokens in chunks: 1088 self._index = -1 1089 self._tokens = 
tokens 1090 self._advance() 1091 1092 expressions.append(parse_method(self)) 1093 1094 if self._index < len(self._tokens): 1095 self.raise_error("Invalid expression / Unexpected token") 1096 1097 self.check_errors() 1098 1099 return expressions 1100 1101 def check_errors(self) -> None: 1102 """Logs or raises any found errors, depending on the chosen error level setting.""" 1103 if self.error_level == ErrorLevel.WARN: 1104 for error in self.errors: 1105 logger.error(str(error)) 1106 elif self.error_level == ErrorLevel.RAISE and self.errors: 1107 raise ParseError( 1108 concat_messages(self.errors, self.max_errors), 1109 errors=merge_errors(self.errors), 1110 ) 1111 1112 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1113 """ 1114 Appends an error in the list of recorded errors or raises it, depending on the chosen 1115 error level setting. 1116 """ 1117 token = token or self._curr or self._prev or Token.string("") 1118 start = token.start 1119 end = token.end + 1 1120 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1121 highlight = self.sql[start:end] 1122 end_context = self.sql[end : end + self.error_message_context] 1123 1124 error = ParseError.new( 1125 f"{message}. Line {token.line}, Col: {token.col}.\n" 1126 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1127 description=message, 1128 line=token.line, 1129 col=token.col, 1130 start_context=start_context, 1131 highlight=highlight, 1132 end_context=end_context, 1133 ) 1134 1135 if self.error_level == ErrorLevel.IMMEDIATE: 1136 raise error 1137 1138 self.errors.append(error) 1139 1140 def expression( 1141 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1142 ) -> E: 1143 """ 1144 Creates a new, validated Expression. 1145 1146 Args: 1147 exp_class: The expression class to instantiate. 1148 comments: An optional list of comments to attach to the expression. 
1149 kwargs: The arguments to set for the expression along with their respective values. 1150 1151 Returns: 1152 The target expression. 1153 """ 1154 instance = exp_class(**kwargs) 1155 instance.add_comments(comments) if comments else self._add_comments(instance) 1156 return self.validate_expression(instance) 1157 1158 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1159 if expression and self._prev_comments: 1160 expression.add_comments(self._prev_comments) 1161 self._prev_comments = None 1162 1163 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1164 """ 1165 Validates an Expression, making sure that all its mandatory arguments are set. 1166 1167 Args: 1168 expression: The expression to validate. 1169 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1170 1171 Returns: 1172 The validated expression. 1173 """ 1174 if self.error_level != ErrorLevel.IGNORE: 1175 for error_message in expression.error_messages(args): 1176 self.raise_error(error_message) 1177 1178 return expression 1179 1180 def _find_sql(self, start: Token, end: Token) -> str: 1181 return self.sql[start.start : end.end + 1] 1182 1183 def _is_connected(self) -> bool: 1184 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1185 1186 def _advance(self, times: int = 1) -> None: 1187 self._index += times 1188 self._curr = seq_get(self._tokens, self._index) 1189 self._next = seq_get(self._tokens, self._index + 1) 1190 1191 if self._index > 0: 1192 self._prev = self._tokens[self._index - 1] 1193 self._prev_comments = self._prev.comments 1194 else: 1195 self._prev = None 1196 self._prev_comments = None 1197 1198 def _retreat(self, index: int) -> None: 1199 if index != self._index: 1200 self._advance(index - self._index) 1201 1202 def _parse_command(self) -> exp.Command: 1203 return self.expression( 1204 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1205 ) 

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT ON <kind> <object> IS <string>; falls back to a raw Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: keep the statement as an opaque command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression, optionally followed by one action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregates> is only valid when a GROUP BY was parsed
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatcher: keyword parsers, commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Bare expression or SELECT; then attach any trailing query modifiers
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse DROP [TEMPORARY|MATERIALIZED] <kind> ...; unknown kinds become Commands."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse CREATE [OR REPLACE] [UNIQUE] <kind> ... into exp.Create (or a Command)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token and treat it as a function
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        # Accumulates property lists parsed at the various syntactic locations
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the truthy modifiers; a parser that doesn't accept
                # one of them raises TypeError, reported as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, falling back to `key = value` pairs."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; rewind if no EQ follows the key
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or
self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> (Hive), incl. INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Consumes an optional `=` or `AS` before the property value
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties; `before` selects the pre-name (Teradata) grammar."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property after CREATE/REPLACE/UNIQUE, else stability."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING = ON [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the various WITH ... property forms (wrapped list, JOURNAL, DATA, ...)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        # DEFINER = user@host; host may also be the literal `%`
        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON|OFF [DEFAULT]; `on` stays None when neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise give COPY back to the caller
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] (Teradata)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: object kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif
self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table-parts target; ROW locks do not
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns an empty list (not None) when there is no PARTITION BY
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS ..., REMAINDER ...)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF parent { DEFAULT | FOR VALUES <bound_spec> } (Postgres)."""
        if not self._match_text_seq("OF"):
            # Not PARTITION OF: give the PARTITION token back to the caller
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Tri-state: True / False / None when no STATISTICS clause is present
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...] in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: scalar type, TABLE<...>, or TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> struct-style signature
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT (incl. OVERWRITE/IGNORE, DIRECTORY targets, OR-alternatives)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive INSERT OVERWRITE [LOCAL] DIRECTORY target
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres/SQLite) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint or a list of key columns
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, optionally with an INTO target; None if absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Called after ROW was consumed; requires FORMAT to follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE / ROW FORMAT DELIMITED properties.

        When `match_row` is True, the leading ROW FORMAT tokens must be present;
        otherwise they are assumed to have been consumed already.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional; ESCAPED BY only follows FIELDS.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ...; anything else after LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after the other clauses.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Hook point: dialects can override how SELECT projections are parsed.
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized selects, VALUES,
        and duckdb's leading-FROM syntax."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # SELECT AS STRUCT / AS VALUE (e.g. in dialects supporting those kinds).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [(<cols>)] AS (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column aliases>)]; None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, they weren't column aliases -- rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and other trailing query modifiers to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y form carries its offset inside the Limit node;
                            # lift it out into a proper Offset modifier.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
break 2398 return this 2399 2400 def _parse_hint(self) -> t.Optional[exp.Hint]: 2401 if self._match(TokenType.HINT): 2402 hints = [] 2403 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2404 hints.extend(hint) 2405 2406 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2407 self.raise_error("Expected */ after HINT") 2408 2409 return self.expression(exp.Hint, expressions=hints) 2410 2411 return None 2412 2413 def _parse_into(self) -> t.Optional[exp.Into]: 2414 if not self._match(TokenType.INTO): 2415 return None 2416 2417 temp = self._match(TokenType.TEMPORARY) 2418 unlogged = self._match_text_seq("UNLOGGED") 2419 self._match(TokenType.TABLE) 2420 2421 return self.expression( 2422 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2423 ) 2424 2425 def _parse_from( 2426 self, joins: bool = False, skip_from_token: bool = False 2427 ) -> t.Optional[exp.From]: 2428 if not skip_from_token and not self._match(TokenType.FROM): 2429 return None 2430 2431 return self.expression( 2432 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2433 ) 2434 2435 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2436 if not self._match(TokenType.MATCH_RECOGNIZE): 2437 return None 2438 2439 self._match_l_paren() 2440 2441 partition = self._parse_partition_by() 2442 order = self._parse_order() 2443 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2444 2445 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2446 rows = exp.var("ONE ROW PER MATCH") 2447 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2448 text = "ALL ROWS PER MATCH" 2449 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2450 text += f" SHOW EMPTY MATCHES" 2451 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2452 text += f" OMIT EMPTY MATCHES" 2453 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2454 text += f" WITH UNMATCHED ROWS" 2455 rows = exp.var(text) 
2456 else: 2457 rows = None 2458 2459 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2460 text = "AFTER MATCH SKIP" 2461 if self._match_text_seq("PAST", "LAST", "ROW"): 2462 text += f" PAST LAST ROW" 2463 elif self._match_text_seq("TO", "NEXT", "ROW"): 2464 text += f" TO NEXT ROW" 2465 elif self._match_text_seq("TO", "FIRST"): 2466 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2467 elif self._match_text_seq("TO", "LAST"): 2468 text += f" TO LAST {self._advance_any().text}" # type: ignore 2469 after = exp.var(text) 2470 else: 2471 after = None 2472 2473 if self._match_text_seq("PATTERN"): 2474 self._match_l_paren() 2475 2476 if not self._curr: 2477 self.raise_error("Expecting )", self._curr) 2478 2479 paren = 1 2480 start = self._curr 2481 2482 while self._curr and paren > 0: 2483 if self._curr.token_type == TokenType.L_PAREN: 2484 paren += 1 2485 if self._curr.token_type == TokenType.R_PAREN: 2486 paren -= 1 2487 2488 end = self._prev 2489 self._advance() 2490 2491 if paren > 0: 2492 self.raise_error("Expecting )", self._curr) 2493 2494 pattern = exp.var(self._find_sql(start, end)) 2495 else: 2496 pattern = None 2497 2498 define = ( 2499 self._parse_csv(self._parse_name_as_expression) 2500 if self._match_text_seq("DEFINE") 2501 else None 2502 ) 2503 2504 self._match_r_paren() 2505 2506 return self.expression( 2507 exp.MatchRecognize, 2508 partition_by=partition, 2509 order=order, 2510 measures=measures, 2511 rows=rows, 2512 after=after, 2513 pattern=pattern, 2514 define=define, 2515 alias=self._parse_table_alias(), 2516 ) 2517 2518 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2519 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2520 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2521 cross_apply = False 2522 2523 if cross_apply is not None: 2524 this = self._parse_select(table=True) 2525 view = None 2526 outer = None 2527 elif self._match(TokenType.LATERAL): 2528 this = 
self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # LATERAL <unnest> / <function call> / <identifier>, possibly dotted.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (method, side, kind) tokens of a join, each possibly None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause (including comma joins and APPLY variants); None if absent."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join -- give back the modifier tokens consumed speculatively.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Possibly a nested join whose ON/USING applies to the outer join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index-column expression with an optional operator class suffix."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` is pre-parsed when coming from CREATE INDEX."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL index table hints; None when there are none."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One component of a dotted table reference; functions are only allowed
        # when not parsing a schema target.
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table reference catalog.db.table (deeper nesting becomes Dot)."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: lateral, unnest, values, subquery or a plain
        table, plus its optional version, alias, hints, pivots, sample and joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect-dependent clause order: sample may precede or follow the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a table snapshot clause: TIMESTAMP/VERSION with AS OF, FROM ... TO,
        BETWEEN ... AND, CONTAINED IN (...), or ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return
self.expression(exp.Version, this=this, expression=expression, kind=kind) 2846 2847 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2848 if not self._match(TokenType.UNNEST): 2849 return None 2850 2851 expressions = self._parse_wrapped_csv(self._parse_equality) 2852 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2853 2854 alias = self._parse_table_alias() if with_alias else None 2855 2856 if alias: 2857 if self.dialect.UNNEST_COLUMN_ONLY: 2858 if alias.args.get("columns"): 2859 self.raise_error("Unexpected extra column alias in unnest.") 2860 2861 alias.set("columns", [alias.this]) 2862 alias.set("this", None) 2863 2864 columns = alias.args.get("columns") or [] 2865 if offset and len(expressions) < len(columns): 2866 offset = columns.pop() 2867 2868 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2869 self._match(TokenType.ALIAS) 2870 offset = self._parse_id_var( 2871 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2872 ) or exp.to_identifier("offset") 2873 2874 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2875 2876 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2877 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2878 if not is_derived and not self._match(TokenType.VALUES): 2879 return None 2880 2881 expressions = self._parse_csv(self._parse_value) 2882 alias = self._parse_table_alias() 2883 2884 if is_derived: 2885 self._match_r_paren() 2886 2887 return self.expression( 2888 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2889 ) 2890 2891 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2892 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2893 as_modifier and self._match_text_seq("USING", "SAMPLE") 2894 ): 2895 return None 2896 2897 bucket_numerator = None 2898 bucket_denominator = None 2899 bucket_field = None 2900 percent = None 2901 
size = None 2902 seed = None 2903 2904 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2905 matched_l_paren = self._match(TokenType.L_PAREN) 2906 2907 if self.TABLESAMPLE_CSV: 2908 num = None 2909 expressions = self._parse_csv(self._parse_primary) 2910 else: 2911 expressions = None 2912 num = ( 2913 self._parse_factor() 2914 if self._match(TokenType.NUMBER, advance=False) 2915 else self._parse_primary() or self._parse_placeholder() 2916 ) 2917 2918 if self._match_text_seq("BUCKET"): 2919 bucket_numerator = self._parse_number() 2920 self._match_text_seq("OUT", "OF") 2921 bucket_denominator = bucket_denominator = self._parse_number() 2922 self._match(TokenType.ON) 2923 bucket_field = self._parse_field() 2924 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2925 percent = num 2926 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2927 size = num 2928 else: 2929 percent = num 2930 2931 if matched_l_paren: 2932 self._match_r_paren() 2933 2934 if self._match(TokenType.L_PAREN): 2935 method = self._parse_var(upper=True) 2936 seed = self._match(TokenType.COMMA) and self._parse_number() 2937 self._match_r_paren() 2938 elif self._match_texts(("SEED", "REPEATABLE")): 2939 seed = self._parse_wrapped(self._parse_number) 2940 2941 return self.expression( 2942 exp.TableSample, 2943 expressions=expressions, 2944 method=method, 2945 bucket_numerator=bucket_numerator, 2946 bucket_denominator=bucket_denominator, 2947 bucket_field=bucket_field, 2948 percent=percent, 2949 size=size, 2950 seed=seed, 2951 ) 2952 2953 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2954 return list(iter(self._parse_pivot, None)) or None 2955 2956 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2957 return list(iter(self._parse_join, None)) or None 2958 2959 # https://duckdb.org/docs/sql/statements/pivot 2960 def _parse_simplified_pivot(self) -> exp.Pivot: 2961 def _parse_on() -> t.Optional[exp.Expression]: 2962 this = 
self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the IN (...) part of a PIVOT's FOR clause, allowing per-value aliases."""

        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse PIVOT/UNPIVOT ( ... FOR ... IN (...) ) and compute output column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Bare PIVOT/UNPIVOT keyword without parens is not a pivot clause -- rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Derive the generated pivot column names from the aggregation aliases
            # combined with each IN-list value.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default: use the aggregation aliases as the pivot column names.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with optional ALL, GROUPING SETS, [WITH] ROLLUP/CUBE/TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH was not followed by ROLLUP/CUBE/TOTALS -- give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
start = None 3144 elif self._match(TokenType.START_WITH): 3145 start = self._parse_conjunction() 3146 else: 3147 return None 3148 3149 self._match(TokenType.CONNECT_BY) 3150 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3151 exp.Prior, this=self._parse_bitwise() 3152 ) 3153 connect = self._parse_conjunction() 3154 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3155 3156 if not start and self._match(TokenType.START_WITH): 3157 start = self._parse_conjunction() 3158 3159 return self.expression(exp.Connect, start=start, connect=connect) 3160 3161 def _parse_name_as_expression(self) -> exp.Alias: 3162 return self.expression( 3163 exp.Alias, 3164 alias=self._parse_id_var(any_token=True), 3165 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3166 ) 3167 3168 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3169 if self._match_text_seq("INTERPOLATE"): 3170 return self._parse_wrapped_csv(self._parse_name_as_expression) 3171 return None 3172 3173 def _parse_order( 3174 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3175 ) -> t.Optional[exp.Expression]: 3176 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3177 return this 3178 3179 return self.expression( 3180 exp.Order, 3181 this=this, 3182 expressions=self._parse_csv(self._parse_ordered), 3183 interpolate=self._parse_interpolate(), 3184 ) 3185 3186 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3187 if not self._match(token): 3188 return None 3189 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3190 3191 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3192 this = parse_method() if parse_method else self._parse_conjunction() 3193 3194 asc = self._match(TokenType.ASC) 3195 desc = self._match(TokenType.DESC) or (asc and False) 3196 3197 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3198 is_nulls_last = 
self._match_text_seq("NULLS", "LAST") 3199 3200 nulls_first = is_nulls_first or False 3201 explicitly_null_ordered = is_nulls_first or is_nulls_last 3202 3203 if ( 3204 not explicitly_null_ordered 3205 and ( 3206 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3207 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3208 ) 3209 and self.dialect.NULL_ORDERING != "nulls_are_last" 3210 ): 3211 nulls_first = True 3212 3213 if self._match_text_seq("WITH", "FILL"): 3214 with_fill = self.expression( 3215 exp.WithFill, 3216 **{ # type: ignore 3217 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3218 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3219 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3220 }, 3221 ) 3222 else: 3223 with_fill = None 3224 3225 return self.expression( 3226 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3227 ) 3228 3229 def _parse_limit( 3230 self, this: t.Optional[exp.Expression] = None, top: bool = False 3231 ) -> t.Optional[exp.Expression]: 3232 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3233 comments = self._prev_comments 3234 if top: 3235 limit_paren = self._match(TokenType.L_PAREN) 3236 expression = self._parse_term() if limit_paren else self._parse_number() 3237 3238 if limit_paren: 3239 self._match_r_paren() 3240 else: 3241 expression = self._parse_term() 3242 3243 if self._match(TokenType.COMMA): 3244 offset = expression 3245 expression = self._parse_term() 3246 else: 3247 offset = None 3248 3249 limit_exp = self.expression( 3250 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3251 ) 3252 3253 return limit_exp 3254 3255 if self._match(TokenType.FETCH): 3256 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3257 direction = self._prev.text.upper() if direction else "FIRST" 3258 3259 count = self._parse_field(tokens=self.FETCH_TOKENS) 3260 percent = self._match(TokenType.PERCENT) 
3261 3262 self._match_set((TokenType.ROW, TokenType.ROWS)) 3263 3264 only = self._match_text_seq("ONLY") 3265 with_ties = self._match_text_seq("WITH", "TIES") 3266 3267 if only and with_ties: 3268 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3269 3270 return self.expression( 3271 exp.Fetch, 3272 direction=direction, 3273 count=count, 3274 percent=percent, 3275 with_ties=with_ties, 3276 ) 3277 3278 return this 3279 3280 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3281 if not self._match(TokenType.OFFSET): 3282 return this 3283 3284 count = self._parse_term() 3285 self._match_set((TokenType.ROW, TokenType.ROWS)) 3286 return self.expression(exp.Offset, this=this, expression=count) 3287 3288 def _parse_locks(self) -> t.List[exp.Lock]: 3289 locks = [] 3290 while True: 3291 if self._match_text_seq("FOR", "UPDATE"): 3292 update = True 3293 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3294 "LOCK", "IN", "SHARE", "MODE" 3295 ): 3296 update = False 3297 else: 3298 break 3299 3300 expressions = None 3301 if self._match_text_seq("OF"): 3302 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3303 3304 wait: t.Optional[bool | exp.Expression] = None 3305 if self._match_text_seq("NOWAIT"): 3306 wait = True 3307 elif self._match_text_seq("WAIT"): 3308 wait = self._parse_primary() 3309 elif self._match_text_seq("SKIP", "LOCKED"): 3310 wait = False 3311 3312 locks.append( 3313 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3314 ) 3315 3316 return locks 3317 3318 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3319 while this and self._match_set(self.SET_OPERATIONS): 3320 token_type = self._prev.token_type 3321 3322 if token_type == TokenType.UNION: 3323 operation = exp.Union 3324 elif token_type == TokenType.EXCEPT: 3325 operation = exp.Except 3326 else: 3327 operation = exp.Intersect 
3328 3329 comments = self._prev.comments 3330 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3331 by_name = self._match_text_seq("BY", "NAME") 3332 expression = self._parse_select(nested=True, parse_set_operation=False) 3333 3334 this = self.expression( 3335 operation, 3336 comments=comments, 3337 this=this, 3338 distinct=distinct, 3339 by_name=by_name, 3340 expression=expression, 3341 ) 3342 3343 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3344 expression = this.expression 3345 3346 if expression: 3347 for arg in self.UNION_MODIFIERS: 3348 expr = expression.args.get(arg) 3349 if expr: 3350 this.set(arg, expr.pop()) 3351 3352 return this 3353 3354 def _parse_expression(self) -> t.Optional[exp.Expression]: 3355 return self._parse_alias(self._parse_conjunction()) 3356 3357 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3358 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3359 3360 def _parse_equality(self) -> t.Optional[exp.Expression]: 3361 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3362 3363 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3364 return self._parse_tokens(self._parse_range, self.COMPARISON) 3365 3366 def _parse_range(self) -> t.Optional[exp.Expression]: 3367 this = self._parse_bitwise() 3368 negate = self._match(TokenType.NOT) 3369 3370 if self._match_set(self.RANGE_PARSERS): 3371 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3372 if not expression: 3373 return this 3374 3375 this = expression 3376 elif self._match(TokenType.ISNULL): 3377 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3378 3379 # Postgres supports ISNULL and NOTNULL for conditions. 
3380 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3381 if self._match(TokenType.NOTNULL): 3382 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3383 this = self.expression(exp.Not, this=this) 3384 3385 if negate: 3386 this = self.expression(exp.Not, this=this) 3387 3388 if self._match(TokenType.IS): 3389 this = self._parse_is(this) 3390 3391 return this 3392 3393 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3394 index = self._index - 1 3395 negate = self._match(TokenType.NOT) 3396 3397 if self._match_text_seq("DISTINCT", "FROM"): 3398 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3399 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3400 3401 expression = self._parse_null() or self._parse_boolean() 3402 if not expression: 3403 self._retreat(index) 3404 return None 3405 3406 this = self.expression(exp.Is, this=this, expression=expression) 3407 return self.expression(exp.Not, this=this) if negate else this 3408 3409 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3410 unnest = self._parse_unnest(with_alias=False) 3411 if unnest: 3412 this = self.expression(exp.In, this=this, unnest=unnest) 3413 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3414 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3415 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3416 3417 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3418 this = self.expression(exp.In, this=this, query=expressions[0]) 3419 else: 3420 this = self.expression(exp.In, this=this, expressions=expressions) 3421 3422 if matched_l_paren: 3423 self._match_r_paren(this) 3424 elif not self._match(TokenType.R_BRACKET, expression=this): 3425 self.raise_error("Expecting ]") 3426 else: 3427 this = self.expression(exp.In, this=this, field=self._parse_field()) 3428 3429 return this 3430 3431 def 
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3432 low = self._parse_bitwise() 3433 self._match(TokenType.AND) 3434 high = self._parse_bitwise() 3435 return self.expression(exp.Between, this=this, low=low, high=high) 3436 3437 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3438 if not self._match(TokenType.ESCAPE): 3439 return this 3440 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3441 3442 def _parse_interval(self) -> t.Optional[exp.Interval]: 3443 index = self._index 3444 3445 if not self._match(TokenType.INTERVAL): 3446 return None 3447 3448 if self._match(TokenType.STRING, advance=False): 3449 this = self._parse_primary() 3450 else: 3451 this = self._parse_term() 3452 3453 if not this or ( 3454 isinstance(this, exp.Column) 3455 and not this.table 3456 and not this.this.quoted 3457 and this.name.upper() == "IS" 3458 ): 3459 self._retreat(index) 3460 return None 3461 3462 unit = self._parse_function() or self._parse_var(any_token=True, upper=True) 3463 3464 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3465 # each INTERVAL expression into this canonical form so it's easy to transpile 3466 if this and this.is_number: 3467 this = exp.Literal.string(this.name) 3468 elif this and this.is_string: 3469 parts = this.name.split() 3470 3471 if len(parts) == 2: 3472 if unit: 3473 # This is not actually a unit, it's something else (e.g. 
a "window side") 3474 unit = None 3475 self._retreat(self._index - 1) 3476 3477 this = exp.Literal.string(parts[0]) 3478 unit = self.expression(exp.Var, this=parts[1].upper()) 3479 3480 return self.expression(exp.Interval, this=this, unit=unit) 3481 3482 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3483 this = self._parse_term() 3484 3485 while True: 3486 if self._match_set(self.BITWISE): 3487 this = self.expression( 3488 self.BITWISE[self._prev.token_type], 3489 this=this, 3490 expression=self._parse_term(), 3491 ) 3492 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3493 this = self.expression( 3494 exp.DPipe, 3495 this=this, 3496 expression=self._parse_term(), 3497 safe=not self.dialect.STRICT_STRING_CONCAT, 3498 ) 3499 elif self._match(TokenType.DQMARK): 3500 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3501 elif self._match_pair(TokenType.LT, TokenType.LT): 3502 this = self.expression( 3503 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3504 ) 3505 elif self._match_pair(TokenType.GT, TokenType.GT): 3506 this = self.expression( 3507 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3508 ) 3509 else: 3510 break 3511 3512 return this 3513 3514 def _parse_term(self) -> t.Optional[exp.Expression]: 3515 return self._parse_tokens(self._parse_factor, self.TERM) 3516 3517 def _parse_factor(self) -> t.Optional[exp.Expression]: 3518 if self.EXPONENT: 3519 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3520 else: 3521 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3522 if isinstance(factor, exp.Div): 3523 factor.args["typed"] = self.dialect.TYPED_DIVISION 3524 factor.args["safe"] = self.dialect.SAFE_DIVISION 3525 return factor 3526 3527 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3528 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3529 3530 def _parse_unary(self) -> t.Optional[exp.Expression]: 3531 if 
self._match_set(self.UNARY_PARSERS): 3532 return self.UNARY_PARSERS[self._prev.token_type](self) 3533 return self._parse_at_time_zone(self._parse_type()) 3534 3535 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3536 interval = parse_interval and self._parse_interval() 3537 if interval: 3538 return interval 3539 3540 index = self._index 3541 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3542 this = self._parse_column() 3543 3544 if data_type: 3545 if isinstance(this, exp.Literal): 3546 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3547 if parser: 3548 return parser(self, this, data_type) 3549 return self.expression(exp.Cast, this=this, to=data_type) 3550 if not data_type.expressions: 3551 self._retreat(index) 3552 return self._parse_column() 3553 return self._parse_column_ops(data_type) 3554 3555 return this and self._parse_column_ops(this) 3556 3557 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3558 this = self._parse_type() 3559 if not this: 3560 return None 3561 3562 return self.expression( 3563 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3564 ) 3565 3566 def _parse_types( 3567 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3568 ) -> t.Optional[exp.Expression]: 3569 index = self._index 3570 3571 prefix = self._match_text_seq("SYSUDTLIB", ".") 3572 3573 if not self._match_set(self.TYPE_TOKENS): 3574 identifier = allow_identifiers and self._parse_id_var( 3575 any_token=False, tokens=(TokenType.VAR,) 3576 ) 3577 3578 if identifier: 3579 tokens = self.dialect.tokenize(identifier.name) 3580 3581 if len(tokens) != 1: 3582 self.raise_error("Unexpected identifier", self._prev) 3583 3584 if tokens[0].token_type in self.TYPE_TOKENS: 3585 self._prev = tokens[0] 3586 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3587 type_name = identifier.name 3588 3589 while self._match(TokenType.DOT): 3590 type_name = 
f"{type_name}.{self._advance_any() and self._prev.text}" 3591 3592 return exp.DataType.build(type_name, udt=True) 3593 else: 3594 return None 3595 else: 3596 return None 3597 3598 type_token = self._prev.token_type 3599 3600 if type_token == TokenType.PSEUDO_TYPE: 3601 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3602 3603 if type_token == TokenType.OBJECT_IDENTIFIER: 3604 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3605 3606 nested = type_token in self.NESTED_TYPE_TOKENS 3607 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3608 expressions = None 3609 maybe_func = False 3610 3611 if self._match(TokenType.L_PAREN): 3612 if is_struct: 3613 expressions = self._parse_csv(self._parse_struct_types) 3614 elif nested: 3615 expressions = self._parse_csv( 3616 lambda: self._parse_types( 3617 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3618 ) 3619 ) 3620 elif type_token in self.ENUM_TYPE_TOKENS: 3621 expressions = self._parse_csv(self._parse_equality) 3622 else: 3623 expressions = self._parse_csv(self._parse_type_size) 3624 3625 if not expressions or not self._match(TokenType.R_PAREN): 3626 self._retreat(index) 3627 return None 3628 3629 maybe_func = True 3630 3631 this: t.Optional[exp.Expression] = None 3632 values: t.Optional[t.List[exp.Expression]] = None 3633 3634 if nested and self._match(TokenType.LT): 3635 if is_struct: 3636 expressions = self._parse_csv(self._parse_struct_types) 3637 else: 3638 expressions = self._parse_csv( 3639 lambda: self._parse_types( 3640 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3641 ) 3642 ) 3643 3644 if not self._match(TokenType.GT): 3645 self.raise_error("Expecting >") 3646 3647 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3648 values = self._parse_csv(self._parse_conjunction) 3649 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3650 3651 if type_token in self.TIMESTAMPS: 3652 if 
self._match_text_seq("WITH", "TIME", "ZONE"): 3653 maybe_func = False 3654 tz_type = ( 3655 exp.DataType.Type.TIMETZ 3656 if type_token in self.TIMES 3657 else exp.DataType.Type.TIMESTAMPTZ 3658 ) 3659 this = exp.DataType(this=tz_type, expressions=expressions) 3660 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3661 maybe_func = False 3662 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3663 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3664 maybe_func = False 3665 elif type_token == TokenType.INTERVAL: 3666 unit = self._parse_var() 3667 3668 if self._match_text_seq("TO"): 3669 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3670 else: 3671 span = None 3672 3673 if span or not unit: 3674 this = self.expression( 3675 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3676 ) 3677 else: 3678 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3679 3680 if maybe_func and check_func: 3681 index2 = self._index 3682 peek = self._parse_string() 3683 3684 if not peek: 3685 self._retreat(index) 3686 return None 3687 3688 self._retreat(index2) 3689 3690 if not this: 3691 if self._match_text_seq("UNSIGNED"): 3692 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3693 if not unsigned_type_token: 3694 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3695 3696 type_token = unsigned_type_token or type_token 3697 3698 this = exp.DataType( 3699 this=exp.DataType.Type[type_token.value], 3700 expressions=expressions, 3701 nested=nested, 3702 values=values, 3703 prefix=prefix, 3704 ) 3705 3706 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3707 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3708 3709 return this 3710 3711 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3712 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3713 
self._match(TokenType.COLON) 3714 return self._parse_column_def(this) 3715 3716 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3717 if not self._match_text_seq("AT", "TIME", "ZONE"): 3718 return this 3719 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3720 3721 def _parse_column(self) -> t.Optional[exp.Expression]: 3722 this = self._parse_field() 3723 if isinstance(this, exp.Identifier): 3724 this = self.expression(exp.Column, this=this) 3725 elif not this: 3726 return self._parse_bracket(this) 3727 return self._parse_column_ops(this) 3728 3729 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3730 this = self._parse_bracket(this) 3731 3732 while self._match_set(self.COLUMN_OPERATORS): 3733 op_token = self._prev.token_type 3734 op = self.COLUMN_OPERATORS.get(op_token) 3735 3736 if op_token == TokenType.DCOLON: 3737 field = self._parse_types() 3738 if not field: 3739 self.raise_error("Expected type") 3740 elif op and self._curr: 3741 self._advance() 3742 value = self._prev.text 3743 field = ( 3744 exp.Literal.number(value) 3745 if self._prev.token_type == TokenType.NUMBER 3746 else exp.Literal.string(value) 3747 ) 3748 else: 3749 field = self._parse_field(anonymous_func=True, any_token=True) 3750 3751 if isinstance(field, exp.Func): 3752 # bigquery allows function calls like x.y.count(...) 3753 # SAFE.SUBSTR(...) 
3754 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3755 this = self._replace_columns_with_dots(this) 3756 3757 if op: 3758 this = op(self, this, field) 3759 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3760 this = self.expression( 3761 exp.Column, 3762 this=field, 3763 table=this.this, 3764 db=this.args.get("table"), 3765 catalog=this.args.get("db"), 3766 ) 3767 else: 3768 this = self.expression(exp.Dot, this=this, expression=field) 3769 this = self._parse_bracket(this) 3770 return this 3771 3772 def _parse_primary(self) -> t.Optional[exp.Expression]: 3773 if self._match_set(self.PRIMARY_PARSERS): 3774 token_type = self._prev.token_type 3775 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3776 3777 if token_type == TokenType.STRING: 3778 expressions = [primary] 3779 while self._match(TokenType.STRING): 3780 expressions.append(exp.Literal.string(self._prev.text)) 3781 3782 if len(expressions) > 1: 3783 return self.expression(exp.Concat, expressions=expressions) 3784 3785 return primary 3786 3787 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3788 return exp.Literal.number(f"0.{self._prev.text}") 3789 3790 if self._match(TokenType.L_PAREN): 3791 comments = self._prev_comments 3792 query = self._parse_select() 3793 3794 if query: 3795 expressions = [query] 3796 else: 3797 expressions = self._parse_expressions() 3798 3799 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3800 3801 if isinstance(this, exp.Subqueryable): 3802 this = self._parse_set_operations( 3803 self._parse_subquery(this=this, parse_alias=False) 3804 ) 3805 elif len(expressions) > 1: 3806 this = self.expression(exp.Tuple, expressions=expressions) 3807 else: 3808 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3809 3810 if this: 3811 this.add_comments(comments) 3812 3813 self._match_r_paren(expression=this) 3814 return this 3815 3816 return None 3817 3818 def 
_parse_field( 3819 self, 3820 any_token: bool = False, 3821 tokens: t.Optional[t.Collection[TokenType]] = None, 3822 anonymous_func: bool = False, 3823 ) -> t.Optional[exp.Expression]: 3824 return ( 3825 self._parse_primary() 3826 or self._parse_function(anonymous=anonymous_func) 3827 or self._parse_id_var(any_token=any_token, tokens=tokens) 3828 ) 3829 3830 def _parse_function( 3831 self, 3832 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3833 anonymous: bool = False, 3834 optional_parens: bool = True, 3835 ) -> t.Optional[exp.Expression]: 3836 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3837 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3838 fn_syntax = False 3839 if ( 3840 self._match(TokenType.L_BRACE, advance=False) 3841 and self._next 3842 and self._next.text.upper() == "FN" 3843 ): 3844 self._advance(2) 3845 fn_syntax = True 3846 3847 func = self._parse_function_call( 3848 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3849 ) 3850 3851 if fn_syntax: 3852 self._match(TokenType.R_BRACE) 3853 3854 return func 3855 3856 def _parse_function_call( 3857 self, 3858 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3859 anonymous: bool = False, 3860 optional_parens: bool = True, 3861 ) -> t.Optional[exp.Expression]: 3862 if not self._curr: 3863 return None 3864 3865 comments = self._curr.comments 3866 token_type = self._curr.token_type 3867 this = self._curr.text 3868 upper = this.upper() 3869 3870 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3871 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3872 self._advance() 3873 return parser(self) 3874 3875 if not self._next or self._next.token_type != TokenType.L_PAREN: 3876 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3877 self._advance() 3878 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3879 3880 return None 3881 3882 if token_type not in 
self.FUNC_TOKENS: 3883 return None 3884 3885 self._advance(2) 3886 3887 parser = self.FUNCTION_PARSERS.get(upper) 3888 if parser and not anonymous: 3889 this = parser(self) 3890 else: 3891 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3892 3893 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3894 this = self.expression(subquery_predicate, this=self._parse_select()) 3895 self._match_r_paren() 3896 return this 3897 3898 if functions is None: 3899 functions = self.FUNCTIONS 3900 3901 function = functions.get(upper) 3902 3903 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3904 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3905 3906 if function and not anonymous: 3907 if "dialect" in function.__code__.co_varnames: 3908 func = function(args, dialect=self.dialect) 3909 else: 3910 func = function(args) 3911 3912 func = self.validate_expression(func, args) 3913 if not self.dialect.NORMALIZE_FUNCTIONS: 3914 func.meta["name"] = this 3915 3916 this = func 3917 else: 3918 this = self.expression(exp.Anonymous, this=this, expressions=args) 3919 3920 if isinstance(this, exp.Expression): 3921 this.add_comments(comments) 3922 3923 self._match_r_paren(this) 3924 return self._parse_window(this) 3925 3926 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3927 return self._parse_column_def(self._parse_id_var()) 3928 3929 def _parse_user_defined_function( 3930 self, kind: t.Optional[TokenType] = None 3931 ) -> t.Optional[exp.Expression]: 3932 this = self._parse_id_var() 3933 3934 while self._match(TokenType.DOT): 3935 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3936 3937 if not self._match(TokenType.L_PAREN): 3938 return this 3939 3940 expressions = self._parse_csv(self._parse_function_parameter) 3941 self._match_r_paren() 3942 return self.expression( 3943 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3944 ) 3945 3946 def 
_parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3947 literal = self._parse_primary() 3948 if literal: 3949 return self.expression(exp.Introducer, this=token.text, expression=literal) 3950 3951 return self.expression(exp.Identifier, this=token.text) 3952 3953 def _parse_session_parameter(self) -> exp.SessionParameter: 3954 kind = None 3955 this = self._parse_id_var() or self._parse_primary() 3956 3957 if this and self._match(TokenType.DOT): 3958 kind = this.name 3959 this = self._parse_var() or self._parse_primary() 3960 3961 return self.expression(exp.SessionParameter, this=this, kind=kind) 3962 3963 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3964 index = self._index 3965 3966 if self._match(TokenType.L_PAREN): 3967 expressions = t.cast( 3968 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3969 ) 3970 3971 if not self._match(TokenType.R_PAREN): 3972 self._retreat(index) 3973 else: 3974 expressions = [self._parse_id_var()] 3975 3976 if self._match_set(self.LAMBDAS): 3977 return self.LAMBDAS[self._prev.token_type](self, expressions) 3978 3979 self._retreat(index) 3980 3981 this: t.Optional[exp.Expression] 3982 3983 if self._match(TokenType.DISTINCT): 3984 this = self.expression( 3985 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3986 ) 3987 else: 3988 this = self._parse_select_or_expression(alias=alias) 3989 3990 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3991 3992 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3993 index = self._index 3994 3995 if not self.errors: 3996 try: 3997 if self._parse_select(nested=True): 3998 return this 3999 except ParseError: 4000 pass 4001 finally: 4002 self.errors.clear() 4003 self._retreat(index) 4004 4005 if not self._match(TokenType.L_PAREN): 4006 return this 4007 4008 args = self._parse_csv(lambda: self._parse_constraint() or 
self._parse_field_def()) 4009 4010 self._match_r_paren() 4011 return self.expression(exp.Schema, this=this, expressions=args) 4012 4013 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4014 return self._parse_column_def(self._parse_field(any_token=True)) 4015 4016 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4017 # column defs are not really columns, they're identifiers 4018 if isinstance(this, exp.Column): 4019 this = this.this 4020 4021 kind = self._parse_types(schema=True) 4022 4023 if self._match_text_seq("FOR", "ORDINALITY"): 4024 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4025 4026 constraints: t.List[exp.Expression] = [] 4027 4028 if not kind and self._match(TokenType.ALIAS): 4029 constraints.append( 4030 self.expression( 4031 exp.ComputedColumnConstraint, 4032 this=self._parse_conjunction(), 4033 persisted=self._match_text_seq("PERSISTED"), 4034 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4035 ) 4036 ) 4037 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4038 self._match(TokenType.ALIAS) 4039 constraints.append( 4040 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4041 ) 4042 4043 while True: 4044 constraint = self._parse_column_constraint() 4045 if not constraint: 4046 break 4047 constraints.append(constraint) 4048 4049 if not kind and not constraints: 4050 return this 4051 4052 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4053 4054 def _parse_auto_increment( 4055 self, 4056 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4057 start = None 4058 increment = None 4059 4060 if self._match(TokenType.L_PAREN, advance=False): 4061 args = self._parse_wrapped_csv(self._parse_bitwise) 4062 start = seq_get(args, 0) 4063 increment = seq_get(args, 1) 4064 elif self._match_text_seq("START"): 4065 start = self._parse_bitwise() 4066 
self._match_text_seq("INCREMENT") 4067 increment = self._parse_bitwise() 4068 4069 if start and increment: 4070 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4071 4072 return exp.AutoIncrementColumnConstraint() 4073 4074 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4075 if not self._match_text_seq("REFRESH"): 4076 self._retreat(self._index - 1) 4077 return None 4078 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4079 4080 def _parse_compress(self) -> exp.CompressColumnConstraint: 4081 if self._match(TokenType.L_PAREN, advance=False): 4082 return self.expression( 4083 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4084 ) 4085 4086 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4087 4088 def _parse_generated_as_identity( 4089 self, 4090 ) -> ( 4091 exp.GeneratedAsIdentityColumnConstraint 4092 | exp.ComputedColumnConstraint 4093 | exp.GeneratedAsRowColumnConstraint 4094 ): 4095 if self._match_text_seq("BY", "DEFAULT"): 4096 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4097 this = self.expression( 4098 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4099 ) 4100 else: 4101 self._match_text_seq("ALWAYS") 4102 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4103 4104 self._match(TokenType.ALIAS) 4105 4106 if self._match_text_seq("ROW"): 4107 start = self._match_text_seq("START") 4108 if not start: 4109 self._match(TokenType.END) 4110 hidden = self._match_text_seq("HIDDEN") 4111 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4112 4113 identity = self._match_text_seq("IDENTITY") 4114 4115 if self._match(TokenType.L_PAREN): 4116 if self._match(TokenType.START_WITH): 4117 this.set("start", self._parse_bitwise()) 4118 if self._match_text_seq("INCREMENT", "BY"): 4119 this.set("increment", self._parse_bitwise()) 4120 if 
self._match_text_seq("MINVALUE"): 4121 this.set("minvalue", self._parse_bitwise()) 4122 if self._match_text_seq("MAXVALUE"): 4123 this.set("maxvalue", self._parse_bitwise()) 4124 4125 if self._match_text_seq("CYCLE"): 4126 this.set("cycle", True) 4127 elif self._match_text_seq("NO", "CYCLE"): 4128 this.set("cycle", False) 4129 4130 if not identity: 4131 this.set("expression", self._parse_bitwise()) 4132 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4133 args = self._parse_csv(self._parse_bitwise) 4134 this.set("start", seq_get(args, 0)) 4135 this.set("increment", seq_get(args, 1)) 4136 4137 self._match_r_paren() 4138 4139 return this 4140 4141 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4142 self._match_text_seq("LENGTH") 4143 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4144 4145 def _parse_not_constraint( 4146 self, 4147 ) -> t.Optional[exp.Expression]: 4148 if self._match_text_seq("NULL"): 4149 return self.expression(exp.NotNullColumnConstraint) 4150 if self._match_text_seq("CASESPECIFIC"): 4151 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4152 if self._match_text_seq("FOR", "REPLICATION"): 4153 return self.expression(exp.NotForReplicationColumnConstraint) 4154 return None 4155 4156 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4157 if self._match(TokenType.CONSTRAINT): 4158 this = self._parse_id_var() 4159 else: 4160 this = None 4161 4162 if self._match_texts(self.CONSTRAINT_PARSERS): 4163 return self.expression( 4164 exp.ColumnConstraint, 4165 this=this, 4166 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4167 ) 4168 4169 return this 4170 4171 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4172 if not self._match(TokenType.CONSTRAINT): 4173 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4174 4175 this = self._parse_id_var() 4176 expressions = [] 4177 4178 while 
True: 4179 constraint = self._parse_unnamed_constraint() or self._parse_function() 4180 if not constraint: 4181 break 4182 expressions.append(constraint) 4183 4184 return self.expression(exp.Constraint, this=this, expressions=expressions) 4185 4186 def _parse_unnamed_constraint( 4187 self, constraints: t.Optional[t.Collection[str]] = None 4188 ) -> t.Optional[exp.Expression]: 4189 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4190 constraints or self.CONSTRAINT_PARSERS 4191 ): 4192 return None 4193 4194 constraint = self._prev.text.upper() 4195 if constraint not in self.CONSTRAINT_PARSERS: 4196 self.raise_error(f"No parser found for schema constraint {constraint}.") 4197 4198 return self.CONSTRAINT_PARSERS[constraint](self) 4199 4200 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4201 self._match_text_seq("KEY") 4202 return self.expression( 4203 exp.UniqueColumnConstraint, 4204 this=self._parse_schema(self._parse_id_var(any_token=False)), 4205 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4206 ) 4207 4208 def _parse_key_constraint_options(self) -> t.List[str]: 4209 options = [] 4210 while True: 4211 if not self._curr: 4212 break 4213 4214 if self._match(TokenType.ON): 4215 action = None 4216 on = self._advance_any() and self._prev.text 4217 4218 if self._match_text_seq("NO", "ACTION"): 4219 action = "NO ACTION" 4220 elif self._match_text_seq("CASCADE"): 4221 action = "CASCADE" 4222 elif self._match_text_seq("RESTRICT"): 4223 action = "RESTRICT" 4224 elif self._match_pair(TokenType.SET, TokenType.NULL): 4225 action = "SET NULL" 4226 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4227 action = "SET DEFAULT" 4228 else: 4229 self.raise_error("Invalid key constraint") 4230 4231 options.append(f"ON {on} {action}") 4232 elif self._match_text_seq("NOT", "ENFORCED"): 4233 options.append("NOT ENFORCED") 4234 elif self._match_text_seq("DEFERRABLE"): 4235 options.append("DEFERRABLE") 4236 
elif self._match_text_seq("INITIALLY", "DEFERRED"): 4237 options.append("INITIALLY DEFERRED") 4238 elif self._match_text_seq("NORELY"): 4239 options.append("NORELY") 4240 elif self._match_text_seq("MATCH", "FULL"): 4241 options.append("MATCH FULL") 4242 else: 4243 break 4244 4245 return options 4246 4247 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4248 if match and not self._match(TokenType.REFERENCES): 4249 return None 4250 4251 expressions = None 4252 this = self._parse_table(schema=True) 4253 options = self._parse_key_constraint_options() 4254 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4255 4256 def _parse_foreign_key(self) -> exp.ForeignKey: 4257 expressions = self._parse_wrapped_id_vars() 4258 reference = self._parse_references() 4259 options = {} 4260 4261 while self._match(TokenType.ON): 4262 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4263 self.raise_error("Expected DELETE or UPDATE") 4264 4265 kind = self._prev.text.lower() 4266 4267 if self._match_text_seq("NO", "ACTION"): 4268 action = "NO ACTION" 4269 elif self._match(TokenType.SET): 4270 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4271 action = "SET " + self._prev.text.upper() 4272 else: 4273 self._advance() 4274 action = self._prev.text.upper() 4275 4276 options[kind] = action 4277 4278 return self.expression( 4279 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4280 ) 4281 4282 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4283 return self._parse_field() 4284 4285 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4286 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4287 4288 id_vars = self._parse_wrapped_id_vars() 4289 return self.expression( 4290 exp.PeriodForSystemTimeConstraint, 4291 this=seq_get(id_vars, 0), 4292 expression=seq_get(id_vars, 1), 4293 ) 4294 4295 def _parse_primary_key( 4296 self, 
wrapped_optional: bool = False, in_props: bool = False 4297 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4298 desc = ( 4299 self._match_set((TokenType.ASC, TokenType.DESC)) 4300 and self._prev.token_type == TokenType.DESC 4301 ) 4302 4303 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4304 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4305 4306 expressions = self._parse_wrapped_csv( 4307 self._parse_primary_key_part, optional=wrapped_optional 4308 ) 4309 options = self._parse_key_constraint_options() 4310 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4311 4312 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4313 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4314 4315 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4316 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4317 return this 4318 4319 bracket_kind = self._prev.token_type 4320 expressions = self._parse_csv( 4321 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4322 ) 4323 4324 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4325 self.raise_error("Expected ]") 4326 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4327 self.raise_error("Expected }") 4328 4329 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4330 if bracket_kind == TokenType.L_BRACE: 4331 this = self.expression(exp.Struct, expressions=expressions) 4332 elif not this or this.name.upper() == "ARRAY": 4333 this = self.expression(exp.Array, expressions=expressions) 4334 else: 4335 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4336 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4337 4338 self._add_comments(this) 4339 return self._parse_bracket(this) 4340 
4341 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4342 if self._match(TokenType.COLON): 4343 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4344 return this 4345 4346 def _parse_case(self) -> t.Optional[exp.Expression]: 4347 ifs = [] 4348 default = None 4349 4350 comments = self._prev_comments 4351 expression = self._parse_conjunction() 4352 4353 while self._match(TokenType.WHEN): 4354 this = self._parse_conjunction() 4355 self._match(TokenType.THEN) 4356 then = self._parse_conjunction() 4357 ifs.append(self.expression(exp.If, this=this, true=then)) 4358 4359 if self._match(TokenType.ELSE): 4360 default = self._parse_conjunction() 4361 4362 if not self._match(TokenType.END): 4363 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4364 default = exp.column("interval") 4365 else: 4366 self.raise_error("Expected END after CASE", self._prev) 4367 4368 return self._parse_window( 4369 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4370 ) 4371 4372 def _parse_if(self) -> t.Optional[exp.Expression]: 4373 if self._match(TokenType.L_PAREN): 4374 args = self._parse_csv(self._parse_conjunction) 4375 this = self.validate_expression(exp.If.from_arg_list(args), args) 4376 self._match_r_paren() 4377 else: 4378 index = self._index - 1 4379 condition = self._parse_conjunction() 4380 4381 if not condition: 4382 self._retreat(index) 4383 return None 4384 4385 self._match(TokenType.THEN) 4386 true = self._parse_conjunction() 4387 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4388 self._match(TokenType.END) 4389 this = self.expression(exp.If, this=condition, true=true, false=false) 4390 4391 return self._parse_window(this) 4392 4393 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4394 if not self._match_text_seq("VALUE", "FOR"): 4395 self._retreat(self._index - 1) 4396 return None 4397 4398 return 
self.expression( 4399 exp.NextValueFor, 4400 this=self._parse_column(), 4401 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4402 ) 4403 4404 def _parse_extract(self) -> exp.Extract: 4405 this = self._parse_function() or self._parse_var() or self._parse_type() 4406 4407 if self._match(TokenType.FROM): 4408 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4409 4410 if not self._match(TokenType.COMMA): 4411 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4412 4413 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4414 4415 def _parse_any_value(self) -> exp.AnyValue: 4416 this = self._parse_lambda() 4417 is_max = None 4418 having = None 4419 4420 if self._match(TokenType.HAVING): 4421 self._match_texts(("MAX", "MIN")) 4422 is_max = self._prev.text == "MAX" 4423 having = self._parse_column() 4424 4425 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4426 4427 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4428 this = self._parse_conjunction() 4429 4430 if not self._match(TokenType.ALIAS): 4431 if self._match(TokenType.COMMA): 4432 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4433 4434 self.raise_error("Expected AS after CAST") 4435 4436 fmt = None 4437 to = self._parse_types() 4438 4439 if self._match(TokenType.FORMAT): 4440 fmt_string = self._parse_string() 4441 fmt = self._parse_at_time_zone(fmt_string) 4442 4443 if not to: 4444 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4445 if to.this in exp.DataType.TEMPORAL_TYPES: 4446 this = self.expression( 4447 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4448 this=this, 4449 format=exp.Literal.string( 4450 format_time( 4451 fmt_string.this if fmt_string else "", 4452 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4453 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4454 
) 4455 ), 4456 ) 4457 4458 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4459 this.set("zone", fmt.args["zone"]) 4460 return this 4461 elif not to: 4462 self.raise_error("Expected TYPE after CAST") 4463 elif isinstance(to, exp.Identifier): 4464 to = exp.DataType.build(to.name, udt=True) 4465 elif to.this == exp.DataType.Type.CHAR: 4466 if self._match(TokenType.CHARACTER_SET): 4467 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4468 4469 return self.expression( 4470 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4471 ) 4472 4473 def _parse_string_agg(self) -> exp.Expression: 4474 if self._match(TokenType.DISTINCT): 4475 args: t.List[t.Optional[exp.Expression]] = [ 4476 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4477 ] 4478 if self._match(TokenType.COMMA): 4479 args.extend(self._parse_csv(self._parse_conjunction)) 4480 else: 4481 args = self._parse_csv(self._parse_conjunction) # type: ignore 4482 4483 index = self._index 4484 if not self._match(TokenType.R_PAREN) and args: 4485 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4486 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4487 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4488 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4489 4490 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4491 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4492 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4493 if not self._match_text_seq("WITHIN", "GROUP"): 4494 self._retreat(index) 4495 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4496 4497 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4498 order = self._parse_order(this=seq_get(args, 0)) 4499 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4500 4501 def _parse_convert( 4502 self, strict: bool, safe: t.Optional[bool] = None 4503 ) -> t.Optional[exp.Expression]: 4504 this = self._parse_bitwise() 4505 4506 if self._match(TokenType.USING): 4507 to: t.Optional[exp.Expression] = self.expression( 4508 exp.CharacterSet, this=self._parse_var() 4509 ) 4510 elif self._match(TokenType.COMMA): 4511 to = self._parse_types() 4512 else: 4513 to = None 4514 4515 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4516 4517 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4518 """ 4519 There are generally two variants of the DECODE function: 4520 4521 - DECODE(bin, charset) 4522 - DECODE(expression, search, result [, search, result] ... [, default]) 4523 4524 The second variant will always be parsed into a CASE expression. Note that NULL 4525 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4526 instead of relying on pattern matching. 
4527 """ 4528 args = self._parse_csv(self._parse_conjunction) 4529 4530 if len(args) < 3: 4531 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4532 4533 expression, *expressions = args 4534 if not expression: 4535 return None 4536 4537 ifs = [] 4538 for search, result in zip(expressions[::2], expressions[1::2]): 4539 if not search or not result: 4540 return None 4541 4542 if isinstance(search, exp.Literal): 4543 ifs.append( 4544 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4545 ) 4546 elif isinstance(search, exp.Null): 4547 ifs.append( 4548 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4549 ) 4550 else: 4551 cond = exp.or_( 4552 exp.EQ(this=expression.copy(), expression=search), 4553 exp.and_( 4554 exp.Is(this=expression.copy(), expression=exp.Null()), 4555 exp.Is(this=search.copy(), expression=exp.Null()), 4556 copy=False, 4557 ), 4558 copy=False, 4559 ) 4560 ifs.append(exp.If(this=cond, true=result)) 4561 4562 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4563 4564 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4565 self._match_text_seq("KEY") 4566 key = self._parse_column() 4567 self._match_set((TokenType.COLON, TokenType.COMMA)) 4568 self._match_text_seq("VALUE") 4569 value = self._parse_bitwise() 4570 4571 if not key and not value: 4572 return None 4573 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4574 4575 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4576 if not this or not self._match_text_seq("FORMAT", "JSON"): 4577 return this 4578 4579 return self.expression(exp.FormatJson, this=this) 4580 4581 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4582 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4583 for value in values: 4584 if self._match_text_seq(value, "ON", on): 4585 return f"{value} ON {on}" 4586 4587 return None 4588 4589 def _parse_json_object(self) -> exp.JSONObject: 4590 star = self._parse_star() 4591 expressions = ( 4592 [star] 4593 if star 4594 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4595 ) 4596 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4597 4598 unique_keys = None 4599 if self._match_text_seq("WITH", "UNIQUE"): 4600 unique_keys = True 4601 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4602 unique_keys = False 4603 4604 self._match_text_seq("KEYS") 4605 4606 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4607 self._parse_type() 4608 ) 4609 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4610 4611 return self.expression( 4612 exp.JSONObject, 4613 expressions=expressions, 4614 null_handling=null_handling, 4615 unique_keys=unique_keys, 4616 return_type=return_type, 4617 encoding=encoding, 4618 ) 4619 4620 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4621 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4622 if not self._match_text_seq("NESTED"): 4623 this = self._parse_id_var() 4624 kind = self._parse_types(allow_identifiers=False) 4625 nested = None 4626 else: 4627 this = None 4628 kind = None 4629 nested = True 4630 4631 path = self._match_text_seq("PATH") and self._parse_string() 4632 nested_schema = nested and self._parse_json_schema() 4633 4634 return self.expression( 4635 exp.JSONColumnDef, 4636 this=this, 4637 kind=kind, 4638 path=path, 4639 nested_schema=nested_schema, 4640 ) 4641 4642 def _parse_json_schema(self) -> exp.JSONSchema: 4643 self._match_text_seq("COLUMNS") 4644 return self.expression( 4645 exp.JSONSchema, 4646 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4647 ) 4648 4649 def _parse_json_table(self) -> 
exp.JSONTable: 4650 this = self._parse_format_json(self._parse_bitwise()) 4651 path = self._match(TokenType.COMMA) and self._parse_string() 4652 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4653 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4654 schema = self._parse_json_schema() 4655 4656 return exp.JSONTable( 4657 this=this, 4658 schema=schema, 4659 path=path, 4660 error_handling=error_handling, 4661 empty_handling=empty_handling, 4662 ) 4663 4664 def _parse_match_against(self) -> exp.MatchAgainst: 4665 expressions = self._parse_csv(self._parse_column) 4666 4667 self._match_text_seq(")", "AGAINST", "(") 4668 4669 this = self._parse_string() 4670 4671 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4672 modifier = "IN NATURAL LANGUAGE MODE" 4673 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4674 modifier = f"{modifier} WITH QUERY EXPANSION" 4675 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4676 modifier = "IN BOOLEAN MODE" 4677 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4678 modifier = "WITH QUERY EXPANSION" 4679 else: 4680 modifier = None 4681 4682 return self.expression( 4683 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4684 ) 4685 4686 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4687 def _parse_open_json(self) -> exp.OpenJSON: 4688 this = self._parse_bitwise() 4689 path = self._match(TokenType.COMMA) and self._parse_string() 4690 4691 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4692 this = self._parse_field(any_token=True) 4693 kind = self._parse_types() 4694 path = self._parse_string() 4695 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4696 4697 return self.expression( 4698 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4699 ) 4700 4701 expressions = None 4702 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4703 
self._match_l_paren() 4704 expressions = self._parse_csv(_parse_open_json_column_def) 4705 4706 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4707 4708 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4709 args = self._parse_csv(self._parse_bitwise) 4710 4711 if self._match(TokenType.IN): 4712 return self.expression( 4713 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4714 ) 4715 4716 if haystack_first: 4717 haystack = seq_get(args, 0) 4718 needle = seq_get(args, 1) 4719 else: 4720 needle = seq_get(args, 0) 4721 haystack = seq_get(args, 1) 4722 4723 return self.expression( 4724 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4725 ) 4726 4727 def _parse_predict(self) -> exp.Predict: 4728 self._match_text_seq("MODEL") 4729 this = self._parse_table() 4730 4731 self._match(TokenType.COMMA) 4732 self._match_text_seq("TABLE") 4733 4734 return self.expression( 4735 exp.Predict, 4736 this=this, 4737 expression=self._parse_table(), 4738 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4739 ) 4740 4741 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4742 args = self._parse_csv(self._parse_table) 4743 return exp.JoinHint(this=func_name.upper(), expressions=args) 4744 4745 def _parse_substring(self) -> exp.Substring: 4746 # Postgres supports the form: substring(string [from int] [for int]) 4747 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4748 4749 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4750 4751 if self._match(TokenType.FROM): 4752 args.append(self._parse_bitwise()) 4753 if self._match(TokenType.FOR): 4754 args.append(self._parse_bitwise()) 4755 4756 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4757 4758 def _parse_trim(self) -> exp.Trim: 4759 # https://www.w3resource.com/sql/character-functions/trim.php 4760 # 
https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4761 4762 position = None 4763 collation = None 4764 expression = None 4765 4766 if self._match_texts(self.TRIM_TYPES): 4767 position = self._prev.text.upper() 4768 4769 this = self._parse_bitwise() 4770 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4771 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4772 expression = self._parse_bitwise() 4773 4774 if invert_order: 4775 this, expression = expression, this 4776 4777 if self._match(TokenType.COLLATE): 4778 collation = self._parse_bitwise() 4779 4780 return self.expression( 4781 exp.Trim, this=this, position=position, expression=expression, collation=collation 4782 ) 4783 4784 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4785 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4786 4787 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4788 return self._parse_window(self._parse_id_var(), alias=True) 4789 4790 def _parse_respect_or_ignore_nulls( 4791 self, this: t.Optional[exp.Expression] 4792 ) -> t.Optional[exp.Expression]: 4793 if self._match_text_seq("IGNORE", "NULLS"): 4794 return self.expression(exp.IgnoreNulls, this=this) 4795 if self._match_text_seq("RESPECT", "NULLS"): 4796 return self.expression(exp.RespectNulls, this=this) 4797 return this 4798 4799 def _parse_window( 4800 self, this: t.Optional[exp.Expression], alias: bool = False 4801 ) -> t.Optional[exp.Expression]: 4802 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4803 self._match(TokenType.WHERE) 4804 this = self.expression( 4805 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4806 ) 4807 self._match_r_paren() 4808 4809 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4810 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4811 if self._match_text_seq("WITHIN", "GROUP"): 4812 order = self._parse_wrapped(self._parse_order) 4813 this = self.expression(exp.WithinGroup, this=this, expression=order) 4814 4815 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4816 # Some dialects choose to implement and some do not. 4817 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4818 4819 # There is some code above in _parse_lambda that handles 4820 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4821 4822 # The below changes handle 4823 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4824 4825 # Oracle allows both formats 4826 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4827 # and Snowflake chose to do the same for familiarity 4828 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4829 this = self._parse_respect_or_ignore_nulls(this) 4830 4831 # bigquery select from window x AS (partition by ...) 
4832 if alias: 4833 over = None 4834 self._match(TokenType.ALIAS) 4835 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4836 return this 4837 else: 4838 over = self._prev.text.upper() 4839 4840 if not self._match(TokenType.L_PAREN): 4841 return self.expression( 4842 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4843 ) 4844 4845 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4846 4847 first = self._match(TokenType.FIRST) 4848 if self._match_text_seq("LAST"): 4849 first = False 4850 4851 partition, order = self._parse_partition_and_order() 4852 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4853 4854 if kind: 4855 self._match(TokenType.BETWEEN) 4856 start = self._parse_window_spec() 4857 self._match(TokenType.AND) 4858 end = self._parse_window_spec() 4859 4860 spec = self.expression( 4861 exp.WindowSpec, 4862 kind=kind, 4863 start=start["value"], 4864 start_side=start["side"], 4865 end=end["value"], 4866 end_side=end["side"], 4867 ) 4868 else: 4869 spec = None 4870 4871 self._match_r_paren() 4872 4873 window = self.expression( 4874 exp.Window, 4875 this=this, 4876 partition_by=partition, 4877 order=order, 4878 spec=spec, 4879 alias=window_alias, 4880 over=over, 4881 first=first, 4882 ) 4883 4884 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
        # NOTE(review): tail of _parse_window — the beginning of that method lies
        # outside this chunk. A window may itself be followed by another window
        # token (e.g. chained OVER), in which case we recurse without advancing.
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse an optional PARTITION BY list followed by an optional ORDER BY."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one bound of a window frame specification.

        Returns a dict with:
          * "value": the literal strings "UNBOUNDED" / "CURRENT ROW", or an
            arbitrary bitwise expression (e.g. the N of "N PRECEDING"), and
          * "side": the matched side keyword text (one of ``self.WINDOW_SIDES``)
            or a falsy value when absent.
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (AS x, bare identifier, (a, b, c), or a
        quoted-string alias when ``self.STRING_ALIASES`` allows it) for `this`.

        When ``explicit`` is True, an alias is only parsed if the AS keyword
        is present; otherwise `this` is returned unchanged.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        # Parenthesized multi-alias form: t AS (a, b, c)
        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name.

        Falls back from a true (quoted) identifier to any non-reserved token
        (when ``any_token``) or to a token in ``tokens`` / ``self.ID_VAR_TOKENS``.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a (raw) string literal, else fall back to a placeholder."""
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and coerce it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, else fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, else fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-like token into an ``exp.Var``.

        ``any_token`` accepts any non-reserved token; ``tokens`` widens the
        accepted set; ``upper`` uppercases the resulting text.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or a string literal, whichever comes first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else fall back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, else fall back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse `*` (including EXCEPT/REPLACE via its PRIMARY parser), else a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally braced and with a
        ``name:part`` qualifier (e.g. ``@@{global:var}`` styles)."""

        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try each registered placeholder parser; rewind the consumed token
        if the parser matched the token type but produced nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The token matched but the sub-parser declined — undo the match.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the column list of a ``* EXCEPT (...)`` / ``* EXCEPT col`` clause."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the expression list of a ``* REPLACE (...)`` / ``* REPLACE e`` clause."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a ``sep``-separated list of items produced by ``parse_method``,
        dropping items that parse to None."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments preceding the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators: parse an operand, then
        repeatedly match an operator token from ``expressions`` and parse the
        right-hand operand."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV list; parens may be absent when ``optional``."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Parse ``( <parse_method> )``; raise unless parens are ``optional``."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a full SELECT, or else a (possibly aliased) scalar expression
        with any trailing set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS),
        including set operations and query modifiers."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional comma-separated,
        multi-word transaction modes (e.g. ISOLATION LEVEL ...)."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode can span several VAR tokens ("READ ONLY", etc.).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT [AND [NO] CHAIN] or ROLLBACK [TO [SAVEPOINT] name].

        NOTE(review): the parsed ``chain`` is only attached to Commit and the
        ``savepoint`` only to Rollback — presumably intentional, since each
        option belongs to one statement; confirm against the generators.
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string-or-table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <field def>
        with an optional FIRST/AFTER position clause."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP PARTITION spec(s) for ALTER TABLE."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        ``self._prev`` (set by the caller's match) determines the kind.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and try column form(s).
        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] c {DROP DEFAULT | SET DEFAULT e |
        [SET DATA] TYPE t [COLLATE ...] [USING ...]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP actions: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE with a recognized action, or fall back to a raw
        Command when the action is unknown or tokens remain unconsumed."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce a structured AlterTable if the action parser
            # consumed everything; otherwise re-parse as an opaque Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target [alias] USING source ON cond WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND cond] THEN
        <insert|update|delete> clauses of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or no BY clause.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (vals)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET a = b, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a registered sub-parser, else as a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form ``name = value`` / ``name TO value``,
        or a GLOBAL/SESSION TRANSACTION characteristic list."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all — rewind so the caller can retry.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via a registered parser or a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; if any tokens remain unconsumed, rewind and
        re-parse the whole statement as an opaque Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) ``options`` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL from ``start``
        onward into an opaque Command expression."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the command keyword off from the rest of the statement text.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: ``this ( kind ( key value ... ) )``
        (e.g. ClickHouse dictionary LAYOUT/SOURCE clauses)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse ``this (MIN a MAX b)`` or ``this (b)`` (min defaults to 0)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: ``expr IN iterator [IF condition]``;
        rewinds and returns None if IN does not follow."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk ``trie`` over upcoming token texts to find the longest matching
        key in ``parsers``; rewinds and returns None on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against ``token_type``; optionally advance
        and attach pending comments to ``expression``. Returns True or None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set/dict of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a pair."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ``(``; raise a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ``)``; raise a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Case-insensitively match the current token's text against ``texts``."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Case-insensitively match a sequence of token texts; rewinds fully
        on a partial match (and also when ``advance`` is False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to ``lambda_variables`` inside ``node``
        with bare identifiers (or Dot chains when table-qualified), so lambda
        parameters are not treated as real columns."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
22def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 23 if len(args) == 1 and args[0].is_star: 24 return exp.StarMap(this=args[0]) 25 26 keys = [] 27 values = [] 28 for i in range(0, len(args), 2): 29 keys.append(args[i]) 30 values.append(args[i + 1]) 31 32 return exp.VarMap( 33 keys=exp.Array(expressions=keys), 34 values=exp.Array(expressions=values), 35 )
51def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
74class Parser(metaclass=_Parser): 75 """ 76 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 77 78 Args: 79 error_level: The desired error level. 80 Default: ErrorLevel.IMMEDIATE 81 error_message_context: Determines the amount of context to capture from a 82 query string when displaying the error message (in number of characters). 83 Default: 100 84 max_errors: Maximum number of error messages to include in a raised ParseError. 85 This is only relevant if error_level is ErrorLevel.RAISE. 86 Default: 3 87 """ 88 89 FUNCTIONS: t.Dict[str, t.Callable] = { 90 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 91 "CONCAT": lambda args, dialect: exp.Concat( 92 expressions=args, 93 safe=not dialect.STRICT_STRING_CONCAT, 94 coalesce=dialect.CONCAT_COALESCE, 95 ), 96 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 97 expressions=args, 98 safe=not dialect.STRICT_STRING_CONCAT, 99 coalesce=dialect.CONCAT_COALESCE, 100 ), 101 "DATE_TO_DATE_STR": lambda args: exp.Cast( 102 this=seq_get(args, 0), 103 to=exp.DataType(this=exp.DataType.Type.TEXT), 104 ), 105 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 106 "LIKE": parse_like, 107 "LOG": parse_logarithm, 108 "TIME_TO_TIME_STR": lambda args: exp.Cast( 109 this=seq_get(args, 0), 110 to=exp.DataType(this=exp.DataType.Type.TEXT), 111 ), 112 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 113 this=exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 start=exp.Literal.number(1), 118 length=exp.Literal.number(10), 119 ), 120 "VAR_MAP": parse_var_map, 121 } 122 123 NO_PAREN_FUNCTIONS = { 124 TokenType.CURRENT_DATE: exp.CurrentDate, 125 TokenType.CURRENT_DATETIME: exp.CurrentDate, 126 TokenType.CURRENT_TIME: exp.CurrentTime, 127 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 128 TokenType.CURRENT_USER: exp.CurrentUser, 129 } 130 131 STRUCT_TYPE_TOKENS = { 132 TokenType.NESTED, 
133 TokenType.STRUCT, 134 } 135 136 NESTED_TYPE_TOKENS = { 137 TokenType.ARRAY, 138 TokenType.LOWCARDINALITY, 139 TokenType.MAP, 140 TokenType.NULLABLE, 141 *STRUCT_TYPE_TOKENS, 142 } 143 144 ENUM_TYPE_TOKENS = { 145 TokenType.ENUM, 146 TokenType.ENUM8, 147 TokenType.ENUM16, 148 } 149 150 TYPE_TOKENS = { 151 TokenType.BIT, 152 TokenType.BOOLEAN, 153 TokenType.TINYINT, 154 TokenType.UTINYINT, 155 TokenType.SMALLINT, 156 TokenType.USMALLINT, 157 TokenType.INT, 158 TokenType.UINT, 159 TokenType.BIGINT, 160 TokenType.UBIGINT, 161 TokenType.INT128, 162 TokenType.UINT128, 163 TokenType.INT256, 164 TokenType.UINT256, 165 TokenType.MEDIUMINT, 166 TokenType.UMEDIUMINT, 167 TokenType.FIXEDSTRING, 168 TokenType.FLOAT, 169 TokenType.DOUBLE, 170 TokenType.CHAR, 171 TokenType.NCHAR, 172 TokenType.VARCHAR, 173 TokenType.NVARCHAR, 174 TokenType.TEXT, 175 TokenType.MEDIUMTEXT, 176 TokenType.LONGTEXT, 177 TokenType.MEDIUMBLOB, 178 TokenType.LONGBLOB, 179 TokenType.BINARY, 180 TokenType.VARBINARY, 181 TokenType.JSON, 182 TokenType.JSONB, 183 TokenType.INTERVAL, 184 TokenType.TINYBLOB, 185 TokenType.TINYTEXT, 186 TokenType.TIME, 187 TokenType.TIMETZ, 188 TokenType.TIMESTAMP, 189 TokenType.TIMESTAMP_S, 190 TokenType.TIMESTAMP_MS, 191 TokenType.TIMESTAMP_NS, 192 TokenType.TIMESTAMPTZ, 193 TokenType.TIMESTAMPLTZ, 194 TokenType.DATETIME, 195 TokenType.DATETIME64, 196 TokenType.DATE, 197 TokenType.INT4RANGE, 198 TokenType.INT4MULTIRANGE, 199 TokenType.INT8RANGE, 200 TokenType.INT8MULTIRANGE, 201 TokenType.NUMRANGE, 202 TokenType.NUMMULTIRANGE, 203 TokenType.TSRANGE, 204 TokenType.TSMULTIRANGE, 205 TokenType.TSTZRANGE, 206 TokenType.TSTZMULTIRANGE, 207 TokenType.DATERANGE, 208 TokenType.DATEMULTIRANGE, 209 TokenType.DECIMAL, 210 TokenType.UDECIMAL, 211 TokenType.BIGDECIMAL, 212 TokenType.UUID, 213 TokenType.GEOGRAPHY, 214 TokenType.GEOMETRY, 215 TokenType.HLLSKETCH, 216 TokenType.HSTORE, 217 TokenType.PSEUDO_TYPE, 218 TokenType.SUPER, 219 TokenType.SERIAL, 220 TokenType.SMALLSERIAL, 221 
TokenType.BIGSERIAL, 222 TokenType.XML, 223 TokenType.YEAR, 224 TokenType.UNIQUEIDENTIFIER, 225 TokenType.USERDEFINED, 226 TokenType.MONEY, 227 TokenType.SMALLMONEY, 228 TokenType.ROWVERSION, 229 TokenType.IMAGE, 230 TokenType.VARIANT, 231 TokenType.OBJECT, 232 TokenType.OBJECT_IDENTIFIER, 233 TokenType.INET, 234 TokenType.IPADDRESS, 235 TokenType.IPPREFIX, 236 TokenType.UNKNOWN, 237 TokenType.NULL, 238 *ENUM_TYPE_TOKENS, 239 *NESTED_TYPE_TOKENS, 240 } 241 242 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 243 TokenType.BIGINT: TokenType.UBIGINT, 244 TokenType.INT: TokenType.UINT, 245 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 246 TokenType.SMALLINT: TokenType.USMALLINT, 247 TokenType.TINYINT: TokenType.UTINYINT, 248 TokenType.DECIMAL: TokenType.UDECIMAL, 249 } 250 251 SUBQUERY_PREDICATES = { 252 TokenType.ANY: exp.Any, 253 TokenType.ALL: exp.All, 254 TokenType.EXISTS: exp.Exists, 255 TokenType.SOME: exp.Any, 256 } 257 258 RESERVED_TOKENS = { 259 *Tokenizer.SINGLE_TOKENS.values(), 260 TokenType.SELECT, 261 } 262 263 DB_CREATABLES = { 264 TokenType.DATABASE, 265 TokenType.SCHEMA, 266 TokenType.TABLE, 267 TokenType.VIEW, 268 TokenType.MODEL, 269 TokenType.DICTIONARY, 270 } 271 272 CREATABLES = { 273 TokenType.COLUMN, 274 TokenType.CONSTRAINT, 275 TokenType.FUNCTION, 276 TokenType.INDEX, 277 TokenType.PROCEDURE, 278 TokenType.FOREIGN_KEY, 279 *DB_CREATABLES, 280 } 281 282 # Tokens that can represent identifiers 283 ID_VAR_TOKENS = { 284 TokenType.VAR, 285 TokenType.ANTI, 286 TokenType.APPLY, 287 TokenType.ASC, 288 TokenType.AUTO_INCREMENT, 289 TokenType.BEGIN, 290 TokenType.CACHE, 291 TokenType.CASE, 292 TokenType.COLLATE, 293 TokenType.COMMAND, 294 TokenType.COMMENT, 295 TokenType.COMMIT, 296 TokenType.CONSTRAINT, 297 TokenType.DEFAULT, 298 TokenType.DELETE, 299 TokenType.DESC, 300 TokenType.DESCRIBE, 301 TokenType.DICTIONARY, 302 TokenType.DIV, 303 TokenType.END, 304 TokenType.EXECUTE, 305 TokenType.ESCAPE, 306 TokenType.FALSE, 307 TokenType.FIRST, 308 TokenType.FILTER, 309 
TokenType.FINAL, 310 TokenType.FORMAT, 311 TokenType.FULL, 312 TokenType.IS, 313 TokenType.ISNULL, 314 TokenType.INTERVAL, 315 TokenType.KEEP, 316 TokenType.KILL, 317 TokenType.LEFT, 318 TokenType.LOAD, 319 TokenType.MERGE, 320 TokenType.NATURAL, 321 TokenType.NEXT, 322 TokenType.OFFSET, 323 TokenType.OPERATOR, 324 TokenType.ORDINALITY, 325 TokenType.OVERLAPS, 326 TokenType.OVERWRITE, 327 TokenType.PARTITION, 328 TokenType.PERCENT, 329 TokenType.PIVOT, 330 TokenType.PRAGMA, 331 TokenType.RANGE, 332 TokenType.RECURSIVE, 333 TokenType.REFERENCES, 334 TokenType.REFRESH, 335 TokenType.REPLACE, 336 TokenType.RIGHT, 337 TokenType.ROW, 338 TokenType.ROWS, 339 TokenType.SEMI, 340 TokenType.SET, 341 TokenType.SETTINGS, 342 TokenType.SHOW, 343 TokenType.TEMPORARY, 344 TokenType.TOP, 345 TokenType.TRUE, 346 TokenType.UNIQUE, 347 TokenType.UNPIVOT, 348 TokenType.UPDATE, 349 TokenType.USE, 350 TokenType.VOLATILE, 351 TokenType.WINDOW, 352 *CREATABLES, 353 *SUBQUERY_PREDICATES, 354 *TYPE_TOKENS, 355 *NO_PAREN_FUNCTIONS, 356 } 357 358 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 359 360 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 361 TokenType.ANTI, 362 TokenType.APPLY, 363 TokenType.ASOF, 364 TokenType.FULL, 365 TokenType.LEFT, 366 TokenType.LOCK, 367 TokenType.NATURAL, 368 TokenType.OFFSET, 369 TokenType.RIGHT, 370 TokenType.SEMI, 371 TokenType.WINDOW, 372 } 373 374 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 375 376 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 377 378 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 379 380 FUNC_TOKENS = { 381 TokenType.COLLATE, 382 TokenType.COMMAND, 383 TokenType.CURRENT_DATE, 384 TokenType.CURRENT_DATETIME, 385 TokenType.CURRENT_TIMESTAMP, 386 TokenType.CURRENT_TIME, 387 TokenType.CURRENT_USER, 388 TokenType.FILTER, 389 TokenType.FIRST, 390 TokenType.FORMAT, 391 TokenType.GLOB, 392 TokenType.IDENTIFIER, 393 TokenType.INDEX, 394 TokenType.ISNULL, 395 TokenType.ILIKE, 396 TokenType.INSERT, 397 TokenType.LIKE, 398 
        # (tail of a token set whose opening brace lies above this chunk —
        # presumably tokens that may appear in a function-name position; TODO confirm
        # against the set's opener.)
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # --- Binary operator tables: token type -> AST node class, grouped by
    # precedence tier. Each tier is consumed by its own climbing step. ---

    # Logical connectives.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    # Equality-level comparisons (includes `:=` property assignment).
    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    # Ordering comparisons.
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # Bitwise operators.
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    # Additive-level operators (COLLATE binds at this tier here).
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Multiplicative-level operators.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Exponentiation operators — empty by default; dialects populate this.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    # Time-only type tokens.
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    # All timestamp/time type tokens (superset of TIMES).
    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    # Set operation keywords (UNION / INTERSECT / EXCEPT).
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN method modifiers (e.g. NATURAL JOIN, ASOF JOIN).
    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    # JOIN side modifiers.
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # JOIN kind modifiers.
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Join hints — empty by default; dialects populate this.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `args -> body` builds exp.Lambda (rewriting column refs
    # that shadow the lambda's parameters), `arg => value` builds exp.Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression. DOT is handled
    # specially (None); `::` casts, and the arrow family does JSON extraction.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the parser entry point used by
    # parse_into() when asked to parse into that type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: first keyword token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary-expression dispatch: token -> AST constructor.
    # (Continues past this chunk boundary.)
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            # Optional UESCAPE '<char>' suffix selects a custom escape character.
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/predicate operators that can follow an expression
    # (BETWEEN, IN, IS, LIKE family, etc.).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword -> parser. Keys are matched by upper-cased text;
    # some entries accept **kwargs forwarded from _parse_property_before.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keyword -> parser (CREATE TABLE column defs).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE sub-command keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Functions invoked without parentheses (entries continue past this
    # chunk boundary).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Tokens that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with non-standard argument grammars that need bespoke parsers.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query modifier token -> (modifier arg name, parser). FETCH shares the
    # "limit" slot with LIMIT; USING shares "sample" with TABLESAMPLE.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scope keyword -> parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers — empty by default; dialects populate this.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-type hooks for literals cast to a type (e.g. JSON 'str').
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that may start the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that can precede a VOLATILE keyword.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    # BEGIN <kind> TRANSACTION kinds.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    # SET TRANSACTION characteristic phrases.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Table-cloning keywords (e.g. Snowflake CLONE / COPY).
    CLONE_KEYWORDS = {"CLONE", "COPY"}
    # Time-travel (historical data) specifier kinds.
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    # Keywords that may follow an operator class in an index definition.
    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    # Tokens that may follow an operator type.
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    # Index hint verbs (FORCE/IGNORE/USE INDEX).
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Window alias tokens — ROWS would be ambiguous with the frame clause.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Tokens valid inside FETCH FIRST ... clauses.
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may start an ALTER TABLE ADD constraint.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Tokens allowed as the OFFSET alias in UNNEST ... WITH OFFSET <alias>.
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether a bare CAST errors on failure (vs behaving like TRY_CAST).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG parses as LN (see parse_logarithm above).
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g.
    # "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated into one ParseError.
            dialect: The dialect (name, instance or class) to parse with.
        """
        # Imported lazily to avoid a circular import with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state (SQL text, errors, token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed; raise a combined error chained
        # to the last failure.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs `parse_method` once per chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens the parse method didn't consume are an error.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The highlighted span is underlined with ANSI escape codes.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # When no explicit comments are given, attach (and consume) any
        # comments carried by the previously advanced-over token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves pending token comments onto `expression`, clearing the buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        """Whether the previous and current tokens are adjacent (no whitespace between)."""
        # NOTE(review): this can return None/a falsy non-bool when _prev or
        # _curr is unset, despite the -> bool annotation; harmless in boolean
        # context, but wrapping in bool(...) would match the signature.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward `times` tokens, refreshing _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back (or forward) to the absolute position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to wrapping the previous keyword plus the rest as an opaque Command."""
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string> statements."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command node.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the table reference following a TO property."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY ... SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregations> is only valid after a group clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: keyword-dispatched, command fallback, or bare expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> ... [CASCADE|CONSTRAINTS|PURGE]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown droppable kind: fall back to an opaque Command node.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; truthy iff the full phrase was consumed."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... including properties at all
        of their possible locations, UDF bodies, indexes and CLONE/COPY clauses."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different grammar locations
            # into a single exp.Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that may be prefixed by Teradata-style modifiers
        (NO/DUAL/BEFORE/...), forwarding the matched modifiers as kwargs."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward modifiers that actually matched; parsers that
                # don't accept them raise TypeError, reported as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    # (continues past this chunk boundary)
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or
self._parse_var(any_token=True), 1501 ) 1502 1503 def _parse_stored(self) -> exp.FileFormatProperty: 1504 self._match(TokenType.ALIAS) 1505 1506 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1507 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1508 1509 return self.expression( 1510 exp.FileFormatProperty, 1511 this=self.expression( 1512 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1513 ) 1514 if input_format or output_format 1515 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1516 ) 1517 1518 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1519 self._match(TokenType.EQ) 1520 self._match(TokenType.ALIAS) 1521 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1522 1523 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1524 properties = [] 1525 while True: 1526 if before: 1527 prop = self._parse_property_before() 1528 else: 1529 prop = self._parse_property() 1530 1531 if not prop: 1532 break 1533 for p in ensure_list(prop): 1534 properties.append(p) 1535 1536 if properties: 1537 return self.expression(exp.Properties, expressions=properties) 1538 1539 return None 1540 1541 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1542 return self.expression( 1543 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1544 ) 1545 1546 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1547 if self._index >= 2: 1548 pre_volatile_token = self._tokens[self._index - 2] 1549 else: 1550 pre_volatile_token = None 1551 1552 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1553 return exp.VolatileProperty() 1554 1555 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1556 1557 def 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause(s) following a WITH keyword in a property list."""
        # Parenthesized form: WITH (prop, prop, ...)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table parts> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property; keywords were consumed by the caller."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from qualifiers matched by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF [DEFAULT]."""
        self._match(TokenType.EQ)

        # on stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parse a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ords)] INTO n BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtrack over COPY if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO [= n [PERCENT]] (Teradata).

        The NO/DEFAULT qualifiers are matched by the caller and passed in.
        """
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE = n [BYTES|KBYTES|KILOBYTES] (Teradata).

        The DEFAULT/MIN/MAX qualifiers are matched by the caller and passed in.
        """
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target name; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <exprs>; empty list when the keyword is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a PostgreSQL partition bound: IN (...), FROM (...) TO (...),
        or WITH (MODULUS n, REMAINDER m). Raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are special markers, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF parent { DEFAULT | FOR VALUES <bound spec> }."""
        if not self._match_text_seq("OF"):
            # Not PARTITION OF; give back the PARTITION token.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] (<schema>) or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL keyword of a CONTAINS SQL routine characteristic."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA keywords of a MODIFIES SQL DATA characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows NO: PRIMARY INDEX (Teradata) or SQL (routines)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse what follows ON: COMMIT PRESERVE/DELETE ROWS, or a target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse the SQL DATA keywords of a READS SQL DATA characteristic."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY (<identifier>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # Option keyword without a name: treat the whole clause as unparseable.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY (<identifiers>) (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )
    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <charset>; the caller passes DEFAULT."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery remote models)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a plain type, TABLE (schema), or TABLE<...>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Generic form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [kind] <table> [properties]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (the INSERT token was already consumed).

        Handles INSERT OVERWRITE/IGNORE, Hive's INSERT ... DIRECTORY, the
        INSERT OR <alternative> form, partitions, ON CONFLICT and RETURNING.
        """
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [row format]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... after an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (ROW was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '...' or DELIMITED options.

        Args:
            match_row: require and consume a leading ROW FORMAT token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA ...; anything else falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including MySQL multi-table syntax."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (target, SET, FROM, WHERE, RETURNING, ...)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None when the keyword is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, parenthesized or bare."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like statement: CTEs, SELECT, parenthesized
        subqueries/pivots, VALUES, and DuckDB's leading FROM form.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a bare table inside parentheses instead of a select.
            parse_subquery_alias: parse an alias on a returned subquery.
            parse_set_operation: attach trailing UNION/INTERSECT/EXCEPT.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare leading FROM: rewrite as SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...]; None when absent.

        Args:
            skip_with_token: the WITH keyword was already consumed by the caller.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: alias [AS] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] alias [(col, ...)]; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, they weren't a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in an exp.Subquery with optional pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to *this*."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)

                        if key == "limit":
                            # LIMIT n, m style: hoist the offset onto the query.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                this.set("offset", exp.Offset(expression=offset))

                        continue
                break
        return this
break 2399 return this 2400 2401 def _parse_hint(self) -> t.Optional[exp.Hint]: 2402 if self._match(TokenType.HINT): 2403 hints = [] 2404 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2405 hints.extend(hint) 2406 2407 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2408 self.raise_error("Expected */ after HINT") 2409 2410 return self.expression(exp.Hint, expressions=hints) 2411 2412 return None 2413 2414 def _parse_into(self) -> t.Optional[exp.Into]: 2415 if not self._match(TokenType.INTO): 2416 return None 2417 2418 temp = self._match(TokenType.TEMPORARY) 2419 unlogged = self._match_text_seq("UNLOGGED") 2420 self._match(TokenType.TABLE) 2421 2422 return self.expression( 2423 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2424 ) 2425 2426 def _parse_from( 2427 self, joins: bool = False, skip_from_token: bool = False 2428 ) -> t.Optional[exp.From]: 2429 if not skip_from_token and not self._match(TokenType.FROM): 2430 return None 2431 2432 return self.expression( 2433 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2434 ) 2435 2436 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2437 if not self._match(TokenType.MATCH_RECOGNIZE): 2438 return None 2439 2440 self._match_l_paren() 2441 2442 partition = self._parse_partition_by() 2443 order = self._parse_order() 2444 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2445 2446 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2447 rows = exp.var("ONE ROW PER MATCH") 2448 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2449 text = "ALL ROWS PER MATCH" 2450 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2451 text += f" SHOW EMPTY MATCHES" 2452 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2453 text += f" OMIT EMPTY MATCHES" 2454 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2455 text += f" WITH UNMATCHED ROWS" 2456 rows = exp.var(text) 
2457 else: 2458 rows = None 2459 2460 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2461 text = "AFTER MATCH SKIP" 2462 if self._match_text_seq("PAST", "LAST", "ROW"): 2463 text += f" PAST LAST ROW" 2464 elif self._match_text_seq("TO", "NEXT", "ROW"): 2465 text += f" TO NEXT ROW" 2466 elif self._match_text_seq("TO", "FIRST"): 2467 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2468 elif self._match_text_seq("TO", "LAST"): 2469 text += f" TO LAST {self._advance_any().text}" # type: ignore 2470 after = exp.var(text) 2471 else: 2472 after = None 2473 2474 if self._match_text_seq("PATTERN"): 2475 self._match_l_paren() 2476 2477 if not self._curr: 2478 self.raise_error("Expecting )", self._curr) 2479 2480 paren = 1 2481 start = self._curr 2482 2483 while self._curr and paren > 0: 2484 if self._curr.token_type == TokenType.L_PAREN: 2485 paren += 1 2486 if self._curr.token_type == TokenType.R_PAREN: 2487 paren -= 1 2488 2489 end = self._prev 2490 self._advance() 2491 2492 if paren > 0: 2493 self.raise_error("Expecting )", self._curr) 2494 2495 pattern = exp.var(self._find_sql(start, end)) 2496 else: 2497 pattern = None 2498 2499 define = ( 2500 self._parse_csv(self._parse_name_as_expression) 2501 if self._match_text_seq("DEFINE") 2502 else None 2503 ) 2504 2505 self._match_r_paren() 2506 2507 return self.expression( 2508 exp.MatchRecognize, 2509 partition_by=partition, 2510 order=order, 2511 measures=measures, 2512 rows=rows, 2513 after=after, 2514 pattern=pattern, 2515 define=define, 2516 alias=self._parse_table_alias(), 2517 ) 2518 2519 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2520 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2521 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2522 cross_apply = False 2523 2524 if cross_apply is not None: 2525 this = self._parse_select(table=True) 2526 view = None 2527 outer = None 2528 elif self._match(TokenType.LATERAL): 2529 this = 
self._parse_select(table=True) 2530 view = self._match(TokenType.VIEW) 2531 outer = self._match(TokenType.OUTER) 2532 else: 2533 return None 2534 2535 if not this: 2536 this = ( 2537 self._parse_unnest() 2538 or self._parse_function() 2539 or self._parse_id_var(any_token=False) 2540 ) 2541 2542 while self._match(TokenType.DOT): 2543 this = exp.Dot( 2544 this=this, 2545 expression=self._parse_function() or self._parse_id_var(any_token=False), 2546 ) 2547 2548 if view: 2549 table = self._parse_id_var(any_token=False) 2550 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2551 table_alias: t.Optional[exp.TableAlias] = self.expression( 2552 exp.TableAlias, this=table, columns=columns 2553 ) 2554 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2555 # We move the alias from the lateral's child node to the lateral itself 2556 table_alias = this.args["alias"].pop() 2557 else: 2558 table_alias = self._parse_table_alias() 2559 2560 return self.expression( 2561 exp.Lateral, 2562 this=this, 2563 view=view, 2564 outer=outer, 2565 alias=table_alias, 2566 cross_apply=cross_apply, 2567 ) 2568 2569 def _parse_join_parts( 2570 self, 2571 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2572 return ( 2573 self._match_set(self.JOIN_METHODS) and self._prev, 2574 self._match_set(self.JOIN_SIDES) and self._prev, 2575 self._match_set(self.JOIN_KINDS) and self._prev, 2576 ) 2577 2578 def _parse_join( 2579 self, skip_join_token: bool = False, parse_bracket: bool = False 2580 ) -> t.Optional[exp.Join]: 2581 if self._match(TokenType.COMMA): 2582 return self.expression(exp.Join, this=self._parse_table()) 2583 2584 index = self._index 2585 method, side, kind = self._parse_join_parts() 2586 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2587 join = self._match(TokenType.JOIN) 2588 2589 if not skip_join_token and not join: 2590 self._retreat(index) 2591 kind = None 2592 method = None 2593 side = 
None 2594 2595 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2596 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2597 2598 if not skip_join_token and not join and not outer_apply and not cross_apply: 2599 return None 2600 2601 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2602 2603 if method: 2604 kwargs["method"] = method.text 2605 if side: 2606 kwargs["side"] = side.text 2607 if kind: 2608 kwargs["kind"] = kind.text 2609 if hint: 2610 kwargs["hint"] = hint 2611 2612 if self._match(TokenType.ON): 2613 kwargs["on"] = self._parse_conjunction() 2614 elif self._match(TokenType.USING): 2615 kwargs["using"] = self._parse_wrapped_id_vars() 2616 elif not (kind and kind.token_type == TokenType.CROSS): 2617 index = self._index 2618 join = self._parse_join() 2619 2620 if join and self._match(TokenType.ON): 2621 kwargs["on"] = self._parse_conjunction() 2622 elif join and self._match(TokenType.USING): 2623 kwargs["using"] = self._parse_wrapped_id_vars() 2624 else: 2625 join = None 2626 self._retreat(index) 2627 2628 kwargs["this"].set("joins", [join] if join else None) 2629 2630 comments = [c for token in (method, side, kind) if token for c in token.comments] 2631 return self.expression(exp.Join, comments=comments, **kwargs) 2632 2633 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2634 this = self._parse_conjunction() 2635 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2636 return this 2637 2638 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2639 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2640 2641 return this 2642 2643 def _parse_index( 2644 self, 2645 index: t.Optional[exp.Expression] = None, 2646 ) -> t.Optional[exp.Index]: 2647 if index: 2648 unique = None 2649 primary = None 2650 amp = None 2651 2652 self._match(TokenType.ON) 2653 self._match(TokenType.TABLE) # hive 2654 table = 
self._parse_table_parts(schema=True) 2655 else: 2656 unique = self._match(TokenType.UNIQUE) 2657 primary = self._match_text_seq("PRIMARY") 2658 amp = self._match_text_seq("AMP") 2659 2660 if not self._match(TokenType.INDEX): 2661 return None 2662 2663 index = self._parse_id_var() 2664 table = None 2665 2666 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2667 2668 if self._match(TokenType.L_PAREN, advance=False): 2669 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2670 else: 2671 columns = None 2672 2673 return self.expression( 2674 exp.Index, 2675 this=index, 2676 table=table, 2677 using=using, 2678 columns=columns, 2679 unique=unique, 2680 primary=primary, 2681 amp=amp, 2682 partition_by=self._parse_partition_by(), 2683 where=self._parse_where(), 2684 ) 2685 2686 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2687 hints: t.List[exp.Expression] = [] 2688 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2689 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2690 hints.append( 2691 self.expression( 2692 exp.WithTableHint, 2693 expressions=self._parse_csv( 2694 lambda: self._parse_function() or self._parse_var(any_token=True) 2695 ), 2696 ) 2697 ) 2698 self._match_r_paren() 2699 else: 2700 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2701 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2702 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2703 2704 self._match_texts(("INDEX", "KEY")) 2705 if self._match(TokenType.FOR): 2706 hint.set("target", self._advance_any() and self._prev.text.upper()) 2707 2708 hint.set("expressions", self._parse_wrapped_id_vars()) 2709 hints.append(hint) 2710 2711 return hints or None 2712 2713 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2714 return ( 2715 (not schema and self._parse_function(optional_parens=False)) 2716 or 
self._parse_id_var(any_token=False) 2717 or self._parse_string_as_identifier() 2718 or self._parse_placeholder() 2719 ) 2720 2721 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2722 catalog = None 2723 db = None 2724 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2725 2726 while self._match(TokenType.DOT): 2727 if catalog: 2728 # This allows nesting the table in arbitrarily many dot expressions if needed 2729 table = self.expression( 2730 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2731 ) 2732 else: 2733 catalog = db 2734 db = table 2735 table = self._parse_table_part(schema=schema) or "" 2736 2737 if not table: 2738 self.raise_error(f"Expected table name but got {self._curr}") 2739 2740 return self.expression( 2741 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2742 ) 2743 2744 def _parse_table( 2745 self, 2746 schema: bool = False, 2747 joins: bool = False, 2748 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2749 parse_bracket: bool = False, 2750 ) -> t.Optional[exp.Expression]: 2751 lateral = self._parse_lateral() 2752 if lateral: 2753 return lateral 2754 2755 unnest = self._parse_unnest() 2756 if unnest: 2757 return unnest 2758 2759 values = self._parse_derived_table_values() 2760 if values: 2761 return values 2762 2763 subquery = self._parse_select(table=True) 2764 if subquery: 2765 if not subquery.args.get("pivots"): 2766 subquery.set("pivots", self._parse_pivots()) 2767 return subquery 2768 2769 bracket = parse_bracket and self._parse_bracket(None) 2770 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2771 this = t.cast( 2772 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2773 ) 2774 2775 if schema: 2776 return self._parse_schema(this=this) 2777 2778 version = self._parse_version() 2779 2780 if version: 2781 this.set("version", version) 2782 2783 if 
self.dialect.ALIAS_POST_TABLESAMPLE: 2784 table_sample = self._parse_table_sample() 2785 2786 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2787 if alias: 2788 this.set("alias", alias) 2789 2790 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2791 return self.expression( 2792 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2793 ) 2794 2795 this.set("hints", self._parse_table_hints()) 2796 2797 if not this.args.get("pivots"): 2798 this.set("pivots", self._parse_pivots()) 2799 2800 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2801 table_sample = self._parse_table_sample() 2802 2803 if table_sample: 2804 table_sample.set("this", this) 2805 this = table_sample 2806 2807 if joins: 2808 for join in iter(self._parse_join, None): 2809 this.append("joins", join) 2810 2811 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2812 this.set("ordinality", True) 2813 this.set("alias", self._parse_table_alias()) 2814 2815 return this 2816 2817 def _parse_version(self) -> t.Optional[exp.Version]: 2818 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2819 this = "TIMESTAMP" 2820 elif self._match(TokenType.VERSION_SNAPSHOT): 2821 this = "VERSION" 2822 else: 2823 return None 2824 2825 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2826 kind = self._prev.text.upper() 2827 start = self._parse_bitwise() 2828 self._match_texts(("TO", "AND")) 2829 end = self._parse_bitwise() 2830 expression: t.Optional[exp.Expression] = self.expression( 2831 exp.Tuple, expressions=[start, end] 2832 ) 2833 elif self._match_text_seq("CONTAINED", "IN"): 2834 kind = "CONTAINED IN" 2835 expression = self.expression( 2836 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2837 ) 2838 elif self._match(TokenType.ALL): 2839 kind = "ALL" 2840 expression = None 2841 else: 2842 self._match_text_seq("AS", "OF") 2843 kind = "AS OF" 2844 expression = self._parse_type() 2845 2846 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 2847 2848 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2849 if not self._match(TokenType.UNNEST): 2850 return None 2851 2852 expressions = self._parse_wrapped_csv(self._parse_equality) 2853 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2854 2855 alias = self._parse_table_alias() if with_alias else None 2856 2857 if alias: 2858 if self.dialect.UNNEST_COLUMN_ONLY: 2859 if alias.args.get("columns"): 2860 self.raise_error("Unexpected extra column alias in unnest.") 2861 2862 alias.set("columns", [alias.this]) 2863 alias.set("this", None) 2864 2865 columns = alias.args.get("columns") or [] 2866 if offset and len(expressions) < len(columns): 2867 offset = columns.pop() 2868 2869 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2870 self._match(TokenType.ALIAS) 2871 offset = self._parse_id_var( 2872 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2873 ) or exp.to_identifier("offset") 2874 2875 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2876 2877 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2878 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2879 if not is_derived and not self._match(TokenType.VALUES): 2880 return None 2881 2882 expressions = self._parse_csv(self._parse_value) 2883 alias = self._parse_table_alias() 2884 2885 if is_derived: 2886 self._match_r_paren() 2887 2888 return self.expression( 2889 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2890 ) 2891 2892 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2893 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2894 as_modifier and self._match_text_seq("USING", "SAMPLE") 2895 ): 2896 return None 2897 2898 bucket_numerator = None 2899 bucket_denominator = None 2900 bucket_field = None 2901 percent = None 2902 
size = None 2903 seed = None 2904 2905 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2906 matched_l_paren = self._match(TokenType.L_PAREN) 2907 2908 if self.TABLESAMPLE_CSV: 2909 num = None 2910 expressions = self._parse_csv(self._parse_primary) 2911 else: 2912 expressions = None 2913 num = ( 2914 self._parse_factor() 2915 if self._match(TokenType.NUMBER, advance=False) 2916 else self._parse_primary() or self._parse_placeholder() 2917 ) 2918 2919 if self._match_text_seq("BUCKET"): 2920 bucket_numerator = self._parse_number() 2921 self._match_text_seq("OUT", "OF") 2922 bucket_denominator = bucket_denominator = self._parse_number() 2923 self._match(TokenType.ON) 2924 bucket_field = self._parse_field() 2925 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2926 percent = num 2927 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 2928 size = num 2929 else: 2930 percent = num 2931 2932 if matched_l_paren: 2933 self._match_r_paren() 2934 2935 if self._match(TokenType.L_PAREN): 2936 method = self._parse_var(upper=True) 2937 seed = self._match(TokenType.COMMA) and self._parse_number() 2938 self._match_r_paren() 2939 elif self._match_texts(("SEED", "REPEATABLE")): 2940 seed = self._parse_wrapped(self._parse_number) 2941 2942 return self.expression( 2943 exp.TableSample, 2944 expressions=expressions, 2945 method=method, 2946 bucket_numerator=bucket_numerator, 2947 bucket_denominator=bucket_denominator, 2948 bucket_field=bucket_field, 2949 percent=percent, 2950 size=size, 2951 seed=seed, 2952 ) 2953 2954 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2955 return list(iter(self._parse_pivot, None)) or None 2956 2957 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2958 return list(iter(self._parse_join, None)) or None 2959 2960 # https://duckdb.org/docs/sql/statements/pivot 2961 def _parse_simplified_pivot(self) -> exp.Pivot: 2962 def _parse_on() -> t.Optional[exp.Expression]: 2963 this = 
self._parse_bitwise() 2964 return self._parse_in(this) if self._match(TokenType.IN) else this 2965 2966 this = self._parse_table() 2967 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2968 using = self._match(TokenType.USING) and self._parse_csv( 2969 lambda: self._parse_alias(self._parse_function()) 2970 ) 2971 group = self._parse_group() 2972 return self.expression( 2973 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2974 ) 2975 2976 def _parse_pivot_in(self) -> exp.In: 2977 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 2978 this = self._parse_conjunction() 2979 2980 self._match(TokenType.ALIAS) 2981 alias = self._parse_field() 2982 if alias: 2983 return self.expression(exp.PivotAlias, this=this, alias=alias) 2984 2985 return this 2986 2987 value = self._parse_column() 2988 2989 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 2990 self.raise_error("Expecting IN (") 2991 2992 aliased_expressions = self._parse_csv(_parse_aliased_expression) 2993 2994 self._match_r_paren() 2995 return self.expression(exp.In, this=value, expressions=aliased_expressions) 2996 2997 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2998 index = self._index 2999 include_nulls = None 3000 3001 if self._match(TokenType.PIVOT): 3002 unpivot = False 3003 elif self._match(TokenType.UNPIVOT): 3004 unpivot = True 3005 3006 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3007 if self._match_text_seq("INCLUDE", "NULLS"): 3008 include_nulls = True 3009 elif self._match_text_seq("EXCLUDE", "NULLS"): 3010 include_nulls = False 3011 else: 3012 return None 3013 3014 expressions = [] 3015 3016 if not self._match(TokenType.L_PAREN): 3017 self._retreat(index) 3018 return None 3019 3020 if unpivot: 3021 expressions = self._parse_csv(self._parse_column) 3022 else: 3023 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3024 3025 if not expressions: 3026 
self.raise_error("Failed to parse PIVOT's aggregation list") 3027 3028 if not self._match(TokenType.FOR): 3029 self.raise_error("Expecting FOR") 3030 3031 field = self._parse_pivot_in() 3032 3033 self._match_r_paren() 3034 3035 pivot = self.expression( 3036 exp.Pivot, 3037 expressions=expressions, 3038 field=field, 3039 unpivot=unpivot, 3040 include_nulls=include_nulls, 3041 ) 3042 3043 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3044 pivot.set("alias", self._parse_table_alias()) 3045 3046 if not unpivot: 3047 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3048 3049 columns: t.List[exp.Expression] = [] 3050 for fld in pivot.args["field"].expressions: 3051 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3052 for name in names: 3053 if self.PREFIXED_PIVOT_COLUMNS: 3054 name = f"{name}_{field_name}" if name else field_name 3055 else: 3056 name = f"{field_name}_{name}" if name else field_name 3057 3058 columns.append(exp.to_identifier(name)) 3059 3060 pivot.set("columns", columns) 3061 3062 return pivot 3063 3064 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3065 return [agg.alias for agg in aggregations] 3066 3067 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3068 if not skip_where_token and not self._match(TokenType.WHERE): 3069 return None 3070 3071 return self.expression( 3072 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3073 ) 3074 3075 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3076 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3077 return None 3078 3079 elements = defaultdict(list) 3080 3081 if self._match(TokenType.ALL): 3082 return self.expression(exp.Group, all=True) 3083 3084 while True: 3085 expressions = self._parse_csv(self._parse_conjunction) 3086 if expressions: 3087 
elements["expressions"].extend(expressions) 3088 3089 grouping_sets = self._parse_grouping_sets() 3090 if grouping_sets: 3091 elements["grouping_sets"].extend(grouping_sets) 3092 3093 rollup = None 3094 cube = None 3095 totals = None 3096 3097 index = self._index 3098 with_ = self._match(TokenType.WITH) 3099 if self._match(TokenType.ROLLUP): 3100 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3101 elements["rollup"].extend(ensure_list(rollup)) 3102 3103 if self._match(TokenType.CUBE): 3104 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3105 elements["cube"].extend(ensure_list(cube)) 3106 3107 if self._match_text_seq("TOTALS"): 3108 totals = True 3109 elements["totals"] = True # type: ignore 3110 3111 if not (grouping_sets or rollup or cube or totals): 3112 if with_: 3113 self._retreat(index) 3114 break 3115 3116 return self.expression(exp.Group, **elements) # type: ignore 3117 3118 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3119 if not self._match(TokenType.GROUPING_SETS): 3120 return None 3121 3122 return self._parse_wrapped_csv(self._parse_grouping_set) 3123 3124 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3125 if self._match(TokenType.L_PAREN): 3126 grouping_set = self._parse_csv(self._parse_column) 3127 self._match_r_paren() 3128 return self.expression(exp.Tuple, expressions=grouping_set) 3129 3130 return self._parse_column() 3131 3132 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3133 if not skip_having_token and not self._match(TokenType.HAVING): 3134 return None 3135 return self.expression(exp.Having, this=self._parse_conjunction()) 3136 3137 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3138 if not self._match(TokenType.QUALIFY): 3139 return None 3140 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3141 3142 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3143 if skip_start_token: 3144 
start = None 3145 elif self._match(TokenType.START_WITH): 3146 start = self._parse_conjunction() 3147 else: 3148 return None 3149 3150 self._match(TokenType.CONNECT_BY) 3151 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3152 exp.Prior, this=self._parse_bitwise() 3153 ) 3154 connect = self._parse_conjunction() 3155 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3156 3157 if not start and self._match(TokenType.START_WITH): 3158 start = self._parse_conjunction() 3159 3160 return self.expression(exp.Connect, start=start, connect=connect) 3161 3162 def _parse_name_as_expression(self) -> exp.Alias: 3163 return self.expression( 3164 exp.Alias, 3165 alias=self._parse_id_var(any_token=True), 3166 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3167 ) 3168 3169 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3170 if self._match_text_seq("INTERPOLATE"): 3171 return self._parse_wrapped_csv(self._parse_name_as_expression) 3172 return None 3173 3174 def _parse_order( 3175 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3176 ) -> t.Optional[exp.Expression]: 3177 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3178 return this 3179 3180 return self.expression( 3181 exp.Order, 3182 this=this, 3183 expressions=self._parse_csv(self._parse_ordered), 3184 interpolate=self._parse_interpolate(), 3185 ) 3186 3187 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3188 if not self._match(token): 3189 return None 3190 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3191 3192 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3193 this = parse_method() if parse_method else self._parse_conjunction() 3194 3195 asc = self._match(TokenType.ASC) 3196 desc = self._match(TokenType.DESC) or (asc and False) 3197 3198 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3199 is_nulls_last = 
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when ``top=True``) or a FETCH clause.

        Returns ``this`` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its expression: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT offset, count
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} [count] [PERCENT] {ROW|ROWS} [ONLY|WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        # Optional trailing ROW/ROWS keyword
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with their OF lists and wait options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, an expression
            # for WAIT <n>, and None when unspecified
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
3329 3330 comments = self._prev.comments 3331 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3332 by_name = self._match_text_seq("BY", "NAME") 3333 expression = self._parse_select(nested=True, parse_set_operation=False) 3334 3335 this = self.expression( 3336 operation, 3337 comments=comments, 3338 this=this, 3339 distinct=distinct, 3340 by_name=by_name, 3341 expression=expression, 3342 ) 3343 3344 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3345 expression = this.expression 3346 3347 if expression: 3348 for arg in self.UNION_MODIFIERS: 3349 expr = expression.args.get(arg) 3350 if expr: 3351 this.set(arg, expr.pop()) 3352 3353 return this 3354 3355 def _parse_expression(self) -> t.Optional[exp.Expression]: 3356 return self._parse_alias(self._parse_conjunction()) 3357 3358 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3359 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3360 3361 def _parse_equality(self) -> t.Optional[exp.Expression]: 3362 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3363 3364 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3365 return self._parse_tokens(self._parse_range, self.COMPARISON) 3366 3367 def _parse_range(self) -> t.Optional[exp.Expression]: 3368 this = self._parse_bitwise() 3369 negate = self._match(TokenType.NOT) 3370 3371 if self._match_set(self.RANGE_PARSERS): 3372 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3373 if not expression: 3374 return this 3375 3376 this = expression 3377 elif self._match(TokenType.ISNULL): 3378 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3379 3380 # Postgres supports ISNULL and NOTNULL for conditions. 
        # Tail of the enclosing NULL-predicate parser (its start is outside this chunk).
        # NOTNULL is a Postgres-style shorthand for IS NOT NULL:
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    # Parses what follows an already-consumed IS keyword: IS [NOT] DISTINCT FROM,
    # IS [NOT] NULL, or IS [NOT] TRUE/FALSE. Returns None (restoring the cursor to
    # just before the IS token) when nothing recognizable follows.
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # self._index - 1 points at the IS token itself, so a retreat undoes it too
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM -> null-safe equality; IS DISTINCT FROM -> null-safe inequality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # not an IS predicate after all — rewind past IS/NOT and bail
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    # Parses the operand of an IN predicate: IN UNNEST(...), IN (subquery),
    # IN (expr, ...), IN [expr, ...] (bracket form), or a bare IN field.
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            # remember which opener matched so we require the matching closer below
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # a lone subquery is stored under "query" rather than "expressions"
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            # unparenthesized form, e.g. IN <field>
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    # Parses "<low> AND <high>" after an already-consumed BETWEEN keyword.
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    # Wraps `this` in an Escape node when an ESCAPE '<char>' clause follows (LIKE ... ESCAPE ...).
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    # Parses an INTERVAL expression, normalizing it to the canonical
    # INTERVAL '<value>' <UNIT> form where possible. Returns None (with the
    # cursor restored) when the INTERVAL keyword is not actually an interval here.
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # e.g. "interval IS ..." — INTERVAL was an identifier, not the keyword
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True, upper=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # e.g. INTERVAL '5 day' — split the embedded unit out of the string
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
a "window side") 3475 unit = None 3476 self._retreat(self._index - 1) 3477 3478 this = exp.Literal.string(parts[0]) 3479 unit = self.expression(exp.Var, this=parts[1].upper()) 3480 3481 return self.expression(exp.Interval, this=this, unit=unit) 3482 3483 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3484 this = self._parse_term() 3485 3486 while True: 3487 if self._match_set(self.BITWISE): 3488 this = self.expression( 3489 self.BITWISE[self._prev.token_type], 3490 this=this, 3491 expression=self._parse_term(), 3492 ) 3493 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3494 this = self.expression( 3495 exp.DPipe, 3496 this=this, 3497 expression=self._parse_term(), 3498 safe=not self.dialect.STRICT_STRING_CONCAT, 3499 ) 3500 elif self._match(TokenType.DQMARK): 3501 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3502 elif self._match_pair(TokenType.LT, TokenType.LT): 3503 this = self.expression( 3504 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3505 ) 3506 elif self._match_pair(TokenType.GT, TokenType.GT): 3507 this = self.expression( 3508 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3509 ) 3510 else: 3511 break 3512 3513 return this 3514 3515 def _parse_term(self) -> t.Optional[exp.Expression]: 3516 return self._parse_tokens(self._parse_factor, self.TERM) 3517 3518 def _parse_factor(self) -> t.Optional[exp.Expression]: 3519 if self.EXPONENT: 3520 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3521 else: 3522 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3523 if isinstance(factor, exp.Div): 3524 factor.args["typed"] = self.dialect.TYPED_DIVISION 3525 factor.args["safe"] = self.dialect.SAFE_DIVISION 3526 return factor 3527 3528 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3529 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3530 3531 def _parse_unary(self) -> t.Optional[exp.Expression]: 3532 if 
self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    # Parses either an INTERVAL expression, a "TYPE literal" cast (e.g. DATE 'x'),
    # or a plain column expression when no data type prefix is present.
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal' — dialect-specific literal parsers take precedence
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # a bare type name followed by a non-literal was a misparse; re-read as a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    # Parses a single parameter inside a type's parentheses, e.g. the 10 in DECIMAL(10).
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    # Parses a (possibly nested / parameterized) data type. Returns None with the
    # cursor restored when the tokens do not form a type. check_func=True makes
    # ambiguous "TYPE(...)" forms require a following string literal (so that
    # function calls are not swallowed); schema=True is propagated for struct fields.
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        # Teradata-style SYSUDTLIB. prefix — TODO confirm against callers
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token; maybe an identifier that re-tokenizes to one,
            # or a user-defined type if the dialect supports them.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # accumulate dotted UDT names, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # parenthesized parameters: struct fields, nested types, enum values, or sizes
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # could still be a function call spelled like a type, e.g. DATE(...)
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # angle-bracket style nesting, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # optional trailing value list, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE modifiers select the concrete temporal type
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            # INTERVAL <unit> TO <unit> span types (e.g. DAY TO SECOND)
            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # only accept TYPE(...) as a type if a string literal follows; otherwise
            # it was likely a function call, so undo the whole type parse
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # trailing [] pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    # Parses one field of a STRUCT type, e.g. "name: INT" or "name INT".
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
self._match(TokenType.COLON) 3715 return self._parse_column_def(this) 3716 3717 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3718 if not self._match_text_seq("AT", "TIME", "ZONE"): 3719 return this 3720 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3721 3722 def _parse_column(self) -> t.Optional[exp.Expression]: 3723 this = self._parse_field() 3724 if isinstance(this, exp.Identifier): 3725 this = self.expression(exp.Column, this=this) 3726 elif not this: 3727 return self._parse_bracket(this) 3728 return self._parse_column_ops(this) 3729 3730 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3731 this = self._parse_bracket(this) 3732 3733 while self._match_set(self.COLUMN_OPERATORS): 3734 op_token = self._prev.token_type 3735 op = self.COLUMN_OPERATORS.get(op_token) 3736 3737 if op_token == TokenType.DCOLON: 3738 field = self._parse_types() 3739 if not field: 3740 self.raise_error("Expected type") 3741 elif op and self._curr: 3742 self._advance() 3743 value = self._prev.text 3744 field = ( 3745 exp.Literal.number(value) 3746 if self._prev.token_type == TokenType.NUMBER 3747 else exp.Literal.string(value) 3748 ) 3749 else: 3750 field = self._parse_field(anonymous_func=True, any_token=True) 3751 3752 if isinstance(field, exp.Func): 3753 # bigquery allows function calls like x.y.count(...) 3754 # SAFE.SUBSTR(...) 
3755 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3756 this = self._replace_columns_with_dots(this) 3757 3758 if op: 3759 this = op(self, this, field) 3760 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3761 this = self.expression( 3762 exp.Column, 3763 this=field, 3764 table=this.this, 3765 db=this.args.get("table"), 3766 catalog=this.args.get("db"), 3767 ) 3768 else: 3769 this = self.expression(exp.Dot, this=this, expression=field) 3770 this = self._parse_bracket(this) 3771 return this 3772 3773 def _parse_primary(self) -> t.Optional[exp.Expression]: 3774 if self._match_set(self.PRIMARY_PARSERS): 3775 token_type = self._prev.token_type 3776 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3777 3778 if token_type == TokenType.STRING: 3779 expressions = [primary] 3780 while self._match(TokenType.STRING): 3781 expressions.append(exp.Literal.string(self._prev.text)) 3782 3783 if len(expressions) > 1: 3784 return self.expression(exp.Concat, expressions=expressions) 3785 3786 return primary 3787 3788 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3789 return exp.Literal.number(f"0.{self._prev.text}") 3790 3791 if self._match(TokenType.L_PAREN): 3792 comments = self._prev_comments 3793 query = self._parse_select() 3794 3795 if query: 3796 expressions = [query] 3797 else: 3798 expressions = self._parse_expressions() 3799 3800 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3801 3802 if isinstance(this, exp.Subqueryable): 3803 this = self._parse_set_operations( 3804 self._parse_subquery(this=this, parse_alias=False) 3805 ) 3806 elif len(expressions) > 1: 3807 this = self.expression(exp.Tuple, expressions=expressions) 3808 else: 3809 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3810 3811 if this: 3812 this.add_comments(comments) 3813 3814 self._match_r_paren(expression=this) 3815 return this 3816 3817 return None 3818 3819 def 
_parse_field( 3820 self, 3821 any_token: bool = False, 3822 tokens: t.Optional[t.Collection[TokenType]] = None, 3823 anonymous_func: bool = False, 3824 ) -> t.Optional[exp.Expression]: 3825 return ( 3826 self._parse_primary() 3827 or self._parse_function(anonymous=anonymous_func) 3828 or self._parse_id_var(any_token=any_token, tokens=tokens) 3829 ) 3830 3831 def _parse_function( 3832 self, 3833 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3834 anonymous: bool = False, 3835 optional_parens: bool = True, 3836 ) -> t.Optional[exp.Expression]: 3837 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3838 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3839 fn_syntax = False 3840 if ( 3841 self._match(TokenType.L_BRACE, advance=False) 3842 and self._next 3843 and self._next.text.upper() == "FN" 3844 ): 3845 self._advance(2) 3846 fn_syntax = True 3847 3848 func = self._parse_function_call( 3849 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3850 ) 3851 3852 if fn_syntax: 3853 self._match(TokenType.R_BRACE) 3854 3855 return func 3856 3857 def _parse_function_call( 3858 self, 3859 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3860 anonymous: bool = False, 3861 optional_parens: bool = True, 3862 ) -> t.Optional[exp.Expression]: 3863 if not self._curr: 3864 return None 3865 3866 comments = self._curr.comments 3867 token_type = self._curr.token_type 3868 this = self._curr.text 3869 upper = this.upper() 3870 3871 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3872 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3873 self._advance() 3874 return parser(self) 3875 3876 if not self._next or self._next.token_type != TokenType.L_PAREN: 3877 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3878 self._advance() 3879 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3880 3881 return None 3882 3883 if token_type not in 
self.FUNC_TOKENS: 3884 return None 3885 3886 self._advance(2) 3887 3888 parser = self.FUNCTION_PARSERS.get(upper) 3889 if parser and not anonymous: 3890 this = parser(self) 3891 else: 3892 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3893 3894 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3895 this = self.expression(subquery_predicate, this=self._parse_select()) 3896 self._match_r_paren() 3897 return this 3898 3899 if functions is None: 3900 functions = self.FUNCTIONS 3901 3902 function = functions.get(upper) 3903 3904 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3905 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3906 3907 if function and not anonymous: 3908 if "dialect" in function.__code__.co_varnames: 3909 func = function(args, dialect=self.dialect) 3910 else: 3911 func = function(args) 3912 3913 func = self.validate_expression(func, args) 3914 if not self.dialect.NORMALIZE_FUNCTIONS: 3915 func.meta["name"] = this 3916 3917 this = func 3918 else: 3919 this = self.expression(exp.Anonymous, this=this, expressions=args) 3920 3921 if isinstance(this, exp.Expression): 3922 this.add_comments(comments) 3923 3924 self._match_r_paren(this) 3925 return self._parse_window(this) 3926 3927 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3928 return self._parse_column_def(self._parse_id_var()) 3929 3930 def _parse_user_defined_function( 3931 self, kind: t.Optional[TokenType] = None 3932 ) -> t.Optional[exp.Expression]: 3933 this = self._parse_id_var() 3934 3935 while self._match(TokenType.DOT): 3936 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3937 3938 if not self._match(TokenType.L_PAREN): 3939 return this 3940 3941 expressions = self._parse_csv(self._parse_function_parameter) 3942 self._match_r_paren() 3943 return self.expression( 3944 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3945 ) 3946 3947 def 
_parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3948 literal = self._parse_primary() 3949 if literal: 3950 return self.expression(exp.Introducer, this=token.text, expression=literal) 3951 3952 return self.expression(exp.Identifier, this=token.text) 3953 3954 def _parse_session_parameter(self) -> exp.SessionParameter: 3955 kind = None 3956 this = self._parse_id_var() or self._parse_primary() 3957 3958 if this and self._match(TokenType.DOT): 3959 kind = this.name 3960 this = self._parse_var() or self._parse_primary() 3961 3962 return self.expression(exp.SessionParameter, this=this, kind=kind) 3963 3964 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3965 index = self._index 3966 3967 if self._match(TokenType.L_PAREN): 3968 expressions = t.cast( 3969 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3970 ) 3971 3972 if not self._match(TokenType.R_PAREN): 3973 self._retreat(index) 3974 else: 3975 expressions = [self._parse_id_var()] 3976 3977 if self._match_set(self.LAMBDAS): 3978 return self.LAMBDAS[self._prev.token_type](self, expressions) 3979 3980 self._retreat(index) 3981 3982 this: t.Optional[exp.Expression] 3983 3984 if self._match(TokenType.DISTINCT): 3985 this = self.expression( 3986 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3987 ) 3988 else: 3989 this = self._parse_select_or_expression(alias=alias) 3990 3991 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3992 3993 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3994 index = self._index 3995 3996 if not self.errors: 3997 try: 3998 if self._parse_select(nested=True): 3999 return this 4000 except ParseError: 4001 pass 4002 finally: 4003 self.errors.clear() 4004 self._retreat(index) 4005 4006 if not self._match(TokenType.L_PAREN): 4007 return this 4008 4009 args = self._parse_csv(lambda: self._parse_constraint() or 
self._parse_field_def()) 4010 4011 self._match_r_paren() 4012 return self.expression(exp.Schema, this=this, expressions=args) 4013 4014 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4015 return self._parse_column_def(self._parse_field(any_token=True)) 4016 4017 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4018 # column defs are not really columns, they're identifiers 4019 if isinstance(this, exp.Column): 4020 this = this.this 4021 4022 kind = self._parse_types(schema=True) 4023 4024 if self._match_text_seq("FOR", "ORDINALITY"): 4025 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4026 4027 constraints: t.List[exp.Expression] = [] 4028 4029 if not kind and self._match(TokenType.ALIAS): 4030 constraints.append( 4031 self.expression( 4032 exp.ComputedColumnConstraint, 4033 this=self._parse_conjunction(), 4034 persisted=self._match_text_seq("PERSISTED"), 4035 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4036 ) 4037 ) 4038 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4039 self._match(TokenType.ALIAS) 4040 constraints.append( 4041 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4042 ) 4043 4044 while True: 4045 constraint = self._parse_column_constraint() 4046 if not constraint: 4047 break 4048 constraints.append(constraint) 4049 4050 if not kind and not constraints: 4051 return this 4052 4053 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4054 4055 def _parse_auto_increment( 4056 self, 4057 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4058 start = None 4059 increment = None 4060 4061 if self._match(TokenType.L_PAREN, advance=False): 4062 args = self._parse_wrapped_csv(self._parse_bitwise) 4063 start = seq_get(args, 0) 4064 increment = seq_get(args, 1) 4065 elif self._match_text_seq("START"): 4066 start = self._parse_bitwise() 4067 
self._match_text_seq("INCREMENT") 4068 increment = self._parse_bitwise() 4069 4070 if start and increment: 4071 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4072 4073 return exp.AutoIncrementColumnConstraint() 4074 4075 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4076 if not self._match_text_seq("REFRESH"): 4077 self._retreat(self._index - 1) 4078 return None 4079 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4080 4081 def _parse_compress(self) -> exp.CompressColumnConstraint: 4082 if self._match(TokenType.L_PAREN, advance=False): 4083 return self.expression( 4084 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4085 ) 4086 4087 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4088 4089 def _parse_generated_as_identity( 4090 self, 4091 ) -> ( 4092 exp.GeneratedAsIdentityColumnConstraint 4093 | exp.ComputedColumnConstraint 4094 | exp.GeneratedAsRowColumnConstraint 4095 ): 4096 if self._match_text_seq("BY", "DEFAULT"): 4097 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4098 this = self.expression( 4099 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4100 ) 4101 else: 4102 self._match_text_seq("ALWAYS") 4103 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4104 4105 self._match(TokenType.ALIAS) 4106 4107 if self._match_text_seq("ROW"): 4108 start = self._match_text_seq("START") 4109 if not start: 4110 self._match(TokenType.END) 4111 hidden = self._match_text_seq("HIDDEN") 4112 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4113 4114 identity = self._match_text_seq("IDENTITY") 4115 4116 if self._match(TokenType.L_PAREN): 4117 if self._match(TokenType.START_WITH): 4118 this.set("start", self._parse_bitwise()) 4119 if self._match_text_seq("INCREMENT", "BY"): 4120 this.set("increment", self._parse_bitwise()) 4121 if 
self._match_text_seq("MINVALUE"): 4122 this.set("minvalue", self._parse_bitwise()) 4123 if self._match_text_seq("MAXVALUE"): 4124 this.set("maxvalue", self._parse_bitwise()) 4125 4126 if self._match_text_seq("CYCLE"): 4127 this.set("cycle", True) 4128 elif self._match_text_seq("NO", "CYCLE"): 4129 this.set("cycle", False) 4130 4131 if not identity: 4132 this.set("expression", self._parse_bitwise()) 4133 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4134 args = self._parse_csv(self._parse_bitwise) 4135 this.set("start", seq_get(args, 0)) 4136 this.set("increment", seq_get(args, 1)) 4137 4138 self._match_r_paren() 4139 4140 return this 4141 4142 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4143 self._match_text_seq("LENGTH") 4144 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4145 4146 def _parse_not_constraint( 4147 self, 4148 ) -> t.Optional[exp.Expression]: 4149 if self._match_text_seq("NULL"): 4150 return self.expression(exp.NotNullColumnConstraint) 4151 if self._match_text_seq("CASESPECIFIC"): 4152 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4153 if self._match_text_seq("FOR", "REPLICATION"): 4154 return self.expression(exp.NotForReplicationColumnConstraint) 4155 return None 4156 4157 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4158 if self._match(TokenType.CONSTRAINT): 4159 this = self._parse_id_var() 4160 else: 4161 this = None 4162 4163 if self._match_texts(self.CONSTRAINT_PARSERS): 4164 return self.expression( 4165 exp.ColumnConstraint, 4166 this=this, 4167 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4168 ) 4169 4170 return this 4171 4172 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4173 if not self._match(TokenType.CONSTRAINT): 4174 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4175 4176 this = self._parse_id_var() 4177 expressions = [] 4178 4179 while 
True: 4180 constraint = self._parse_unnamed_constraint() or self._parse_function() 4181 if not constraint: 4182 break 4183 expressions.append(constraint) 4184 4185 return self.expression(exp.Constraint, this=this, expressions=expressions) 4186 4187 def _parse_unnamed_constraint( 4188 self, constraints: t.Optional[t.Collection[str]] = None 4189 ) -> t.Optional[exp.Expression]: 4190 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4191 constraints or self.CONSTRAINT_PARSERS 4192 ): 4193 return None 4194 4195 constraint = self._prev.text.upper() 4196 if constraint not in self.CONSTRAINT_PARSERS: 4197 self.raise_error(f"No parser found for schema constraint {constraint}.") 4198 4199 return self.CONSTRAINT_PARSERS[constraint](self) 4200 4201 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4202 self._match_text_seq("KEY") 4203 return self.expression( 4204 exp.UniqueColumnConstraint, 4205 this=self._parse_schema(self._parse_id_var(any_token=False)), 4206 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4207 ) 4208 4209 def _parse_key_constraint_options(self) -> t.List[str]: 4210 options = [] 4211 while True: 4212 if not self._curr: 4213 break 4214 4215 if self._match(TokenType.ON): 4216 action = None 4217 on = self._advance_any() and self._prev.text 4218 4219 if self._match_text_seq("NO", "ACTION"): 4220 action = "NO ACTION" 4221 elif self._match_text_seq("CASCADE"): 4222 action = "CASCADE" 4223 elif self._match_text_seq("RESTRICT"): 4224 action = "RESTRICT" 4225 elif self._match_pair(TokenType.SET, TokenType.NULL): 4226 action = "SET NULL" 4227 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4228 action = "SET DEFAULT" 4229 else: 4230 self.raise_error("Invalid key constraint") 4231 4232 options.append(f"ON {on} {action}") 4233 elif self._match_text_seq("NOT", "ENFORCED"): 4234 options.append("NOT ENFORCED") 4235 elif self._match_text_seq("DEFERRABLE"): 4236 options.append("DEFERRABLE") 4237 
elif self._match_text_seq("INITIALLY", "DEFERRED"): 4238 options.append("INITIALLY DEFERRED") 4239 elif self._match_text_seq("NORELY"): 4240 options.append("NORELY") 4241 elif self._match_text_seq("MATCH", "FULL"): 4242 options.append("MATCH FULL") 4243 else: 4244 break 4245 4246 return options 4247 4248 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4249 if match and not self._match(TokenType.REFERENCES): 4250 return None 4251 4252 expressions = None 4253 this = self._parse_table(schema=True) 4254 options = self._parse_key_constraint_options() 4255 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4256 4257 def _parse_foreign_key(self) -> exp.ForeignKey: 4258 expressions = self._parse_wrapped_id_vars() 4259 reference = self._parse_references() 4260 options = {} 4261 4262 while self._match(TokenType.ON): 4263 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4264 self.raise_error("Expected DELETE or UPDATE") 4265 4266 kind = self._prev.text.lower() 4267 4268 if self._match_text_seq("NO", "ACTION"): 4269 action = "NO ACTION" 4270 elif self._match(TokenType.SET): 4271 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4272 action = "SET " + self._prev.text.upper() 4273 else: 4274 self._advance() 4275 action = self._prev.text.upper() 4276 4277 options[kind] = action 4278 4279 return self.expression( 4280 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4281 ) 4282 4283 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4284 return self._parse_field() 4285 4286 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4287 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4288 4289 id_vars = self._parse_wrapped_id_vars() 4290 return self.expression( 4291 exp.PeriodForSystemTimeConstraint, 4292 this=seq_get(id_vars, 0), 4293 expression=seq_get(id_vars, 1), 4294 ) 4295 4296 def _parse_primary_key( 4297 self, 
wrapped_optional: bool = False, in_props: bool = False 4298 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4299 desc = ( 4300 self._match_set((TokenType.ASC, TokenType.DESC)) 4301 and self._prev.token_type == TokenType.DESC 4302 ) 4303 4304 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4305 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4306 4307 expressions = self._parse_wrapped_csv( 4308 self._parse_primary_key_part, optional=wrapped_optional 4309 ) 4310 options = self._parse_key_constraint_options() 4311 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4312 4313 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4314 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4315 4316 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4317 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4318 return this 4319 4320 bracket_kind = self._prev.token_type 4321 expressions = self._parse_csv( 4322 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4323 ) 4324 4325 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4326 self.raise_error("Expected ]") 4327 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4328 self.raise_error("Expected }") 4329 4330 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4331 if bracket_kind == TokenType.L_BRACE: 4332 this = self.expression(exp.Struct, expressions=expressions) 4333 elif not this or this.name.upper() == "ARRAY": 4334 this = self.expression(exp.Array, expressions=expressions) 4335 else: 4336 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4337 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4338 4339 self._add_comments(this) 4340 return self._parse_bracket(this) 4341 
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a COLON follows (e.g. array slice syntax)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional CASE operand (simple CASE); None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Recover the case where "END" was consumed as an INTERVAL unit,
            # e.g. `... ELSE interval END` — reinterpret it as a column named
            # "interval" so END terminates the CASE.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        # CASE expressions may be followed by an OVER clause, hence _parse_window.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse either function-style IF(cond, t, f) or IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all — restore the cursor.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Undo the NEXT token the caller matched.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse the interior of EXTRACT(<part> FROM <expr>) (comma also accepted)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is forwarded to the node.
        A CAST with FORMAT targeting a temporal type is rewritten to
        StrToDate / StrToTime with the dialect's time-format mapping applied.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name — treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style arguments into a GroupConcat expression."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: expr USING charset, or expr, type."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # DECODE treats NULL = NULL as a match, so emit expr = search OR
                # (expr IS NULL AND search IS NULL).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> {:|,} [VALUE] <value> pair (JSON_OBJECT syntax)."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson if a trailing FORMAT JSON clause is present."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...), including NULL/UNIQUE KEYS handling."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a [COLUMNS] (<json column defs>) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(doc [, path] [error/empty handling] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(col, ...) AGAINST('expr' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name type ['path'] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style args; `haystack_first` flips the argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style args: MODEL <table>, TABLE <table> [, params]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list, uppercasing the hint name."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string) puts the trim characters first; swap so
            # `this` is the string being trimmed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` if an IGNORE NULLS / RESPECT NULLS clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax after an expression: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...). With `alias=True`, parse a named
        window definition (`name AS (spec)`) instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] alias or (alias, ...); with `explicit=True` require the AS token."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; non-reserved keywords are accepted too."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a (raw) string literal, falling back to a placeholder."""
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any token (`any_token`), or a given token set."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, or a string literal if no Var is found."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally braced: {name[:part]}."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the dialect's placeholder parsers; rewind if none produces a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * EXCEPT (col, ...) or EXCEPT col."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * REPLACE (expr, ...) or REPLACE expr."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments trailing the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators: map each matched token to its expression class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they're missing and required."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or else a (possibly aliased) expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens, modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string or table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <col def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] ..., defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...), PARTITION (...)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY after ALTER TABLE ... ADD."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Dispatch ALTER TABLE ... ADD to constraint or column parsing."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: either partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything not fully parseable falls back to a Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if all tokens were consumed;
            # otherwise fall through to the raw-command fallback below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target [alias] USING source ON cond WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW parsers, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the dialect's SET parsers, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first (possibly multi-word) option matching the upcoming tokens."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The leading keyword becomes `this`; everything after it is the expression
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form THIS(kind(key value ...)).

        `this` is the property name already consumed by the caller (e.g. a
        dictionary SOURCE/LAYOUT clause); the parenthesized key/value pairs
        become DictSubProperty settings.
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Collect key/value sub-properties until neither can be parsed
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range property: THIS(MIN <x> MAX <y>) or THIS(MAX <y>)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: `min`/`max` intentionally mirror the arg names but shadow builtins
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # MIN defaults to 0 when only MAX is given
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail `<expr> IN <iterator> [IF <condition>]` after `this`.

        Returns None (restoring the parser position, one token before where
        parsing started) when no IN keyword follows the expression.
        """
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk upcoming tokens through `trie` and return the matching parser, if any.

        On failure, the parser position is fully restored.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (advancing by default) if the current token has `token_type`, else None.

        Any comments on the matched token are attached to `expression`, if given.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but succeeds if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Succeed only if the next two tokens match the given pair of types, in order."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening parenthesis, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing parenthesis, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Succeed if the current token's upper-cased text is in the collection `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewind fully on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        # Peek-only mode: report the match but restore the original position
        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite table-qualified Column nodes into Dot expressions."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters with bare identifiers (or Dots)."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
988 def __init__( 989 self, 990 error_level: t.Optional[ErrorLevel] = None, 991 error_message_context: int = 100, 992 max_errors: int = 3, 993 dialect: DialectType = None, 994 ): 995 from sqlglot.dialects import Dialect 996 997 self.error_level = error_level or ErrorLevel.IMMEDIATE 998 self.error_message_context = error_message_context 999 self.max_errors = max_errors 1000 self.dialect = Dialect.get_or_raise(dialect) 1001 self.reset()
1013 def parse( 1014 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1015 ) -> t.List[t.Optional[exp.Expression]]: 1016 """ 1017 Parses a list of tokens and returns a list of syntax trees, one tree 1018 per parsed SQL statement. 1019 1020 Args: 1021 raw_tokens: The list of tokens. 1022 sql: The original SQL string, used to produce helpful debug messages. 1023 1024 Returns: 1025 The list of the produced syntax trees. 1026 """ 1027 return self._parse( 1028 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1029 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1031 def parse_into( 1032 self, 1033 expression_types: exp.IntoType, 1034 raw_tokens: t.List[Token], 1035 sql: t.Optional[str] = None, 1036 ) -> t.List[t.Optional[exp.Expression]]: 1037 """ 1038 Parses a list of tokens into a given Expression type. If a collection of Expression 1039 types is given instead, this method will try to parse the token list into each one 1040 of them, stopping at the first for which the parsing succeeds. 1041 1042 Args: 1043 expression_types: The expression type(s) to try and parse the token list into. 1044 raw_tokens: The list of tokens. 1045 sql: The original SQL string, used to produce helpful debug messages. 1046 1047 Returns: 1048 The target Expression. 1049 """ 1050 errors = [] 1051 for expression_type in ensure_list(expression_types): 1052 parser = self.EXPRESSION_PARSERS.get(expression_type) 1053 if not parser: 1054 raise TypeError(f"No parser registered for {expression_type}") 1055 1056 try: 1057 return self._parse(parser, raw_tokens, sql) 1058 except ParseError as e: 1059 e.errors[0]["into_expression"] = expression_type 1060 errors.append(e) 1061 1062 raise ParseError( 1063 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1064 errors=merge_errors(errors), 1065 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1102 def check_errors(self) -> None: 1103 """Logs or raises any found errors, depending on the chosen error level setting.""" 1104 if self.error_level == ErrorLevel.WARN: 1105 for error in self.errors: 1106 logger.error(str(error)) 1107 elif self.error_level == ErrorLevel.RAISE and self.errors: 1108 raise ParseError( 1109 concat_messages(self.errors, self.max_errors), 1110 errors=merge_errors(self.errors), 1111 )
Logs or raises any found errors, depending on the chosen error level setting.
1113 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1114 """ 1115 Appends an error in the list of recorded errors or raises it, depending on the chosen 1116 error level setting. 1117 """ 1118 token = token or self._curr or self._prev or Token.string("") 1119 start = token.start 1120 end = token.end + 1 1121 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1122 highlight = self.sql[start:end] 1123 end_context = self.sql[end : end + self.error_message_context] 1124 1125 error = ParseError.new( 1126 f"{message}. Line {token.line}, Col: {token.col}.\n" 1127 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1128 description=message, 1129 line=token.line, 1130 col=token.col, 1131 start_context=start_context, 1132 highlight=highlight, 1133 end_context=end_context, 1134 ) 1135 1136 if self.error_level == ErrorLevel.IMMEDIATE: 1137 raise error 1138 1139 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1141 def expression( 1142 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1143 ) -> E: 1144 """ 1145 Creates a new, validated Expression. 1146 1147 Args: 1148 exp_class: The expression class to instantiate. 1149 comments: An optional list of comments to attach to the expression. 1150 kwargs: The arguments to set for the expression along with their respective values. 1151 1152 Returns: 1153 The target expression. 1154 """ 1155 instance = exp_class(**kwargs) 1156 instance.add_comments(comments) if comments else self._add_comments(instance) 1157 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1164 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1165 """ 1166 Validates an Expression, making sure that all its mandatory arguments are set. 1167 1168 Args: 1169 expression: The expression to validate. 1170 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1171 1172 Returns: 1173 The validated expression. 1174 """ 1175 if self.error_level != ErrorLevel.IGNORE: 1176 for error_message in expression.error_messages(args): 1177 self.raise_error(error_message) 1178 1179 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.