# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.UDECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 
TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 TokenType.DECIMAL: TokenType.UDECIMAL, 220 } 221 222 SUBQUERY_PREDICATES = { 223 TokenType.ANY: exp.Any, 224 TokenType.ALL: exp.All, 225 TokenType.EXISTS: exp.Exists, 226 TokenType.SOME: exp.Any, 227 } 228 229 RESERVED_KEYWORDS = { 230 *Tokenizer.SINGLE_TOKENS.values(), 231 TokenType.SELECT, 232 } 233 234 DB_CREATABLES = { 235 TokenType.DATABASE, 236 TokenType.SCHEMA, 237 TokenType.TABLE, 238 TokenType.VIEW, 239 TokenType.MODEL, 240 TokenType.DICTIONARY, 241 } 242 243 CREATABLES = { 244 TokenType.COLUMN, 245 TokenType.FUNCTION, 246 TokenType.INDEX, 247 TokenType.PROCEDURE, 248 *DB_CREATABLES, 249 } 250 251 # Tokens that can represent identifiers 252 ID_VAR_TOKENS = { 253 TokenType.VAR, 254 TokenType.ANTI, 255 TokenType.APPLY, 256 TokenType.ASC, 257 TokenType.AUTO_INCREMENT, 258 TokenType.BEGIN, 259 TokenType.CACHE, 260 TokenType.CASE, 261 TokenType.COLLATE, 262 TokenType.COMMAND, 263 TokenType.COMMENT, 264 TokenType.COMMIT, 265 TokenType.CONSTRAINT, 266 TokenType.DEFAULT, 267 TokenType.DELETE, 268 TokenType.DESC, 269 TokenType.DESCRIBE, 270 TokenType.DICTIONARY, 271 TokenType.DIV, 272 TokenType.END, 273 TokenType.EXECUTE, 274 TokenType.ESCAPE, 275 TokenType.FALSE, 276 TokenType.FIRST, 277 TokenType.FILTER, 278 TokenType.FORMAT, 279 TokenType.FULL, 280 TokenType.IS, 281 TokenType.ISNULL, 282 TokenType.INTERVAL, 283 TokenType.KEEP, 284 TokenType.KILL, 285 TokenType.LEFT, 286 TokenType.LOAD, 287 TokenType.MERGE, 288 TokenType.NATURAL, 289 TokenType.NEXT, 290 TokenType.OFFSET, 291 TokenType.ORDINALITY, 292 TokenType.OVERLAPS, 293 TokenType.OVERWRITE, 294 TokenType.PARTITION, 295 TokenType.PERCENT, 296 TokenType.PIVOT, 297 TokenType.PRAGMA, 298 TokenType.RANGE, 299 TokenType.REFERENCES, 300 TokenType.RIGHT, 301 TokenType.ROW, 302 TokenType.ROWS, 303 TokenType.SEMI, 304 TokenType.SET, 305 TokenType.SETTINGS, 306 TokenType.SHOW, 307 TokenType.TEMPORARY, 308 TokenType.TOP, 309 
TokenType.TRUE, 310 TokenType.UNIQUE, 311 TokenType.UNPIVOT, 312 TokenType.UPDATE, 313 TokenType.VOLATILE, 314 TokenType.WINDOW, 315 *CREATABLES, 316 *SUBQUERY_PREDICATES, 317 *TYPE_TOKENS, 318 *NO_PAREN_FUNCTIONS, 319 } 320 321 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 322 323 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 324 TokenType.ANTI, 325 TokenType.APPLY, 326 TokenType.ASOF, 327 TokenType.FULL, 328 TokenType.LEFT, 329 TokenType.LOCK, 330 TokenType.NATURAL, 331 TokenType.OFFSET, 332 TokenType.RIGHT, 333 TokenType.SEMI, 334 TokenType.WINDOW, 335 } 336 337 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 338 339 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 340 341 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 342 343 FUNC_TOKENS = { 344 TokenType.COLLATE, 345 TokenType.COMMAND, 346 TokenType.CURRENT_DATE, 347 TokenType.CURRENT_DATETIME, 348 TokenType.CURRENT_TIMESTAMP, 349 TokenType.CURRENT_TIME, 350 TokenType.CURRENT_USER, 351 TokenType.FILTER, 352 TokenType.FIRST, 353 TokenType.FORMAT, 354 TokenType.GLOB, 355 TokenType.IDENTIFIER, 356 TokenType.INDEX, 357 TokenType.ISNULL, 358 TokenType.ILIKE, 359 TokenType.INSERT, 360 TokenType.LIKE, 361 TokenType.MERGE, 362 TokenType.OFFSET, 363 TokenType.PRIMARY_KEY, 364 TokenType.RANGE, 365 TokenType.REPLACE, 366 TokenType.RLIKE, 367 TokenType.ROW, 368 TokenType.UNNEST, 369 TokenType.VAR, 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.DATE, 373 TokenType.DATETIME, 374 TokenType.TABLE, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.WINDOW, 378 TokenType.XOR, 379 *TYPE_TOKENS, 380 *SUBQUERY_PREDICATES, 381 } 382 383 CONJUNCTION = { 384 TokenType.AND: exp.And, 385 TokenType.OR: exp.Or, 386 } 387 388 EQUALITY = { 389 TokenType.EQ: exp.EQ, 390 TokenType.NEQ: exp.NEQ, 391 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 392 } 393 394 COMPARISON = { 395 TokenType.GT: exp.GT, 396 TokenType.GTE: exp.GTE, 397 TokenType.LT: exp.LT, 398 TokenType.LTE: exp.LTE, 399 } 400 401 BITWISE = { 
402 TokenType.AMP: exp.BitwiseAnd, 403 TokenType.CARET: exp.BitwiseXor, 404 TokenType.PIPE: exp.BitwiseOr, 405 TokenType.DPIPE: exp.DPipe, 406 } 407 408 TERM = { 409 TokenType.DASH: exp.Sub, 410 TokenType.PLUS: exp.Add, 411 TokenType.MOD: exp.Mod, 412 TokenType.COLLATE: exp.Collate, 413 } 414 415 FACTOR = { 416 TokenType.DIV: exp.IntDiv, 417 TokenType.LR_ARROW: exp.Distance, 418 TokenType.SLASH: exp.Div, 419 TokenType.STAR: exp.Mul, 420 } 421 422 TIMES = { 423 TokenType.TIME, 424 TokenType.TIMETZ, 425 } 426 427 TIMESTAMPS = { 428 TokenType.TIMESTAMP, 429 TokenType.TIMESTAMPTZ, 430 TokenType.TIMESTAMPLTZ, 431 *TIMES, 432 } 433 434 SET_OPERATIONS = { 435 TokenType.UNION, 436 TokenType.INTERSECT, 437 TokenType.EXCEPT, 438 } 439 440 JOIN_METHODS = { 441 TokenType.NATURAL, 442 TokenType.ASOF, 443 } 444 445 JOIN_SIDES = { 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.FULL, 449 } 450 451 JOIN_KINDS = { 452 TokenType.INNER, 453 TokenType.OUTER, 454 TokenType.CROSS, 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 JOIN_HINTS: t.Set[str] = set() 460 461 LAMBDAS = { 462 TokenType.ARROW: lambda self, expressions: self.expression( 463 exp.Lambda, 464 this=self._replace_lambda( 465 self._parse_conjunction(), 466 {node.name for node in expressions}, 467 ), 468 expressions=expressions, 469 ), 470 TokenType.FARROW: lambda self, expressions: self.expression( 471 exp.Kwarg, 472 this=exp.var(expressions[0].name), 473 expression=self._parse_conjunction(), 474 ), 475 } 476 477 COLUMN_OPERATORS = { 478 TokenType.DOT: None, 479 TokenType.DCOLON: lambda self, this, to: self.expression( 480 exp.Cast if self.STRICT_CAST else exp.TryCast, 481 this=this, 482 to=to, 483 ), 484 TokenType.ARROW: lambda self, this, path: self.expression( 485 exp.JSONExtract, 486 this=this, 487 expression=path, 488 ), 489 TokenType.DARROW: lambda self, this, path: self.expression( 490 exp.JSONExtractScalar, 491 this=this, 492 expression=path, 493 ), 494 TokenType.HASH_ARROW: lambda self, this, path: 
self.expression( 495 exp.JSONBExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 505 exp.JSONBContains, 506 this=this, 507 expression=key, 508 ), 509 } 510 511 EXPRESSION_PARSERS = { 512 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 513 exp.Column: lambda self: self._parse_column(), 514 exp.Condition: lambda self: self._parse_conjunction(), 515 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 516 exp.Expression: lambda self: self._parse_statement(), 517 exp.From: lambda self: self._parse_from(), 518 exp.Group: lambda self: self._parse_group(), 519 exp.Having: lambda self: self._parse_having(), 520 exp.Identifier: lambda self: self._parse_id_var(), 521 exp.Join: lambda self: self._parse_join(), 522 exp.Lambda: lambda self: self._parse_lambda(), 523 exp.Lateral: lambda self: self._parse_lateral(), 524 exp.Limit: lambda self: self._parse_limit(), 525 exp.Offset: lambda self: self._parse_offset(), 526 exp.Order: lambda self: self._parse_order(), 527 exp.Ordered: lambda self: self._parse_ordered(), 528 exp.Properties: lambda self: self._parse_properties(), 529 exp.Qualify: lambda self: self._parse_qualify(), 530 exp.Returning: lambda self: self._parse_returning(), 531 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 532 exp.Table: lambda self: self._parse_table_parts(), 533 exp.TableAlias: lambda self: self._parse_table_alias(), 534 exp.Where: lambda self: self._parse_where(), 535 exp.Window: lambda self: self._parse_named_window(), 536 exp.With: lambda self: self._parse_with(), 537 "JOIN_TYPE": lambda self: self._parse_join_parts(), 538 } 539 540 STATEMENT_PARSERS = { 541 TokenType.ALTER: lambda self: self._parse_alter(), 542 TokenType.BEGIN: lambda self: self._parse_transaction(), 543 
TokenType.CACHE: lambda self: self._parse_cache(), 544 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 545 TokenType.COMMENT: lambda self: self._parse_comment(), 546 TokenType.CREATE: lambda self: self._parse_create(), 547 TokenType.DELETE: lambda self: self._parse_delete(), 548 TokenType.DESC: lambda self: self._parse_describe(), 549 TokenType.DESCRIBE: lambda self: self._parse_describe(), 550 TokenType.DROP: lambda self: self._parse_drop(), 551 TokenType.INSERT: lambda self: self._parse_insert(), 552 TokenType.KILL: lambda self: self._parse_kill(), 553 TokenType.LOAD: lambda self: self._parse_load(), 554 TokenType.MERGE: lambda self: self._parse_merge(), 555 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 556 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 557 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 558 TokenType.SET: lambda self: self._parse_set(), 559 TokenType.UNCACHE: lambda self: self._parse_uncache(), 560 TokenType.UPDATE: lambda self: self._parse_update(), 561 TokenType.USE: lambda self: self.expression( 562 exp.Use, 563 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 564 and exp.var(self._prev.text), 565 this=self._parse_table(schema=False), 566 ), 567 } 568 569 UNARY_PARSERS = { 570 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 571 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 572 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 573 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 574 } 575 576 PRIMARY_PARSERS = { 577 TokenType.STRING: lambda self, token: self.expression( 578 exp.Literal, this=token.text, is_string=True 579 ), 580 TokenType.NUMBER: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=False 582 ), 583 TokenType.STAR: lambda self, _: self.expression( 
584 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 654 "JOURNAL": lambda 
self, **kwargs: self._parse_journal(**kwargs), 655 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 656 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 657 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 658 "LIKE": lambda self: self._parse_create_like(), 659 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 660 "LOCK": lambda self: self._parse_locking(), 661 "LOCKING": lambda self: self._parse_locking(), 662 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 663 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 664 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 665 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 666 "NO": lambda self: self._parse_no_property(), 667 "ON": lambda self: self._parse_on_property(), 668 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 669 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 670 "PARTITION BY": lambda self: self._parse_partitioned_by(), 671 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 672 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 673 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 674 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 675 "REMOTE": lambda self: self._parse_remote_with_connection(), 676 "RETURNS": lambda self: self._parse_returns(), 677 "ROW": lambda self: self._parse_row(), 678 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 679 "SAMPLE": lambda self: self.expression( 680 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 681 ), 682 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 683 "SETTINGS": lambda self: self.expression( 684 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 685 ), 686 "SORTKEY": lambda 
self: self._parse_sortkey(), 687 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 688 "STABLE": lambda self: self.expression( 689 exp.StabilityProperty, this=exp.Literal.string("STABLE") 690 ), 691 "STORED": lambda self: self._parse_stored(), 692 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 693 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 694 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 695 "TO": lambda self: self._parse_to_table(), 696 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 697 "TRANSFORM": lambda self: self.expression( 698 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 699 ), 700 "TTL": lambda self: self._parse_ttl(), 701 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 702 "VOLATILE": lambda self: self._parse_volatile_property(), 703 "WITH": lambda self: self._parse_with_property(), 704 } 705 706 CONSTRAINT_PARSERS = { 707 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 708 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 709 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 710 "CHARACTER SET": lambda self: self.expression( 711 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 712 ), 713 "CHECK": lambda self: self.expression( 714 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 715 ), 716 "COLLATE": lambda self: self.expression( 717 exp.CollateColumnConstraint, this=self._parse_var() 718 ), 719 "COMMENT": lambda self: self.expression( 720 exp.CommentColumnConstraint, this=self._parse_string() 721 ), 722 "COMPRESS": lambda self: self._parse_compress(), 723 "CLUSTERED": lambda self: self.expression( 724 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 725 ), 726 "NONCLUSTERED": lambda self: self.expression( 727 
exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 728 ), 729 "DEFAULT": lambda self: self.expression( 730 exp.DefaultColumnConstraint, this=self._parse_bitwise() 731 ), 732 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 733 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 734 "FORMAT": lambda self: self.expression( 735 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 736 ), 737 "GENERATED": lambda self: self._parse_generated_as_identity(), 738 "IDENTITY": lambda self: self._parse_auto_increment(), 739 "INLINE": lambda self: self._parse_inline(), 740 "LIKE": lambda self: self._parse_create_like(), 741 "NOT": lambda self: self._parse_not_constraint(), 742 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 743 "ON": lambda self: ( 744 self._match(TokenType.UPDATE) 745 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 746 ) 747 or self.expression(exp.OnProperty, this=self._parse_id_var()), 748 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 749 "PRIMARY KEY": lambda self: self._parse_primary_key(), 750 "REFERENCES": lambda self: self._parse_references(match=False), 751 "TITLE": lambda self: self.expression( 752 exp.TitleColumnConstraint, this=self._parse_var_or_string() 753 ), 754 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 755 "UNIQUE": lambda self: self._parse_unique(), 756 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 757 "WITH": lambda self: self.expression( 758 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 759 ), 760 } 761 762 ALTER_PARSERS = { 763 "ADD": lambda self: self._parse_alter_table_add(), 764 "ALTER": lambda self: self._parse_alter_table_alter(), 765 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 766 "DROP": lambda 
self: self._parse_alter_table_drop(), 767 "RENAME": lambda self: self._parse_alter_table_rename(), 768 } 769 770 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 771 772 NO_PAREN_FUNCTION_PARSERS = { 773 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 774 "CASE": lambda self: self._parse_case(), 775 "IF": lambda self: self._parse_if(), 776 "NEXT": lambda self: self._parse_next_value_for(), 777 } 778 779 INVALID_FUNC_NAME_TOKENS = { 780 TokenType.IDENTIFIER, 781 TokenType.STRING, 782 } 783 784 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 785 786 FUNCTION_PARSERS = { 787 "ANY_VALUE": lambda self: self._parse_any_value(), 788 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 789 "CONCAT": lambda self: self._parse_concat(), 790 "CONCAT_WS": lambda self: self._parse_concat_ws(), 791 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 792 "DECODE": lambda self: self._parse_decode(), 793 "EXTRACT": lambda self: self._parse_extract(), 794 "JSON_OBJECT": lambda self: self._parse_json_object(), 795 "LOG": lambda self: self._parse_logarithm(), 796 "MATCH": lambda self: self._parse_match_against(), 797 "OPENJSON": lambda self: self._parse_open_json(), 798 "POSITION": lambda self: self._parse_position(), 799 "PREDICT": lambda self: self._parse_predict(), 800 "SAFE_CAST": lambda self: self._parse_cast(False), 801 "STRING_AGG": lambda self: self._parse_string_agg(), 802 "SUBSTRING": lambda self: self._parse_substring(), 803 "TRIM": lambda self: self._parse_trim(), 804 "TRY_CAST": lambda self: self._parse_cast(False), 805 "TRY_CONVERT": lambda self: self._parse_convert(False), 806 } 807 808 QUERY_MODIFIER_PARSERS = { 809 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 810 TokenType.WHERE: lambda self: ("where", self._parse_where()), 811 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 812 TokenType.HAVING: lambda self: ("having", self._parse_having()), 
813 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 814 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 815 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 816 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 817 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 818 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 819 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 820 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 821 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 822 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 823 TokenType.CLUSTER_BY: lambda self: ( 824 "cluster", 825 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 826 ), 827 TokenType.DISTRIBUTE_BY: lambda self: ( 828 "distribute", 829 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 830 ), 831 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 832 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 833 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 834 } 835 836 SET_PARSERS = { 837 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 838 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 839 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 840 "TRANSACTION": lambda self: self._parse_set_transaction(), 841 } 842 843 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 844 845 TYPE_LITERAL_PARSERS = { 846 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 847 } 848 849 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 850 851 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 852 853 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 854 855 
    # Allowed kinds for BEGIN <kind> TRANSACTION (e.g. SQLite's deferred/immediate/exclusive).
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Phrases accepted after SET TRANSACTION / transaction statements.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR (see _parse_insert).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords that introduce a CREATE ... CLONE/COPY clause, and the kinds that may
    # appear inside its parenthesized specifier (see _parse_create).
    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Tokens usable as a window alias; ROWS is excluded to avoid ambiguity with frame specs.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can be reused for a new statement."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type we were attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs `parse_method` on each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The ANSI escapes underline the offending text in terminal output.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments when given; otherwise attach any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves any buffered comments from the previous token onto `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanned by the two tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward (or backward, for negative `times`)."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewinds the cursor back to `index` (used to backtrack after a failed match)."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps an unparsed statement as a raw Command node (keyword + trailing string)."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <object> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a TO <table> property (table name, possibly schema-qualified)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY, SET aggregates)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatcher: keyword parsers, commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE|...]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns True only when the full phrase is present."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE statements (tables, views, functions, indexes, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the create token.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear in several positions; merge them all into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that precedes the object name (Teradata-style)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward modifiers that were actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature doesn't accept these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, trying known keywords then key=value form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; backtrack so the caller can try something else.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> (or INPUTFORMAT/OUTPUTFORMAT pairs, Hive-style)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses [= | AS] <field> into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collects consecutive properties into a single Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property vs. a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the various WITH <...> property forms."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Builds a JournalProperty from modifiers matched by _parse_property_before."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY ordered-expression list."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (...)] INTO <n> BUCKETS (Hive-style)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; backtracks over COPY if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either with an explicit value or as NO/DEFAULT form."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING property: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses PARTITION BY <expr, ...>; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY [=] <schema or bracketed field>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses NO PRIMARY INDEX (the NO keyword is consumed by the caller)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses ON COMMIT PRESERVE|DELETE ROWS, else a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parses DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option> ...] in CREATE TABLE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parses [COMPOUND] SORTKEY (<ids>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parses [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parses REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses RETURNS <type> or RETURNS TABLE [<schema>|<...>]."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parses DESCRIBE [<kind>] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, including DIRECTORY targets and OR <alternative>."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parses KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... DO NOTHING/UPDATE or MySQL's ON DUPLICATE KEY UPDATE."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses ROW FORMAT ... where ROW was already consumed by the caller."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED <options> (Hive-style)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parses a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parses UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parses PARTITION (<exprs>); None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parses one row of a VALUES clause into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parses the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a SELECT statement, including a leading WITH (CTE) clause."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
limit = self._parse_limit(top=True) 2127 projections = self._parse_projections() 2128 2129 this = self.expression( 2130 exp.Select, 2131 kind=kind, 2132 hint=hint, 2133 distinct=distinct, 2134 expressions=projections, 2135 limit=limit, 2136 ) 2137 this.comments = comments 2138 2139 into = self._parse_into() 2140 if into: 2141 this.set("into", into) 2142 2143 if not from_: 2144 from_ = self._parse_from() 2145 2146 if from_: 2147 this.set("from", from_) 2148 2149 this = self._parse_query_modifiers(this) 2150 elif (table or nested) and self._match(TokenType.L_PAREN): 2151 if self._match(TokenType.PIVOT): 2152 this = self._parse_simplified_pivot() 2153 elif self._match(TokenType.FROM): 2154 this = exp.select("*").from_( 2155 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2156 ) 2157 else: 2158 this = self._parse_table() if table else self._parse_select(nested=True) 2159 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2160 2161 self._match_r_paren() 2162 2163 # We return early here so that the UNION isn't attached to the subquery by the 2164 # following call to _parse_set_operations, but instead becomes the parent node 2165 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2166 elif self._match(TokenType.VALUES): 2167 this = self.expression( 2168 exp.Values, 2169 expressions=self._parse_csv(self._parse_value), 2170 alias=self._parse_table_alias(), 2171 ) 2172 elif from_: 2173 this = exp.select("*").from_(from_.this, copy=False) 2174 else: 2175 this = None 2176 2177 return self._parse_set_operations(this) 2178 2179 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2180 if not skip_with_token and not self._match(TokenType.WITH): 2181 return None 2182 2183 comments = self._prev_comments 2184 recursive = self._match(TokenType.RECURSIVE) 2185 2186 expressions = [] 2187 while True: 2188 expressions.append(self._parse_cte()) 2189 2190 if not self._match(TokenType.COMMA) and not 
self._match(TokenType.WITH): 2191 break 2192 else: 2193 self._match(TokenType.WITH) 2194 2195 return self.expression( 2196 exp.With, comments=comments, expressions=expressions, recursive=recursive 2197 ) 2198 2199 def _parse_cte(self) -> exp.CTE: 2200 alias = self._parse_table_alias() 2201 if not alias or not alias.this: 2202 self.raise_error("Expected CTE to have alias") 2203 2204 self._match(TokenType.ALIAS) 2205 return self.expression( 2206 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2207 ) 2208 2209 def _parse_table_alias( 2210 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2211 ) -> t.Optional[exp.TableAlias]: 2212 any_token = self._match(TokenType.ALIAS) 2213 alias = ( 2214 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2215 or self._parse_string_as_identifier() 2216 ) 2217 2218 index = self._index 2219 if self._match(TokenType.L_PAREN): 2220 columns = self._parse_csv(self._parse_function_parameter) 2221 self._match_r_paren() if columns else self._retreat(index) 2222 else: 2223 columns = None 2224 2225 if not alias and not columns: 2226 return None 2227 2228 return self.expression(exp.TableAlias, this=alias, columns=columns) 2229 2230 def _parse_subquery( 2231 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2232 ) -> t.Optional[exp.Subquery]: 2233 if not this: 2234 return None 2235 2236 return self.expression( 2237 exp.Subquery, 2238 this=this, 2239 pivots=self._parse_pivots(), 2240 alias=self._parse_table_alias() if parse_alias else None, 2241 ) 2242 2243 def _parse_query_modifiers( 2244 self, this: t.Optional[exp.Expression] 2245 ) -> t.Optional[exp.Expression]: 2246 if isinstance(this, self.MODIFIABLES): 2247 for join in iter(self._parse_join, None): 2248 this.append("joins", join) 2249 for lateral in iter(self._parse_lateral, None): 2250 this.append("laterals", lateral) 2251 2252 while True: 2253 if self._match_set(self.QUERY_MODIFIER_PARSERS, 
advance=False): 2254 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2255 key, expression = parser(self) 2256 2257 if expression: 2258 this.set(key, expression) 2259 if key == "limit": 2260 offset = expression.args.pop("offset", None) 2261 if offset: 2262 this.set("offset", exp.Offset(expression=offset)) 2263 continue 2264 break 2265 return this 2266 2267 def _parse_hint(self) -> t.Optional[exp.Hint]: 2268 if self._match(TokenType.HINT): 2269 hints = [] 2270 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2271 hints.extend(hint) 2272 2273 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2274 self.raise_error("Expected */ after HINT") 2275 2276 return self.expression(exp.Hint, expressions=hints) 2277 2278 return None 2279 2280 def _parse_into(self) -> t.Optional[exp.Into]: 2281 if not self._match(TokenType.INTO): 2282 return None 2283 2284 temp = self._match(TokenType.TEMPORARY) 2285 unlogged = self._match_text_seq("UNLOGGED") 2286 self._match(TokenType.TABLE) 2287 2288 return self.expression( 2289 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2290 ) 2291 2292 def _parse_from( 2293 self, joins: bool = False, skip_from_token: bool = False 2294 ) -> t.Optional[exp.From]: 2295 if not skip_from_token and not self._match(TokenType.FROM): 2296 return None 2297 2298 return self.expression( 2299 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2300 ) 2301 2302 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2303 if not self._match(TokenType.MATCH_RECOGNIZE): 2304 return None 2305 2306 self._match_l_paren() 2307 2308 partition = self._parse_partition_by() 2309 order = self._parse_order() 2310 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2311 2312 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2313 rows = exp.var("ONE ROW PER MATCH") 2314 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2315 text = 
"ALL ROWS PER MATCH" 2316 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2317 text += f" SHOW EMPTY MATCHES" 2318 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2319 text += f" OMIT EMPTY MATCHES" 2320 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2321 text += f" WITH UNMATCHED ROWS" 2322 rows = exp.var(text) 2323 else: 2324 rows = None 2325 2326 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2327 text = "AFTER MATCH SKIP" 2328 if self._match_text_seq("PAST", "LAST", "ROW"): 2329 text += f" PAST LAST ROW" 2330 elif self._match_text_seq("TO", "NEXT", "ROW"): 2331 text += f" TO NEXT ROW" 2332 elif self._match_text_seq("TO", "FIRST"): 2333 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2334 elif self._match_text_seq("TO", "LAST"): 2335 text += f" TO LAST {self._advance_any().text}" # type: ignore 2336 after = exp.var(text) 2337 else: 2338 after = None 2339 2340 if self._match_text_seq("PATTERN"): 2341 self._match_l_paren() 2342 2343 if not self._curr: 2344 self.raise_error("Expecting )", self._curr) 2345 2346 paren = 1 2347 start = self._curr 2348 2349 while self._curr and paren > 0: 2350 if self._curr.token_type == TokenType.L_PAREN: 2351 paren += 1 2352 if self._curr.token_type == TokenType.R_PAREN: 2353 paren -= 1 2354 2355 end = self._prev 2356 self._advance() 2357 2358 if paren > 0: 2359 self.raise_error("Expecting )", self._curr) 2360 2361 pattern = exp.var(self._find_sql(start, end)) 2362 else: 2363 pattern = None 2364 2365 define = ( 2366 self._parse_csv( 2367 lambda: self.expression( 2368 exp.Alias, 2369 alias=self._parse_id_var(any_token=True), 2370 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2371 ) 2372 ) 2373 if self._match_text_seq("DEFINE") 2374 else None 2375 ) 2376 2377 self._match_r_paren() 2378 2379 return self.expression( 2380 exp.MatchRecognize, 2381 partition_by=partition, 2382 order=order, 2383 measures=measures, 2384 rows=rows, 2385 after=after, 2386 pattern=pattern, 2387 
define=define, 2388 alias=self._parse_table_alias(), 2389 ) 2390 2391 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2392 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2393 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2394 2395 if outer_apply or cross_apply: 2396 this = self._parse_select(table=True) 2397 view = None 2398 outer = not cross_apply 2399 elif self._match(TokenType.LATERAL): 2400 this = self._parse_select(table=True) 2401 view = self._match(TokenType.VIEW) 2402 outer = self._match(TokenType.OUTER) 2403 else: 2404 return None 2405 2406 if not this: 2407 this = ( 2408 self._parse_unnest() 2409 or self._parse_function() 2410 or self._parse_id_var(any_token=False) 2411 ) 2412 2413 while self._match(TokenType.DOT): 2414 this = exp.Dot( 2415 this=this, 2416 expression=self._parse_function() or self._parse_id_var(any_token=False), 2417 ) 2418 2419 if view: 2420 table = self._parse_id_var(any_token=False) 2421 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2422 table_alias: t.Optional[exp.TableAlias] = self.expression( 2423 exp.TableAlias, this=table, columns=columns 2424 ) 2425 elif isinstance(this, exp.Subquery) and this.alias: 2426 # Ensures parity between the Subquery's and the Lateral's "alias" args 2427 table_alias = this.args["alias"].copy() 2428 else: 2429 table_alias = self._parse_table_alias() 2430 2431 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2432 2433 def _parse_join_parts( 2434 self, 2435 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2436 return ( 2437 self._match_set(self.JOIN_METHODS) and self._prev, 2438 self._match_set(self.JOIN_SIDES) and self._prev, 2439 self._match_set(self.JOIN_KINDS) and self._prev, 2440 ) 2441 2442 def _parse_join( 2443 self, skip_join_token: bool = False, parse_bracket: bool = False 2444 ) -> t.Optional[exp.Join]: 2445 if self._match(TokenType.COMMA): 2446 return 
self.expression(exp.Join, this=self._parse_table()) 2447 2448 index = self._index 2449 method, side, kind = self._parse_join_parts() 2450 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2451 join = self._match(TokenType.JOIN) 2452 2453 if not skip_join_token and not join: 2454 self._retreat(index) 2455 kind = None 2456 method = None 2457 side = None 2458 2459 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2460 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2461 2462 if not skip_join_token and not join and not outer_apply and not cross_apply: 2463 return None 2464 2465 if outer_apply: 2466 side = Token(TokenType.LEFT, "LEFT") 2467 2468 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2469 2470 if method: 2471 kwargs["method"] = method.text 2472 if side: 2473 kwargs["side"] = side.text 2474 if kind: 2475 kwargs["kind"] = kind.text 2476 if hint: 2477 kwargs["hint"] = hint 2478 2479 if self._match(TokenType.ON): 2480 kwargs["on"] = self._parse_conjunction() 2481 elif self._match(TokenType.USING): 2482 kwargs["using"] = self._parse_wrapped_id_vars() 2483 elif not (kind and kind.token_type == TokenType.CROSS): 2484 index = self._index 2485 join = self._parse_join() 2486 2487 if join and self._match(TokenType.ON): 2488 kwargs["on"] = self._parse_conjunction() 2489 elif join and self._match(TokenType.USING): 2490 kwargs["using"] = self._parse_wrapped_id_vars() 2491 else: 2492 join = None 2493 self._retreat(index) 2494 2495 kwargs["this"].set("joins", [join] if join else None) 2496 2497 comments = [c for token in (method, side, kind) if token for c in token.comments] 2498 return self.expression(exp.Join, comments=comments, **kwargs) 2499 2500 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2501 this = self._parse_conjunction() 2502 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2503 return this 2504 2505 opclass = 
self._parse_var(any_token=True) 2506 if opclass: 2507 return self.expression(exp.Opclass, this=this, expression=opclass) 2508 2509 return this 2510 2511 def _parse_index( 2512 self, 2513 index: t.Optional[exp.Expression] = None, 2514 ) -> t.Optional[exp.Index]: 2515 if index: 2516 unique = None 2517 primary = None 2518 amp = None 2519 2520 self._match(TokenType.ON) 2521 self._match(TokenType.TABLE) # hive 2522 table = self._parse_table_parts(schema=True) 2523 else: 2524 unique = self._match(TokenType.UNIQUE) 2525 primary = self._match_text_seq("PRIMARY") 2526 amp = self._match_text_seq("AMP") 2527 2528 if not self._match(TokenType.INDEX): 2529 return None 2530 2531 index = self._parse_id_var() 2532 table = None 2533 2534 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2535 2536 if self._match(TokenType.L_PAREN, advance=False): 2537 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2538 else: 2539 columns = None 2540 2541 return self.expression( 2542 exp.Index, 2543 this=index, 2544 table=table, 2545 using=using, 2546 columns=columns, 2547 unique=unique, 2548 primary=primary, 2549 amp=amp, 2550 partition_by=self._parse_partition_by(), 2551 where=self._parse_where(), 2552 ) 2553 2554 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2555 hints: t.List[exp.Expression] = [] 2556 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2557 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2558 hints.append( 2559 self.expression( 2560 exp.WithTableHint, 2561 expressions=self._parse_csv( 2562 lambda: self._parse_function() or self._parse_var(any_token=True) 2563 ), 2564 ) 2565 ) 2566 self._match_r_paren() 2567 else: 2568 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2569 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2570 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2571 2572 self._match_texts({"INDEX", 
"KEY"}) 2573 if self._match(TokenType.FOR): 2574 hint.set("target", self._advance_any() and self._prev.text.upper()) 2575 2576 hint.set("expressions", self._parse_wrapped_id_vars()) 2577 hints.append(hint) 2578 2579 return hints or None 2580 2581 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2582 return ( 2583 (not schema and self._parse_function(optional_parens=False)) 2584 or self._parse_id_var(any_token=False) 2585 or self._parse_string_as_identifier() 2586 or self._parse_placeholder() 2587 ) 2588 2589 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2590 catalog = None 2591 db = None 2592 table = self._parse_table_part(schema=schema) 2593 2594 while self._match(TokenType.DOT): 2595 if catalog: 2596 # This allows nesting the table in arbitrarily many dot expressions if needed 2597 table = self.expression( 2598 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2599 ) 2600 else: 2601 catalog = db 2602 db = table 2603 table = self._parse_table_part(schema=schema) 2604 2605 if not table: 2606 self.raise_error(f"Expected table name but got {self._curr}") 2607 2608 return self.expression( 2609 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2610 ) 2611 2612 def _parse_table( 2613 self, 2614 schema: bool = False, 2615 joins: bool = False, 2616 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2617 parse_bracket: bool = False, 2618 ) -> t.Optional[exp.Expression]: 2619 lateral = self._parse_lateral() 2620 if lateral: 2621 return lateral 2622 2623 unnest = self._parse_unnest() 2624 if unnest: 2625 return unnest 2626 2627 values = self._parse_derived_table_values() 2628 if values: 2629 return values 2630 2631 subquery = self._parse_select(table=True) 2632 if subquery: 2633 if not subquery.args.get("pivots"): 2634 subquery.set("pivots", self._parse_pivots()) 2635 return subquery 2636 2637 bracket = parse_bracket and self._parse_bracket(None) 2638 bracket = 
self.expression(exp.Table, this=bracket) if bracket else None 2639 this = t.cast( 2640 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2641 ) 2642 2643 if schema: 2644 return self._parse_schema(this=this) 2645 2646 version = self._parse_version() 2647 2648 if version: 2649 this.set("version", version) 2650 2651 if self.ALIAS_POST_TABLESAMPLE: 2652 table_sample = self._parse_table_sample() 2653 2654 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2655 if alias: 2656 this.set("alias", alias) 2657 2658 if self._match_text_seq("AT"): 2659 this.set("index", self._parse_id_var()) 2660 2661 this.set("hints", self._parse_table_hints()) 2662 2663 if not this.args.get("pivots"): 2664 this.set("pivots", self._parse_pivots()) 2665 2666 if not self.ALIAS_POST_TABLESAMPLE: 2667 table_sample = self._parse_table_sample() 2668 2669 if table_sample: 2670 table_sample.set("this", this) 2671 this = table_sample 2672 2673 if joins: 2674 for join in iter(self._parse_join, None): 2675 this.append("joins", join) 2676 2677 return this 2678 2679 def _parse_version(self) -> t.Optional[exp.Version]: 2680 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2681 this = "TIMESTAMP" 2682 elif self._match(TokenType.VERSION_SNAPSHOT): 2683 this = "VERSION" 2684 else: 2685 return None 2686 2687 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2688 kind = self._prev.text.upper() 2689 start = self._parse_bitwise() 2690 self._match_texts(("TO", "AND")) 2691 end = self._parse_bitwise() 2692 expression: t.Optional[exp.Expression] = self.expression( 2693 exp.Tuple, expressions=[start, end] 2694 ) 2695 elif self._match_text_seq("CONTAINED", "IN"): 2696 kind = "CONTAINED IN" 2697 expression = self.expression( 2698 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2699 ) 2700 elif self._match(TokenType.ALL): 2701 kind = "ALL" 2702 expression = None 2703 else: 2704 self._match_text_seq("AS", "OF") 2705 kind = "AS 
OF" 2706 expression = self._parse_type() 2707 2708 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2709 2710 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2711 if not self._match(TokenType.UNNEST): 2712 return None 2713 2714 expressions = self._parse_wrapped_csv(self._parse_type) 2715 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2716 2717 alias = self._parse_table_alias() if with_alias else None 2718 2719 if alias: 2720 if self.UNNEST_COLUMN_ONLY: 2721 if alias.args.get("columns"): 2722 self.raise_error("Unexpected extra column alias in unnest.") 2723 2724 alias.set("columns", [alias.this]) 2725 alias.set("this", None) 2726 2727 columns = alias.args.get("columns") or [] 2728 if offset and len(expressions) < len(columns): 2729 offset = columns.pop() 2730 2731 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2732 self._match(TokenType.ALIAS) 2733 offset = self._parse_id_var( 2734 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2735 ) or exp.to_identifier("offset") 2736 2737 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2738 2739 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2740 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2741 if not is_derived and not self._match(TokenType.VALUES): 2742 return None 2743 2744 expressions = self._parse_csv(self._parse_value) 2745 alias = self._parse_table_alias() 2746 2747 if is_derived: 2748 self._match_r_paren() 2749 2750 return self.expression( 2751 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2752 ) 2753 2754 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2755 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2756 as_modifier and self._match_text_seq("USING", "SAMPLE") 2757 ): 2758 return None 2759 2760 bucket_numerator = None 2761 bucket_denominator = None 2762 
bucket_field = None 2763 percent = None 2764 rows = None 2765 size = None 2766 seed = None 2767 2768 kind = ( 2769 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2770 ) 2771 method = self._parse_var(tokens=(TokenType.ROW,)) 2772 2773 matched_l_paren = self._match(TokenType.L_PAREN) 2774 2775 if self.TABLESAMPLE_CSV: 2776 num = None 2777 expressions = self._parse_csv(self._parse_primary) 2778 else: 2779 expressions = None 2780 num = ( 2781 self._parse_factor() 2782 if self._match(TokenType.NUMBER, advance=False) 2783 else self._parse_primary() 2784 ) 2785 2786 if self._match_text_seq("BUCKET"): 2787 bucket_numerator = self._parse_number() 2788 self._match_text_seq("OUT", "OF") 2789 bucket_denominator = bucket_denominator = self._parse_number() 2790 self._match(TokenType.ON) 2791 bucket_field = self._parse_field() 2792 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2793 percent = num 2794 elif self._match(TokenType.ROWS): 2795 rows = num 2796 elif num: 2797 size = num 2798 2799 if matched_l_paren: 2800 self._match_r_paren() 2801 2802 if self._match(TokenType.L_PAREN): 2803 method = self._parse_var() 2804 seed = self._match(TokenType.COMMA) and self._parse_number() 2805 self._match_r_paren() 2806 elif self._match_texts(("SEED", "REPEATABLE")): 2807 seed = self._parse_wrapped(self._parse_number) 2808 2809 return self.expression( 2810 exp.TableSample, 2811 expressions=expressions, 2812 method=method, 2813 bucket_numerator=bucket_numerator, 2814 bucket_denominator=bucket_denominator, 2815 bucket_field=bucket_field, 2816 percent=percent, 2817 rows=rows, 2818 size=size, 2819 seed=seed, 2820 kind=kind, 2821 ) 2822 2823 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2824 return list(iter(self._parse_pivot, None)) or None 2825 2826 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2827 return list(iter(self._parse_join, None)) or None 2828 2829 # https://duckdb.org/docs/sql/statements/pivot 2830 def 
_parse_simplified_pivot(self) -> exp.Pivot: 2831 def _parse_on() -> t.Optional[exp.Expression]: 2832 this = self._parse_bitwise() 2833 return self._parse_in(this) if self._match(TokenType.IN) else this 2834 2835 this = self._parse_table() 2836 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2837 using = self._match(TokenType.USING) and self._parse_csv( 2838 lambda: self._parse_alias(self._parse_function()) 2839 ) 2840 group = self._parse_group() 2841 return self.expression( 2842 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2843 ) 2844 2845 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2846 index = self._index 2847 include_nulls = None 2848 2849 if self._match(TokenType.PIVOT): 2850 unpivot = False 2851 elif self._match(TokenType.UNPIVOT): 2852 unpivot = True 2853 2854 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2855 if self._match_text_seq("INCLUDE", "NULLS"): 2856 include_nulls = True 2857 elif self._match_text_seq("EXCLUDE", "NULLS"): 2858 include_nulls = False 2859 else: 2860 return None 2861 2862 expressions = [] 2863 field = None 2864 2865 if not self._match(TokenType.L_PAREN): 2866 self._retreat(index) 2867 return None 2868 2869 if unpivot: 2870 expressions = self._parse_csv(self._parse_column) 2871 else: 2872 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2873 2874 if not expressions: 2875 self.raise_error("Failed to parse PIVOT's aggregation list") 2876 2877 if not self._match(TokenType.FOR): 2878 self.raise_error("Expecting FOR") 2879 2880 value = self._parse_column() 2881 2882 if not self._match(TokenType.IN): 2883 self.raise_error("Expecting IN") 2884 2885 field = self._parse_in(value, alias=True) 2886 2887 self._match_r_paren() 2888 2889 pivot = self.expression( 2890 exp.Pivot, 2891 expressions=expressions, 2892 field=field, 2893 unpivot=unpivot, 2894 include_nulls=include_nulls, 2895 ) 2896 2897 if not 
self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2898 pivot.set("alias", self._parse_table_alias()) 2899 2900 if not unpivot: 2901 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2902 2903 columns: t.List[exp.Expression] = [] 2904 for fld in pivot.args["field"].expressions: 2905 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2906 for name in names: 2907 if self.PREFIXED_PIVOT_COLUMNS: 2908 name = f"{name}_{field_name}" if name else field_name 2909 else: 2910 name = f"{field_name}_{name}" if name else field_name 2911 2912 columns.append(exp.to_identifier(name)) 2913 2914 pivot.set("columns", columns) 2915 2916 return pivot 2917 2918 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2919 return [agg.alias for agg in aggregations] 2920 2921 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2922 if not skip_where_token and not self._match(TokenType.WHERE): 2923 return None 2924 2925 return self.expression( 2926 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2927 ) 2928 2929 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2930 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2931 return None 2932 2933 elements = defaultdict(list) 2934 2935 if self._match(TokenType.ALL): 2936 return self.expression(exp.Group, all=True) 2937 2938 while True: 2939 expressions = self._parse_csv(self._parse_conjunction) 2940 if expressions: 2941 elements["expressions"].extend(expressions) 2942 2943 grouping_sets = self._parse_grouping_sets() 2944 if grouping_sets: 2945 elements["grouping_sets"].extend(grouping_sets) 2946 2947 rollup = None 2948 cube = None 2949 totals = None 2950 2951 with_ = self._match(TokenType.WITH) 2952 if self._match(TokenType.ROLLUP): 2953 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2954 
elements["rollup"].extend(ensure_list(rollup)) 2955 2956 if self._match(TokenType.CUBE): 2957 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2958 elements["cube"].extend(ensure_list(cube)) 2959 2960 if self._match_text_seq("TOTALS"): 2961 totals = True 2962 elements["totals"] = True # type: ignore 2963 2964 if not (grouping_sets or rollup or cube or totals): 2965 break 2966 2967 return self.expression(exp.Group, **elements) # type: ignore 2968 2969 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2970 if not self._match(TokenType.GROUPING_SETS): 2971 return None 2972 2973 return self._parse_wrapped_csv(self._parse_grouping_set) 2974 2975 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2976 if self._match(TokenType.L_PAREN): 2977 grouping_set = self._parse_csv(self._parse_column) 2978 self._match_r_paren() 2979 return self.expression(exp.Tuple, expressions=grouping_set) 2980 2981 return self._parse_column() 2982 2983 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2984 if not skip_having_token and not self._match(TokenType.HAVING): 2985 return None 2986 return self.expression(exp.Having, this=self._parse_conjunction()) 2987 2988 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2989 if not self._match(TokenType.QUALIFY): 2990 return None 2991 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2992 2993 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2994 if skip_start_token: 2995 start = None 2996 elif self._match(TokenType.START_WITH): 2997 start = self._parse_conjunction() 2998 else: 2999 return None 3000 3001 self._match(TokenType.CONNECT_BY) 3002 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3003 exp.Prior, this=self._parse_bitwise() 3004 ) 3005 connect = self._parse_conjunction() 3006 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3007 3008 if not start and self._match(TokenType.START_WITH): 3009 
start = self._parse_conjunction() 3010 3011 return self.expression(exp.Connect, start=start, connect=connect) 3012 3013 def _parse_order( 3014 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3015 ) -> t.Optional[exp.Expression]: 3016 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3017 return this 3018 3019 return self.expression( 3020 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3021 ) 3022 3023 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3024 if not self._match(token): 3025 return None 3026 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3027 3028 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3029 this = parse_method() if parse_method else self._parse_conjunction() 3030 3031 asc = self._match(TokenType.ASC) 3032 desc = self._match(TokenType.DESC) or (asc and False) 3033 3034 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3035 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3036 3037 nulls_first = is_nulls_first or False 3038 explicitly_null_ordered = is_nulls_first or is_nulls_last 3039 3040 if ( 3041 not explicitly_null_ordered 3042 and ( 3043 (not desc and self.NULL_ORDERING == "nulls_are_small") 3044 or (desc and self.NULL_ORDERING != "nulls_are_small") 3045 ) 3046 and self.NULL_ORDERING != "nulls_are_last" 3047 ): 3048 nulls_first = True 3049 3050 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3051 3052 def _parse_limit( 3053 self, this: t.Optional[exp.Expression] = None, top: bool = False 3054 ) -> t.Optional[exp.Expression]: 3055 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3056 comments = self._prev_comments 3057 if top: 3058 limit_paren = self._match(TokenType.L_PAREN) 3059 expression = self._parse_number() 3060 3061 if limit_paren: 3062 self._match_r_paren() 3063 else: 3064 expression = self._parse_term() 

            # MySQL-style "LIMIT <offset>, <count>": the first term was the
            # offset, the second is the actual row count.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    # Parses OFFSET <n> [ROW | ROWS]; returns `this` unchanged when absent.
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    # Parses trailing locking clauses: FOR UPDATE, FOR SHARE and
    # LOCK IN SHARE MODE, each with optional OF <tables> and a wait policy.
    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is tri-state-ish: True = NOWAIT, False = SKIP LOCKED,
            # an expression = WAIT <n>, None = no wait policy given.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    # Parses UNION / EXCEPT / INTERSECT chains, recursing on the right-hand
    # side. When neither DISTINCT nor ALL is written, distinct defaults to True.
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    # The following methods form the precedence-climbing chain:
    # expression -> conjunction -> equality -> comparison -> range -> bitwise.
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    # Parses range-style predicates (BETWEEN, IN, LIKE, ...), the Postgres
    # ISNULL/NOTNULL shorthands, and IS; a leading NOT negates the predicate.
    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    # Parses the tail of IS [NOT] {DISTINCT FROM <expr> | NULL | TRUE | FALSE}.
    # `index` rewinds past the already-consumed IS token on failure.
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    # Parses the right-hand side of IN: UNNEST(...), a parenthesized subquery
    # or expression list, or a bare field.
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query`, anything else `expressions`.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    # Parses the tail of BETWEEN: <low> AND <high>.
    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    # Parses an optional ESCAPE '<char>' suffix (e.g. after LIKE).
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    # Parses INTERVAL literals, normalizing e.g. INTERVAL 5 day and
    # INTERVAL '5 day' into the canonical INTERVAL '5' day form.
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # '5 day' -> this='5', unit=day
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    # Parses bitwise operators plus `??` (-> Coalesce) and the shift operators
    # << / >>, which arrive as pairs of LT/GT tokens.
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    # Additive operators (TERM), then multiplicative (FACTOR), then unary.
    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    # Parses either an INTERVAL, a typed literal (e.g. DATE '2020-01-01',
    # turned into a Cast unless a TYPE_LITERAL_PARSERS override exists), or a
    # plain column expression, backtracking when the type reading was wrong.
    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            # A bare type name not followed by a literal was misread; rewind
            # and parse the whole thing as a column instead.
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    # One size/parameter inside a type's parentheses, e.g. the 10 in DECIMAL(10).
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    # Parses a (possibly nested/parameterized) data type. Backtracks to `index`
    # and returns None whenever the tokens turn out not to form a type.
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        # Teradata SYSUDTLIB. prefix for user-defined types.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # The type name may have been tokenized as a plain identifier
            # (e.g. quoted); re-tokenize it to see if it is really a type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        # maybe_func: TYPE(...) could also be a function call named like a
        # type; resolved below when check_func is set.
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        # TYPE(...) not followed by a string literal is treated as a function
        # call, not a type; rewind all the way and let the caller reparse.
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    # One STRUCT member: `name type` or `name: type` (the colon is optional).
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    # Parses an optional AT TIME ZONE <zone> suffix.
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    # Parses a column reference (wrapping bare identifiers in Column) and any
    # trailing column operators / brackets.
    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        # Repeatedly applies column operators (dots, ::, json extraction, ...),
        # building up qualified columns / casts / Dot chains.
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers over: a.b.c -> column c, table b, db a.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    # Parses primary expressions: literals (with adjacent-string concatenation
    # and the `.5` shorthand) and parenthesized expressions/subqueries/tuples.
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    # A field is a primary expression, a function call, or an identifier.
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    # Parses a function call: paren-less functions, registered special-case
    # parsers, subquery predicates, known functions, and anonymous fallbacks.
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Skip the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                # Keep the original spelling when function names are not
                # being normalized.
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    # One parameter inside a function definition's signature.
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    # Parses a (possibly dotted) UDF name with an optional parameter list.
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    # Parses an introducer (e.g. a charset prefix like _utf8'...'), falling
    # back to a plain identifier when no literal follows.
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    # Parses a session parameter reference, optionally qualified: kind.name.
    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    # Parses a lambda ((x, y) -> expr or x -> expr), a DISTINCT argument list,
    # or a plain select/expression argument (used for function arguments).
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a normal argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    # Parses a parenthesized schema (column/constraint list); if the parens
    # actually contain a SELECT, backs off and returns `this` unchanged.
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    # Parses a column definition: name, optional type, optional computed
    # expression, and trailing column constraints.
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # `name AS <expr>` (no type) is a computed column.
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    # Parses AUTO_INCREMENT-style options, upgrading to a generated-identity
    # constraint when both START and INCREMENT are present.
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    # Parses a COMPRESS column constraint with one value or a wrapped list.
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3822 3823 def _parse_generated_as_identity( 3824 self, 3825 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3826 if self._match_text_seq("BY", "DEFAULT"): 3827 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3828 this = self.expression( 3829 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3830 ) 3831 else: 3832 self._match_text_seq("ALWAYS") 3833 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3834 3835 self._match(TokenType.ALIAS) 3836 identity = self._match_text_seq("IDENTITY") 3837 3838 if self._match(TokenType.L_PAREN): 3839 if self._match(TokenType.START_WITH): 3840 this.set("start", self._parse_bitwise()) 3841 if self._match_text_seq("INCREMENT", "BY"): 3842 this.set("increment", self._parse_bitwise()) 3843 if self._match_text_seq("MINVALUE"): 3844 this.set("minvalue", self._parse_bitwise()) 3845 if self._match_text_seq("MAXVALUE"): 3846 this.set("maxvalue", self._parse_bitwise()) 3847 3848 if self._match_text_seq("CYCLE"): 3849 this.set("cycle", True) 3850 elif self._match_text_seq("NO", "CYCLE"): 3851 this.set("cycle", False) 3852 3853 if not identity: 3854 this.set("expression", self._parse_bitwise()) 3855 3856 self._match_r_paren() 3857 3858 return this 3859 3860 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3861 self._match_text_seq("LENGTH") 3862 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3863 3864 def _parse_not_constraint( 3865 self, 3866 ) -> t.Optional[exp.Expression]: 3867 if self._match_text_seq("NULL"): 3868 return self.expression(exp.NotNullColumnConstraint) 3869 if self._match_text_seq("CASESPECIFIC"): 3870 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3871 if self._match_text_seq("FOR", "REPLICATION"): 3872 return self.expression(exp.NotForReplicationColumnConstraint) 3873 return None 3874 3875 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3876 if 
self._match(TokenType.CONSTRAINT): 3877 this = self._parse_id_var() 3878 else: 3879 this = None 3880 3881 if self._match_texts(self.CONSTRAINT_PARSERS): 3882 return self.expression( 3883 exp.ColumnConstraint, 3884 this=this, 3885 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3886 ) 3887 3888 return this 3889 3890 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3891 if not self._match(TokenType.CONSTRAINT): 3892 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3893 3894 this = self._parse_id_var() 3895 expressions = [] 3896 3897 while True: 3898 constraint = self._parse_unnamed_constraint() or self._parse_function() 3899 if not constraint: 3900 break 3901 expressions.append(constraint) 3902 3903 return self.expression(exp.Constraint, this=this, expressions=expressions) 3904 3905 def _parse_unnamed_constraint( 3906 self, constraints: t.Optional[t.Collection[str]] = None 3907 ) -> t.Optional[exp.Expression]: 3908 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 3909 constraints or self.CONSTRAINT_PARSERS 3910 ): 3911 return None 3912 3913 constraint = self._prev.text.upper() 3914 if constraint not in self.CONSTRAINT_PARSERS: 3915 self.raise_error(f"No parser found for schema constraint {constraint}.") 3916 3917 return self.CONSTRAINT_PARSERS[constraint](self) 3918 3919 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3920 self._match_text_seq("KEY") 3921 return self.expression( 3922 exp.UniqueColumnConstraint, 3923 this=self._parse_schema(self._parse_id_var(any_token=False)), 3924 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3925 ) 3926 3927 def _parse_key_constraint_options(self) -> t.List[str]: 3928 options = [] 3929 while True: 3930 if not self._curr: 3931 break 3932 3933 if self._match(TokenType.ON): 3934 action = None 3935 on = self._advance_any() and self._prev.text 3936 3937 if self._match_text_seq("NO", "ACTION"): 3938 action = 
"NO ACTION" 3939 elif self._match_text_seq("CASCADE"): 3940 action = "CASCADE" 3941 elif self._match_text_seq("RESTRICT"): 3942 action = "RESTRICT" 3943 elif self._match_pair(TokenType.SET, TokenType.NULL): 3944 action = "SET NULL" 3945 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3946 action = "SET DEFAULT" 3947 else: 3948 self.raise_error("Invalid key constraint") 3949 3950 options.append(f"ON {on} {action}") 3951 elif self._match_text_seq("NOT", "ENFORCED"): 3952 options.append("NOT ENFORCED") 3953 elif self._match_text_seq("DEFERRABLE"): 3954 options.append("DEFERRABLE") 3955 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3956 options.append("INITIALLY DEFERRED") 3957 elif self._match_text_seq("NORELY"): 3958 options.append("NORELY") 3959 elif self._match_text_seq("MATCH", "FULL"): 3960 options.append("MATCH FULL") 3961 else: 3962 break 3963 3964 return options 3965 3966 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3967 if match and not self._match(TokenType.REFERENCES): 3968 return None 3969 3970 expressions = None 3971 this = self._parse_table(schema=True) 3972 options = self._parse_key_constraint_options() 3973 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3974 3975 def _parse_foreign_key(self) -> exp.ForeignKey: 3976 expressions = self._parse_wrapped_id_vars() 3977 reference = self._parse_references() 3978 options = {} 3979 3980 while self._match(TokenType.ON): 3981 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3982 self.raise_error("Expected DELETE or UPDATE") 3983 3984 kind = self._prev.text.lower() 3985 3986 if self._match_text_seq("NO", "ACTION"): 3987 action = "NO ACTION" 3988 elif self._match(TokenType.SET): 3989 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3990 action = "SET " + self._prev.text.upper() 3991 else: 3992 self._advance() 3993 action = self._prev.text.upper() 3994 3995 options[kind] = action 3996 3997 return 
self.expression( 3998 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3999 ) 4000 4001 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4002 return self._parse_field() 4003 4004 def _parse_primary_key( 4005 self, wrapped_optional: bool = False, in_props: bool = False 4006 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4007 desc = ( 4008 self._match_set((TokenType.ASC, TokenType.DESC)) 4009 and self._prev.token_type == TokenType.DESC 4010 ) 4011 4012 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4013 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4014 4015 expressions = self._parse_wrapped_csv( 4016 self._parse_primary_key_part, optional=wrapped_optional 4017 ) 4018 options = self._parse_key_constraint_options() 4019 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4020 4021 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4022 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4023 return this 4024 4025 bracket_kind = self._prev.token_type 4026 4027 if self._match(TokenType.COLON): 4028 expressions: t.List[exp.Expression] = [ 4029 self.expression(exp.Slice, expression=self._parse_conjunction()) 4030 ] 4031 else: 4032 expressions = self._parse_csv( 4033 lambda: self._parse_slice( 4034 self._parse_alias(self._parse_conjunction(), explicit=True) 4035 ) 4036 ) 4037 4038 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4039 if bracket_kind == TokenType.L_BRACE: 4040 this = self.expression(exp.Struct, expressions=expressions) 4041 elif not this or this.name.upper() == "ARRAY": 4042 this = self.expression(exp.Array, expressions=expressions) 4043 else: 4044 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4045 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4046 4047 if not self._match(TokenType.R_BRACKET) and 
bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` into a Slice if a ":" follows (upper bound optional)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any window suffix."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for the searched form.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(cond, t, f) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind so IF can be re-read as something else.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) ->
t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Not the sequence syntax: give back the token consumed before this call.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(...)/TRY_CAST(...): <expr> AS <type> [FORMAT ...].

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type-string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this
in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS <temporal> FORMAT ...) is rewritten to STR_TO_DATE/STR_TO_TIME
                # with the format translated through the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's arguments, honoring the dialect's NULL/strictness semantics."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delim, ...); first argument is the separator."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Only the values are coerced; the delimiter is left untouched.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregation into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            # DISTINCT applies to the first (aggregated) expression only.
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not
self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT's arguments: expr USING charset, or expr, type.

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. 
Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one JSON_OBJECT entry: [KEY] <key> {:|,} [VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when a trailing FORMAT JSON is present."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses
the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4313 for value in values: 4314 if self._match_text_seq(value, "ON", on): 4315 return f"{value} ON {on}" 4316 4317 return None 4318 4319 def _parse_json_object(self) -> exp.JSONObject: 4320 star = self._parse_star() 4321 expressions = ( 4322 [star] 4323 if star 4324 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4325 ) 4326 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4327 4328 unique_keys = None 4329 if self._match_text_seq("WITH", "UNIQUE"): 4330 unique_keys = True 4331 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4332 unique_keys = False 4333 4334 self._match_text_seq("KEYS") 4335 4336 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4337 self._parse_type() 4338 ) 4339 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4340 4341 return self.expression( 4342 exp.JSONObject, 4343 expressions=expressions, 4344 null_handling=null_handling, 4345 unique_keys=unique_keys, 4346 return_type=return_type, 4347 encoding=encoding, 4348 ) 4349 4350 def _parse_logarithm(self) -> exp.Func: 4351 # Default argument order is base, expression 4352 args = self._parse_csv(self._parse_range) 4353 4354 if len(args) > 1: 4355 if not self.LOG_BASE_FIRST: 4356 args.reverse() 4357 return exp.Log.from_arg_list(args) 4358 4359 return self.expression( 4360 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4361 ) 4362 4363 def _parse_match_against(self) -> exp.MatchAgainst: 4364 expressions = self._parse_csv(self._parse_column) 4365 4366 self._match_text_seq(")", "AGAINST", "(") 4367 4368 this = self._parse_string() 4369 4370 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4371 modifier = "IN NATURAL LANGUAGE MODE" 4372 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4373 modifier = f"{modifier} WITH QUERY EXPANSION" 4374 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4375 modifier = "IN 
BOOLEAN MODE" 4376 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4377 modifier = "WITH QUERY EXPANSION" 4378 else: 4379 modifier = None 4380 4381 return self.expression( 4382 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4383 ) 4384 4385 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4386 def _parse_open_json(self) -> exp.OpenJSON: 4387 this = self._parse_bitwise() 4388 path = self._match(TokenType.COMMA) and self._parse_string() 4389 4390 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4391 this = self._parse_field(any_token=True) 4392 kind = self._parse_types() 4393 path = self._parse_string() 4394 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4395 4396 return self.expression( 4397 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4398 ) 4399 4400 expressions = None 4401 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4402 self._match_l_paren() 4403 expressions = self._parse_csv(_parse_open_json_column_def) 4404 4405 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4406 4407 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4408 args = self._parse_csv(self._parse_bitwise) 4409 4410 if self._match(TokenType.IN): 4411 return self.expression( 4412 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4413 ) 4414 4415 if haystack_first: 4416 haystack = seq_get(args, 0) 4417 needle = seq_get(args, 1) 4418 else: 4419 needle = seq_get(args, 0) 4420 haystack = seq_get(args, 1) 4421 4422 return self.expression( 4423 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4424 ) 4425 4426 def _parse_predict(self) -> exp.Predict: 4427 self._match_text_seq("MODEL") 4428 this = self._parse_table() 4429 4430 self._match(TokenType.COMMA) 4431 self._match_text_seq("TABLE") 4432 4433 return self.expression( 4434 exp.Predict, 4435 this=this, 4436 
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join-hint pseudo-function's table list, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects), the first expr is the pattern
            # and the second is the target, so swap them.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-level WINDOW clause of named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        # One entry of the WINDOW clause: <name> AS (<spec>).
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when an IGNORE NULLS / RESPECT NULLS suffix follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes after `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); with `alias=True`, parse a named
        window definition instead (<name> AS (<spec>)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows: there is no window at all.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            # Another OVER-like keyword follows: parse a chained window around this one.
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # Hook so dialects can customize the PARTITION BY / ORDER BY pair.
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        Args:
            explicit: when True, only treat what follows as an alias if AS was present.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; many keywords are allowed as identifiers."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected; always marked quoted.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a bare keyword/name into exp.Var; falls back to placeholders."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) ->
t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Dispatch to a registered placeholder parser; rewind if it yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * EXCEPT — one column or a parenthesized column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * REPLACE — one expression or a parenthesized list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments hanging on the separator are attached to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) ->
t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions` over `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT embedded in DDL (e.g. CREATE TABLE ... AS ...).
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens, e.g. ISOLATION LEVEL ... keywords.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" 
".join(mode)) 4809 if not self._match(TokenType.COMMA): 4810 break 4811 4812 return self.expression(exp.Transaction, this=this, modes=modes) 4813 4814 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4815 chain = None 4816 savepoint = None 4817 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4818 4819 self._match_texts({"TRANSACTION", "WORK"}) 4820 4821 if self._match_text_seq("TO"): 4822 self._match_text_seq("SAVEPOINT") 4823 savepoint = self._parse_id_var() 4824 4825 if self._match(TokenType.AND): 4826 chain = not self._match_text_seq("NO") 4827 self._match_text_seq("CHAIN") 4828 4829 if is_rollback: 4830 return self.expression(exp.Rollback, savepoint=savepoint) 4831 4832 return self.expression(exp.Commit, chain=chain) 4833 4834 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4835 if not self._match_text_seq("ADD"): 4836 return None 4837 4838 self._match(TokenType.COLUMN) 4839 exists_column = self._parse_exists(not_=True) 4840 expression = self._parse_field_def() 4841 4842 if expression: 4843 expression.set("exists", exists_column) 4844 4845 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4846 if self._match_texts(("FIRST", "AFTER")): 4847 position = self._prev.text 4848 column_position = self.expression( 4849 exp.ColumnPosition, this=self._parse_column(), position=position 4850 ) 4851 expression.set("position", column_position) 4852 4853 return expression 4854 4855 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4856 drop = self._match(TokenType.DROP) and self._parse_drop() 4857 if drop and not isinstance(drop, exp.Command): 4858 drop.set("kind", drop.args.get("kind", "COLUMN")) 4859 return drop 4860 4861 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4862 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4863 return self.expression( 4864 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ALTER TABLE ... ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY."""
        this = None
        # The introducing token (CONSTRAINT / FOREIGN KEY / ...) was already consumed.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD: constraints or column defs."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            # Dialect allows ADD without the COLUMN keyword.
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <change>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Remaining form: [SET DATA] TYPE <type> [COLLATE ...] [USING ...].
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE)
and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unrecognized forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # Dispatch on the action keyword (ADD / DROP / ALTER / RENAME / ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if the whole statement was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None if neither given.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", 
"SOURCE") 4984 ) 4985 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4986 4987 self._match(TokenType.THEN) 4988 4989 if self._match(TokenType.INSERT): 4990 _this = self._parse_star() 4991 if _this: 4992 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4993 else: 4994 then = self.expression( 4995 exp.Insert, 4996 this=self._parse_value(), 4997 expression=self._match(TokenType.VALUES) and self._parse_value(), 4998 ) 4999 elif self._match(TokenType.UPDATE): 5000 expressions = self._parse_star() 5001 if expressions: 5002 then = self.expression(exp.Update, expressions=expressions) 5003 else: 5004 then = self.expression( 5005 exp.Update, 5006 expressions=self._match(TokenType.SET) 5007 and self._parse_csv(self._parse_equality), 5008 ) 5009 elif self._match(TokenType.DELETE): 5010 then = self.expression(exp.Var, this=self._prev.text) 5011 else: 5012 then = None 5013 5014 whens.append( 5015 self.expression( 5016 exp.When, 5017 matched=matched, 5018 source=source, 5019 condition=condition, 5020 then=then, 5021 ) 5022 ) 5023 5024 return self.expression( 5025 exp.Merge, 5026 this=target, 5027 using=using, 5028 on=on, 5029 expressions=whens, 5030 ) 5031 5032 def _parse_show(self) -> t.Optional[exp.Expression]: 5033 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5034 if parser: 5035 return parser(self) 5036 return self._parse_as_command(self._prev) 5037 5038 def _parse_set_item_assignment( 5039 self, kind: t.Optional[str] = None 5040 ) -> t.Optional[exp.Expression]: 5041 index = self._index 5042 5043 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5044 return self._parse_set_transaction(global_=kind == "GLOBAL") 5045 5046 left = self._parse_primary() or self._parse_id_var() 5047 assignment_delimiter = self._match_texts(("=", "TO")) 5048 5049 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5050 self._retreat(index) 5051 return None 5052 5053 
right = self._parse_statement() or self._parse_id_var() 5054 this = self.expression(exp.EQ, this=left, expression=right) 5055 5056 return self.expression(exp.SetItem, this=this, kind=kind) 5057 5058 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5059 self._match_text_seq("TRANSACTION") 5060 characteristics = self._parse_csv( 5061 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5062 ) 5063 return self.expression( 5064 exp.SetItem, 5065 expressions=characteristics, 5066 kind="TRANSACTION", 5067 **{"global": global_}, # type: ignore 5068 ) 5069 5070 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5071 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5072 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5073 5074 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5075 index = self._index 5076 set_ = self.expression( 5077 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5078 ) 5079 5080 if self._curr: 5081 self._retreat(index) 5082 return self._parse_as_command(self._prev) 5083 5084 return set_ 5085 5086 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5087 for option in options: 5088 if self._match_text_seq(*option.split(" ")): 5089 return exp.var(option) 5090 return None 5091 5092 def _parse_as_command(self, start: Token) -> exp.Command: 5093 while self._curr: 5094 self._advance() 5095 text = self._find_sql(start, self._prev) 5096 size = len(start.text) 5097 return exp.Command(this=text[:size], expression=text[size:]) 5098 5099 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5100 settings = [] 5101 5102 self._match_l_paren() 5103 kind = self._parse_id_var() 5104 5105 if self._match(TokenType.L_PAREN): 5106 while True: 5107 key = self._parse_id_var() 5108 value = self._parse_primary() 5109 5110 if not key and value is None: 5111 break 5112 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5113 self._match(TokenType.R_PAREN) 5114 5115 self._match_r_paren() 5116 5117 return self.expression( 5118 exp.DictProperty, 5119 this=this, 5120 kind=kind.this if kind else None, 5121 settings=settings, 5122 ) 5123 5124 def _parse_dict_range(self, this: str) -> exp.DictRange: 5125 self._match_l_paren() 5126 has_min = self._match_text_seq("MIN") 5127 if has_min: 5128 min = self._parse_var() or self._parse_primary() 5129 self._match_text_seq("MAX") 5130 max = self._parse_var() or self._parse_primary() 5131 else: 5132 max = self._parse_var() or self._parse_primary() 5133 min = exp.Literal.number(0) 5134 self._match_r_paren() 5135 return self.expression(exp.DictRange, this=this, min=min, max=max) 5136 5137 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5138 index = self._index 5139 expression = self._parse_column() 5140 if not self._match(TokenType.IN): 5141 self._retreat(index - 1) 5142 return None 5143 iterator = self._parse_column() 5144 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5145 return self.expression( 5146 exp.Comprehension, 5147 this=this, 5148 expression=expression, 5149 iterator=iterator, 5150 condition=condition, 5151 ) 5152 5153 def _find_parser( 5154 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5155 ) -> t.Optional[t.Callable]: 5156 if not self._curr: 5157 return None 5158 5159 index = self._index 5160 this = [] 5161 while True: 5162 # The current token might be multiple words 5163 curr = self._curr.text.upper() 5164 key = curr.split(" ") 5165 this.append(curr) 5166 5167 self._advance() 5168 result, trie = in_trie(trie, key) 5169 if result == TrieResult.FAILED: 5170 break 5171 5172 if result == TrieResult.EXISTS: 5173 subparser = parsers[" ".join(this)] 5174 return subparser 5175 5176 self._retreat(index) 5177 return None 5178 5179 def _match(self, token_type, advance=True, expression=None): 
5180 if not self._curr: 5181 return None 5182 5183 if self._curr.token_type == token_type: 5184 if advance: 5185 self._advance() 5186 self._add_comments(expression) 5187 return True 5188 5189 return None 5190 5191 def _match_set(self, types, advance=True): 5192 if not self._curr: 5193 return None 5194 5195 if self._curr.token_type in types: 5196 if advance: 5197 self._advance() 5198 return True 5199 5200 return None 5201 5202 def _match_pair(self, token_type_a, token_type_b, advance=True): 5203 if not self._curr or not self._next: 5204 return None 5205 5206 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5207 if advance: 5208 self._advance(2) 5209 return True 5210 5211 return None 5212 5213 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5214 if not self._match(TokenType.L_PAREN, expression=expression): 5215 self.raise_error("Expecting (") 5216 5217 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5218 if not self._match(TokenType.R_PAREN, expression=expression): 5219 self.raise_error("Expecting )") 5220 5221 def _match_texts(self, texts, advance=True): 5222 if self._curr and self._curr.text.upper() in texts: 5223 if advance: 5224 self._advance() 5225 return True 5226 return False 5227 5228 def _match_text_seq(self, *texts, advance=True): 5229 index = self._index 5230 for text in texts: 5231 if self._curr and self._curr.text.upper() == text: 5232 self._advance() 5233 else: 5234 self._retreat(index) 5235 return False 5236 5237 if not advance: 5238 self._retreat(index) 5239 5240 return True 5241 5242 @t.overload 5243 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5244 ... 5245 5246 @t.overload 5247 def _replace_columns_with_dots( 5248 self, this: t.Optional[exp.Expression] 5249 ) -> t.Optional[exp.Expression]: 5250 ... 
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot expressions.

        A Column with a table qualifier becomes Dot(table, column identifier);
        an unqualified Column collapses to its inner identifier.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters inside ``node``.

        Columns whose first part names a lambda variable are replaced by a
        Dot (when table-qualified) or by their inner identifier, so lambda
        parameters are not treated as real table columns. Returns the
        (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost enclosing Dot and replace that whole
                # chain; the else clause runs only when the column's immediate
                # parent is not a Dot (loop never broke).
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Wrap each value in COALESCE(CAST(value AS TEXT), '') so NULLs become
        empty strings; falsy entries are dropped from the result."""
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument yields a StarMap; otherwise the arguments are
    read as alternating key/value pairs and packed into a VarMap of two
    parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Keys sit at even offsets, their values immediately after them.
    pair_starts = range(0, len(args), 2)
    keys = [args[i] for i in pair_starts]
    values = [args[i + 1] for i in pair_starts]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.UDECIMAL, 183 TokenType.BIGDECIMAL, 184 TokenType.UUID, 185 TokenType.GEOGRAPHY, 186 TokenType.GEOMETRY, 187 TokenType.HLLSKETCH, 188 TokenType.HSTORE, 189 TokenType.PSEUDO_TYPE, 190 TokenType.SUPER, 191 TokenType.SERIAL, 192 TokenType.SMALLSERIAL, 193 TokenType.BIGSERIAL, 194 TokenType.XML, 195 TokenType.YEAR, 196 TokenType.UNIQUEIDENTIFIER, 197 TokenType.USERDEFINED, 198 TokenType.MONEY, 199 TokenType.SMALLMONEY, 200 TokenType.ROWVERSION, 201 TokenType.IMAGE, 202 TokenType.VARIANT, 203 TokenType.OBJECT, 204 TokenType.OBJECT_IDENTIFIER, 205 TokenType.INET, 206 TokenType.IPADDRESS, 207 TokenType.IPPREFIX, 208 TokenType.UNKNOWN, 209 TokenType.NULL, 210 *ENUM_TYPE_TOKENS, 211 
*NESTED_TYPE_TOKENS, 212 } 213 214 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 215 TokenType.BIGINT: TokenType.UBIGINT, 216 TokenType.INT: TokenType.UINT, 217 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 218 TokenType.SMALLINT: TokenType.USMALLINT, 219 TokenType.TINYINT: TokenType.UTINYINT, 220 TokenType.DECIMAL: TokenType.UDECIMAL, 221 } 222 223 SUBQUERY_PREDICATES = { 224 TokenType.ANY: exp.Any, 225 TokenType.ALL: exp.All, 226 TokenType.EXISTS: exp.Exists, 227 TokenType.SOME: exp.Any, 228 } 229 230 RESERVED_KEYWORDS = { 231 *Tokenizer.SINGLE_TOKENS.values(), 232 TokenType.SELECT, 233 } 234 235 DB_CREATABLES = { 236 TokenType.DATABASE, 237 TokenType.SCHEMA, 238 TokenType.TABLE, 239 TokenType.VIEW, 240 TokenType.MODEL, 241 TokenType.DICTIONARY, 242 } 243 244 CREATABLES = { 245 TokenType.COLUMN, 246 TokenType.FUNCTION, 247 TokenType.INDEX, 248 TokenType.PROCEDURE, 249 *DB_CREATABLES, 250 } 251 252 # Tokens that can represent identifiers 253 ID_VAR_TOKENS = { 254 TokenType.VAR, 255 TokenType.ANTI, 256 TokenType.APPLY, 257 TokenType.ASC, 258 TokenType.AUTO_INCREMENT, 259 TokenType.BEGIN, 260 TokenType.CACHE, 261 TokenType.CASE, 262 TokenType.COLLATE, 263 TokenType.COMMAND, 264 TokenType.COMMENT, 265 TokenType.COMMIT, 266 TokenType.CONSTRAINT, 267 TokenType.DEFAULT, 268 TokenType.DELETE, 269 TokenType.DESC, 270 TokenType.DESCRIBE, 271 TokenType.DICTIONARY, 272 TokenType.DIV, 273 TokenType.END, 274 TokenType.EXECUTE, 275 TokenType.ESCAPE, 276 TokenType.FALSE, 277 TokenType.FIRST, 278 TokenType.FILTER, 279 TokenType.FORMAT, 280 TokenType.FULL, 281 TokenType.IS, 282 TokenType.ISNULL, 283 TokenType.INTERVAL, 284 TokenType.KEEP, 285 TokenType.KILL, 286 TokenType.LEFT, 287 TokenType.LOAD, 288 TokenType.MERGE, 289 TokenType.NATURAL, 290 TokenType.NEXT, 291 TokenType.OFFSET, 292 TokenType.ORDINALITY, 293 TokenType.OVERLAPS, 294 TokenType.OVERWRITE, 295 TokenType.PARTITION, 296 TokenType.PERCENT, 297 TokenType.PIVOT, 298 TokenType.PRAGMA, 299 TokenType.RANGE, 300 TokenType.REFERENCES, 301 
TokenType.RIGHT, 302 TokenType.ROW, 303 TokenType.ROWS, 304 TokenType.SEMI, 305 TokenType.SET, 306 TokenType.SETTINGS, 307 TokenType.SHOW, 308 TokenType.TEMPORARY, 309 TokenType.TOP, 310 TokenType.TRUE, 311 TokenType.UNIQUE, 312 TokenType.UNPIVOT, 313 TokenType.UPDATE, 314 TokenType.VOLATILE, 315 TokenType.WINDOW, 316 *CREATABLES, 317 *SUBQUERY_PREDICATES, 318 *TYPE_TOKENS, 319 *NO_PAREN_FUNCTIONS, 320 } 321 322 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 323 324 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 325 TokenType.ANTI, 326 TokenType.APPLY, 327 TokenType.ASOF, 328 TokenType.FULL, 329 TokenType.LEFT, 330 TokenType.LOCK, 331 TokenType.NATURAL, 332 TokenType.OFFSET, 333 TokenType.RIGHT, 334 TokenType.SEMI, 335 TokenType.WINDOW, 336 } 337 338 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 339 340 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 341 342 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 343 344 FUNC_TOKENS = { 345 TokenType.COLLATE, 346 TokenType.COMMAND, 347 TokenType.CURRENT_DATE, 348 TokenType.CURRENT_DATETIME, 349 TokenType.CURRENT_TIMESTAMP, 350 TokenType.CURRENT_TIME, 351 TokenType.CURRENT_USER, 352 TokenType.FILTER, 353 TokenType.FIRST, 354 TokenType.FORMAT, 355 TokenType.GLOB, 356 TokenType.IDENTIFIER, 357 TokenType.INDEX, 358 TokenType.ISNULL, 359 TokenType.ILIKE, 360 TokenType.INSERT, 361 TokenType.LIKE, 362 TokenType.MERGE, 363 TokenType.OFFSET, 364 TokenType.PRIMARY_KEY, 365 TokenType.RANGE, 366 TokenType.REPLACE, 367 TokenType.RLIKE, 368 TokenType.ROW, 369 TokenType.UNNEST, 370 TokenType.VAR, 371 TokenType.LEFT, 372 TokenType.RIGHT, 373 TokenType.DATE, 374 TokenType.DATETIME, 375 TokenType.TABLE, 376 TokenType.TIMESTAMP, 377 TokenType.TIMESTAMPTZ, 378 TokenType.WINDOW, 379 TokenType.XOR, 380 *TYPE_TOKENS, 381 *SUBQUERY_PREDICATES, 382 } 383 384 CONJUNCTION = { 385 TokenType.AND: exp.And, 386 TokenType.OR: exp.Or, 387 } 388 389 EQUALITY = { 390 TokenType.EQ: exp.EQ, 391 TokenType.NEQ: exp.NEQ, 392 
TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 393 } 394 395 COMPARISON = { 396 TokenType.GT: exp.GT, 397 TokenType.GTE: exp.GTE, 398 TokenType.LT: exp.LT, 399 TokenType.LTE: exp.LTE, 400 } 401 402 BITWISE = { 403 TokenType.AMP: exp.BitwiseAnd, 404 TokenType.CARET: exp.BitwiseXor, 405 TokenType.PIPE: exp.BitwiseOr, 406 TokenType.DPIPE: exp.DPipe, 407 } 408 409 TERM = { 410 TokenType.DASH: exp.Sub, 411 TokenType.PLUS: exp.Add, 412 TokenType.MOD: exp.Mod, 413 TokenType.COLLATE: exp.Collate, 414 } 415 416 FACTOR = { 417 TokenType.DIV: exp.IntDiv, 418 TokenType.LR_ARROW: exp.Distance, 419 TokenType.SLASH: exp.Div, 420 TokenType.STAR: exp.Mul, 421 } 422 423 TIMES = { 424 TokenType.TIME, 425 TokenType.TIMETZ, 426 } 427 428 TIMESTAMPS = { 429 TokenType.TIMESTAMP, 430 TokenType.TIMESTAMPTZ, 431 TokenType.TIMESTAMPLTZ, 432 *TIMES, 433 } 434 435 SET_OPERATIONS = { 436 TokenType.UNION, 437 TokenType.INTERSECT, 438 TokenType.EXCEPT, 439 } 440 441 JOIN_METHODS = { 442 TokenType.NATURAL, 443 TokenType.ASOF, 444 } 445 446 JOIN_SIDES = { 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.FULL, 450 } 451 452 JOIN_KINDS = { 453 TokenType.INNER, 454 TokenType.OUTER, 455 TokenType.CROSS, 456 TokenType.SEMI, 457 TokenType.ANTI, 458 } 459 460 JOIN_HINTS: t.Set[str] = set() 461 462 LAMBDAS = { 463 TokenType.ARROW: lambda self, expressions: self.expression( 464 exp.Lambda, 465 this=self._replace_lambda( 466 self._parse_conjunction(), 467 {node.name for node in expressions}, 468 ), 469 expressions=expressions, 470 ), 471 TokenType.FARROW: lambda self, expressions: self.expression( 472 exp.Kwarg, 473 this=exp.var(expressions[0].name), 474 expression=self._parse_conjunction(), 475 ), 476 } 477 478 COLUMN_OPERATORS = { 479 TokenType.DOT: None, 480 TokenType.DCOLON: lambda self, this, to: self.expression( 481 exp.Cast if self.STRICT_CAST else exp.TryCast, 482 this=this, 483 to=to, 484 ), 485 TokenType.ARROW: lambda self, this, path: self.expression( 486 exp.JSONExtract, 487 this=this, 488 
expression=path, 489 ), 490 TokenType.DARROW: lambda self, this, path: self.expression( 491 exp.JSONExtractScalar, 492 this=this, 493 expression=path, 494 ), 495 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 496 exp.JSONBExtract, 497 this=this, 498 expression=path, 499 ), 500 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 501 exp.JSONBExtractScalar, 502 this=this, 503 expression=path, 504 ), 505 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 506 exp.JSONBContains, 507 this=this, 508 expression=key, 509 ), 510 } 511 512 EXPRESSION_PARSERS = { 513 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 514 exp.Column: lambda self: self._parse_column(), 515 exp.Condition: lambda self: self._parse_conjunction(), 516 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 517 exp.Expression: lambda self: self._parse_statement(), 518 exp.From: lambda self: self._parse_from(), 519 exp.Group: lambda self: self._parse_group(), 520 exp.Having: lambda self: self._parse_having(), 521 exp.Identifier: lambda self: self._parse_id_var(), 522 exp.Join: lambda self: self._parse_join(), 523 exp.Lambda: lambda self: self._parse_lambda(), 524 exp.Lateral: lambda self: self._parse_lateral(), 525 exp.Limit: lambda self: self._parse_limit(), 526 exp.Offset: lambda self: self._parse_offset(), 527 exp.Order: lambda self: self._parse_order(), 528 exp.Ordered: lambda self: self._parse_ordered(), 529 exp.Properties: lambda self: self._parse_properties(), 530 exp.Qualify: lambda self: self._parse_qualify(), 531 exp.Returning: lambda self: self._parse_returning(), 532 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 533 exp.Table: lambda self: self._parse_table_parts(), 534 exp.TableAlias: lambda self: self._parse_table_alias(), 535 exp.Where: lambda self: self._parse_where(), 536 exp.Window: lambda self: self._parse_named_window(), 537 exp.With: lambda self: self._parse_with(), 538 
"JOIN_TYPE": lambda self: self._parse_join_parts(), 539 } 540 541 STATEMENT_PARSERS = { 542 TokenType.ALTER: lambda self: self._parse_alter(), 543 TokenType.BEGIN: lambda self: self._parse_transaction(), 544 TokenType.CACHE: lambda self: self._parse_cache(), 545 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 546 TokenType.COMMENT: lambda self: self._parse_comment(), 547 TokenType.CREATE: lambda self: self._parse_create(), 548 TokenType.DELETE: lambda self: self._parse_delete(), 549 TokenType.DESC: lambda self: self._parse_describe(), 550 TokenType.DESCRIBE: lambda self: self._parse_describe(), 551 TokenType.DROP: lambda self: self._parse_drop(), 552 TokenType.INSERT: lambda self: self._parse_insert(), 553 TokenType.KILL: lambda self: self._parse_kill(), 554 TokenType.LOAD: lambda self: self._parse_load(), 555 TokenType.MERGE: lambda self: self._parse_merge(), 556 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 557 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 558 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 559 TokenType.SET: lambda self: self._parse_set(), 560 TokenType.UNCACHE: lambda self: self._parse_uncache(), 561 TokenType.UPDATE: lambda self: self._parse_update(), 562 TokenType.USE: lambda self: self.expression( 563 exp.Use, 564 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 565 and exp.var(self._prev.text), 566 this=self._parse_table(schema=False), 567 ), 568 } 569 570 UNARY_PARSERS = { 571 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 572 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 573 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 574 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 575 } 576 577 PRIMARY_PARSERS = { 578 TokenType.STRING: lambda self, token: self.expression( 579 exp.Literal, 
this=token.text, is_string=True 580 ), 581 TokenType.NUMBER: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=False 583 ), 584 TokenType.STAR: lambda self, _: self.expression( 585 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 586 ), 587 TokenType.NULL: lambda self, _: self.expression(exp.Null), 588 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 589 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 590 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 591 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 592 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 593 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 594 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 595 exp.National, this=token.text 596 ), 597 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 598 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 599 exp.RawString, this=token.text 600 ), 601 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 602 } 603 604 PLACEHOLDER_PARSERS = { 605 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 606 TokenType.PARAMETER: lambda self: self._parse_parameter(), 607 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 608 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 609 else None, 610 } 611 612 RANGE_PARSERS = { 613 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 614 TokenType.GLOB: binary_range_parser(exp.Glob), 615 TokenType.ILIKE: binary_range_parser(exp.ILike), 616 TokenType.IN: lambda self, this: self._parse_in(this), 617 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 618 TokenType.IS: lambda self, this: 
self._parse_is(this), 619 TokenType.LIKE: binary_range_parser(exp.Like), 620 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 621 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 622 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 623 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 624 } 625 626 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 627 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 628 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 629 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 630 "CHARACTER SET": lambda self: self._parse_character_set(), 631 "CHECKSUM": lambda self: self._parse_checksum(), 632 "CLUSTER BY": lambda self: self._parse_cluster(), 633 "CLUSTERED": lambda self: self._parse_clustered_by(), 634 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 635 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 636 "COPY": lambda self: self._parse_copy_property(), 637 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 638 "DEFINER": lambda self: self._parse_definer(), 639 "DETERMINISTIC": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 641 ), 642 "DISTKEY": lambda self: self._parse_distkey(), 643 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 644 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 645 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 646 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 647 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 648 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "FREESPACE": lambda self: self._parse_freespace(), 650 "HEAP": lambda self: self.expression(exp.HeapProperty), 651 "IMMUTABLE": 
lambda self: self.expression( 652 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 653 ), 654 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 655 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 656 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 657 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 658 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 659 "LIKE": lambda self: self._parse_create_like(), 660 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 661 "LOCK": lambda self: self._parse_locking(), 662 "LOCKING": lambda self: self._parse_locking(), 663 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 664 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 665 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 666 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 667 "NO": lambda self: self._parse_no_property(), 668 "ON": lambda self: self._parse_on_property(), 669 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 670 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 671 "PARTITION BY": lambda self: self._parse_partitioned_by(), 672 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 673 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 674 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 675 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 676 "REMOTE": lambda self: self._parse_remote_with_connection(), 677 "RETURNS": lambda self: self._parse_returns(), 678 "ROW": lambda self: self._parse_row(), 679 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 680 "SAMPLE": lambda self: self.expression( 681 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 682 ), 683 
"SET": lambda self: self.expression(exp.SetProperty, multi=False), 684 "SETTINGS": lambda self: self.expression( 685 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 686 ), 687 "SORTKEY": lambda self: self._parse_sortkey(), 688 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 689 "STABLE": lambda self: self.expression( 690 exp.StabilityProperty, this=exp.Literal.string("STABLE") 691 ), 692 "STORED": lambda self: self._parse_stored(), 693 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 694 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 695 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 696 "TO": lambda self: self._parse_to_table(), 697 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 698 "TRANSFORM": lambda self: self.expression( 699 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 700 ), 701 "TTL": lambda self: self._parse_ttl(), 702 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 703 "VOLATILE": lambda self: self._parse_volatile_property(), 704 "WITH": lambda self: self._parse_with_property(), 705 } 706 707 CONSTRAINT_PARSERS = { 708 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 709 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 710 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 711 "CHARACTER SET": lambda self: self.expression( 712 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 713 ), 714 "CHECK": lambda self: self.expression( 715 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 716 ), 717 "COLLATE": lambda self: self.expression( 718 exp.CollateColumnConstraint, this=self._parse_var() 719 ), 720 "COMMENT": lambda self: self.expression( 721 exp.CommentColumnConstraint, this=self._parse_string() 722 ), 723 "COMPRESS": lambda self: self._parse_compress(), 724 
"CLUSTERED": lambda self: self.expression( 725 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 726 ), 727 "NONCLUSTERED": lambda self: self.expression( 728 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 729 ), 730 "DEFAULT": lambda self: self.expression( 731 exp.DefaultColumnConstraint, this=self._parse_bitwise() 732 ), 733 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 734 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 735 "FORMAT": lambda self: self.expression( 736 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 737 ), 738 "GENERATED": lambda self: self._parse_generated_as_identity(), 739 "IDENTITY": lambda self: self._parse_auto_increment(), 740 "INLINE": lambda self: self._parse_inline(), 741 "LIKE": lambda self: self._parse_create_like(), 742 "NOT": lambda self: self._parse_not_constraint(), 743 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 744 "ON": lambda self: ( 745 self._match(TokenType.UPDATE) 746 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 747 ) 748 or self.expression(exp.OnProperty, this=self._parse_id_var()), 749 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 750 "PRIMARY KEY": lambda self: self._parse_primary_key(), 751 "REFERENCES": lambda self: self._parse_references(match=False), 752 "TITLE": lambda self: self.expression( 753 exp.TitleColumnConstraint, this=self._parse_var_or_string() 754 ), 755 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 756 "UNIQUE": lambda self: self._parse_unique(), 757 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 758 "WITH": lambda self: self.expression( 759 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 760 ), 761 } 762 763 ALTER_PARSERS = { 764 "ADD": lambda self: 
        self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without parentheses around their argument.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never be the name of a function call.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(1 AS x)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with non-standard argument grammars that need a dedicated parser
    # instead of the generic argument-list parsing.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Maps the token that introduces a query modifier clause to a callable returning a
    # ("modifier key", parsed expression) pair; note FETCH shares the "limit" slot with
    # LIMIT, and FOR/LOCK both feed the "locks" slot.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Scope/kind prefixes understood by the SET command.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; the metaclass builds SHOW_TRIE from its keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Maps a data type to a callable that wraps a literal cast to that type.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types to which query modifiers (WHERE, LIMIT, ...) may be attached.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable,
        exp.Table)

    # Tokens that can start the SELECT part of a DDL statement (CREATE ... AS <select>).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may legally precede VOLATILE in a CREATE statement (see
    # _parse_volatile_property, which looks two tokens back).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # ID_VAR_TOKENS is defined earlier in the class (above this chunk).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Dialect feature flags; subclasses override these to tweak parsing behavior.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed; surface all collected errors, chained
        # to the last failure.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream on semicolons into per-statement
        # chunks, then runs parse_method over each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _index starts at -1 so the first _advance() lands on token 0.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over after parse_method means the statement did not
            # fully parse.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending SQL in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # With no explicit comments, attach (and consume) any comments pending on
        # the previously seen token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers pending token comments onto the given expression, at most once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens (inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wraps the previous token plus the rest of the statement as an
        # opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action clause.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Top-level dispatch for a single statement; returns None on empty input.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns a truthy value only if the full phrase
        # was consumed.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION (BigQuery): skip TABLE so FUNCTION is the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            # Snowflake-style CLONE/COPY clause, optionally with AT/BEFORE time travel.
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional qualifier keywords that may precede the property name; each is
        # forwarded to the property parser as a keyword argument if present.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # TypeError here means the parser does not accept one of the
                # matched qualifiers — report it as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses one table/view property, trying keyword parsers first and falling
        # back to a generic key = value assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not an assignment after all — rewind so the caller can retry.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED AS [INPUTFORMAT <s> OUTPUTFORMAT <s> | <format>] (Hive-style).
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # <keyword> [= | AS] <field> — wraps the value in the given property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties into a single Properties node, or returns
        # None if there are none at the current position.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is a table property (Teradata) when preceded by CREATE/REPLACE/
        # UNIQUE, otherwise a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the various WITH-prefixed properties (WITH (...), WITH JOURNAL,
        # WITH [NO] DATA, WITH ... ISOLATED LOADING).
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None when neither ON nor OFF
        # was given.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on,
            default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS (Hive).
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Expects COPY GRANTS; rewinds past the already-consumed COPY otherwise.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With "=", an explicit numeric ratio follows; otherwise only the NO/DEFAULT
        # qualifiers apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<target>] FOR/IN <lock type> [OVERRIDE] (Teradata).
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns the PARTITION BY expressions, or [] when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; statistics stays None when the
        # AND clause is absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE/DELETE ROWS, or a generic ON <schema> property.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING/EXCLUDING <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> or RETURNS TABLE [<...>] for table-valued functions.
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        # INSERT [OVERWRITE] [IGNORE] [LOCAL DIRECTORY | [OR <alt>] [INTO] [TABLE] <table>] ...
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [row format].
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the select in some dialects.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <expr, ...> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause once the ROW token has been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its optional terminator/escape options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; any other LOAD
        statement falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after the WHERE clause.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before or after the WHERE clause.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single ('key' = 'value') pair is accepted.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr, ...>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row (wrapped or bare) into a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized query, attaching any leading CTEs."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
limit = self._parse_limit(top=True) 2128 projections = self._parse_projections() 2129 2130 this = self.expression( 2131 exp.Select, 2132 kind=kind, 2133 hint=hint, 2134 distinct=distinct, 2135 expressions=projections, 2136 limit=limit, 2137 ) 2138 this.comments = comments 2139 2140 into = self._parse_into() 2141 if into: 2142 this.set("into", into) 2143 2144 if not from_: 2145 from_ = self._parse_from() 2146 2147 if from_: 2148 this.set("from", from_) 2149 2150 this = self._parse_query_modifiers(this) 2151 elif (table or nested) and self._match(TokenType.L_PAREN): 2152 if self._match(TokenType.PIVOT): 2153 this = self._parse_simplified_pivot() 2154 elif self._match(TokenType.FROM): 2155 this = exp.select("*").from_( 2156 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2157 ) 2158 else: 2159 this = self._parse_table() if table else self._parse_select(nested=True) 2160 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2161 2162 self._match_r_paren() 2163 2164 # We return early here so that the UNION isn't attached to the subquery by the 2165 # following call to _parse_set_operations, but instead becomes the parent node 2166 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2167 elif self._match(TokenType.VALUES): 2168 this = self.expression( 2169 exp.Values, 2170 expressions=self._parse_csv(self._parse_value), 2171 alias=self._parse_table_alias(), 2172 ) 2173 elif from_: 2174 this = exp.select("*").from_(from_.this, copy=False) 2175 else: 2176 this = None 2177 2178 return self._parse_set_operations(this) 2179 2180 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2181 if not skip_with_token and not self._match(TokenType.WITH): 2182 return None 2183 2184 comments = self._prev_comments 2185 recursive = self._match(TokenType.RECURSIVE) 2186 2187 expressions = [] 2188 while True: 2189 expressions.append(self._parse_cte()) 2190 2191 if not self._match(TokenType.COMMA) and not 
self._match(TokenType.WITH): 2192 break 2193 else: 2194 self._match(TokenType.WITH) 2195 2196 return self.expression( 2197 exp.With, comments=comments, expressions=expressions, recursive=recursive 2198 ) 2199 2200 def _parse_cte(self) -> exp.CTE: 2201 alias = self._parse_table_alias() 2202 if not alias or not alias.this: 2203 self.raise_error("Expected CTE to have alias") 2204 2205 self._match(TokenType.ALIAS) 2206 return self.expression( 2207 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2208 ) 2209 2210 def _parse_table_alias( 2211 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2212 ) -> t.Optional[exp.TableAlias]: 2213 any_token = self._match(TokenType.ALIAS) 2214 alias = ( 2215 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2216 or self._parse_string_as_identifier() 2217 ) 2218 2219 index = self._index 2220 if self._match(TokenType.L_PAREN): 2221 columns = self._parse_csv(self._parse_function_parameter) 2222 self._match_r_paren() if columns else self._retreat(index) 2223 else: 2224 columns = None 2225 2226 if not alias and not columns: 2227 return None 2228 2229 return self.expression(exp.TableAlias, this=alias, columns=columns) 2230 2231 def _parse_subquery( 2232 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2233 ) -> t.Optional[exp.Subquery]: 2234 if not this: 2235 return None 2236 2237 return self.expression( 2238 exp.Subquery, 2239 this=this, 2240 pivots=self._parse_pivots(), 2241 alias=self._parse_table_alias() if parse_alias else None, 2242 ) 2243 2244 def _parse_query_modifiers( 2245 self, this: t.Optional[exp.Expression] 2246 ) -> t.Optional[exp.Expression]: 2247 if isinstance(this, self.MODIFIABLES): 2248 for join in iter(self._parse_join, None): 2249 this.append("joins", join) 2250 for lateral in iter(self._parse_lateral, None): 2251 this.append("laterals", lateral) 2252 2253 while True: 2254 if self._match_set(self.QUERY_MODIFIER_PARSERS, 
advance=False): 2255 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2256 key, expression = parser(self) 2257 2258 if expression: 2259 this.set(key, expression) 2260 if key == "limit": 2261 offset = expression.args.pop("offset", None) 2262 if offset: 2263 this.set("offset", exp.Offset(expression=offset)) 2264 continue 2265 break 2266 return this 2267 2268 def _parse_hint(self) -> t.Optional[exp.Hint]: 2269 if self._match(TokenType.HINT): 2270 hints = [] 2271 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2272 hints.extend(hint) 2273 2274 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2275 self.raise_error("Expected */ after HINT") 2276 2277 return self.expression(exp.Hint, expressions=hints) 2278 2279 return None 2280 2281 def _parse_into(self) -> t.Optional[exp.Into]: 2282 if not self._match(TokenType.INTO): 2283 return None 2284 2285 temp = self._match(TokenType.TEMPORARY) 2286 unlogged = self._match_text_seq("UNLOGGED") 2287 self._match(TokenType.TABLE) 2288 2289 return self.expression( 2290 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2291 ) 2292 2293 def _parse_from( 2294 self, joins: bool = False, skip_from_token: bool = False 2295 ) -> t.Optional[exp.From]: 2296 if not skip_from_token and not self._match(TokenType.FROM): 2297 return None 2298 2299 return self.expression( 2300 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2301 ) 2302 2303 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2304 if not self._match(TokenType.MATCH_RECOGNIZE): 2305 return None 2306 2307 self._match_l_paren() 2308 2309 partition = self._parse_partition_by() 2310 order = self._parse_order() 2311 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2312 2313 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2314 rows = exp.var("ONE ROW PER MATCH") 2315 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2316 text = 
"ALL ROWS PER MATCH" 2317 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2318 text += f" SHOW EMPTY MATCHES" 2319 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2320 text += f" OMIT EMPTY MATCHES" 2321 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2322 text += f" WITH UNMATCHED ROWS" 2323 rows = exp.var(text) 2324 else: 2325 rows = None 2326 2327 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2328 text = "AFTER MATCH SKIP" 2329 if self._match_text_seq("PAST", "LAST", "ROW"): 2330 text += f" PAST LAST ROW" 2331 elif self._match_text_seq("TO", "NEXT", "ROW"): 2332 text += f" TO NEXT ROW" 2333 elif self._match_text_seq("TO", "FIRST"): 2334 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2335 elif self._match_text_seq("TO", "LAST"): 2336 text += f" TO LAST {self._advance_any().text}" # type: ignore 2337 after = exp.var(text) 2338 else: 2339 after = None 2340 2341 if self._match_text_seq("PATTERN"): 2342 self._match_l_paren() 2343 2344 if not self._curr: 2345 self.raise_error("Expecting )", self._curr) 2346 2347 paren = 1 2348 start = self._curr 2349 2350 while self._curr and paren > 0: 2351 if self._curr.token_type == TokenType.L_PAREN: 2352 paren += 1 2353 if self._curr.token_type == TokenType.R_PAREN: 2354 paren -= 1 2355 2356 end = self._prev 2357 self._advance() 2358 2359 if paren > 0: 2360 self.raise_error("Expecting )", self._curr) 2361 2362 pattern = exp.var(self._find_sql(start, end)) 2363 else: 2364 pattern = None 2365 2366 define = ( 2367 self._parse_csv( 2368 lambda: self.expression( 2369 exp.Alias, 2370 alias=self._parse_id_var(any_token=True), 2371 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2372 ) 2373 ) 2374 if self._match_text_seq("DEFINE") 2375 else None 2376 ) 2377 2378 self._match_r_paren() 2379 2380 return self.expression( 2381 exp.MatchRecognize, 2382 partition_by=partition, 2383 order=order, 2384 measures=measures, 2385 rows=rows, 2386 after=after, 2387 pattern=pattern, 2388 
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL UNNEST(...), LATERAL <function>, or a bare
            # identifier, optionally followed by dotted parts.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause, including comma joins and APPLY forms."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed: undo the speculative method/side/kind match.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Try to fold a nested join whose ON/USING belongs to this one.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body; `index` is pre-parsed when the name came first."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) table hints or MySQL index hints; None when absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse catalog.db.table (arbitrarily deep dotted names) into a Table node."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, values, subquery or plain table,
        along with its version clause, alias, hints, pivots, sample and joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialects disagree on whether the alias comes before or after TABLESAMPLE.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal clauses: FOR SYSTEM_TIME/VERSION AS OF, FROM/BETWEEN, CONTAINED IN, ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                # Dialects like BigQuery only alias the produced column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra trailing column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, possibly parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE / USING SAMPLE clause."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
bucket_field = None 2764 percent = None 2765 rows = None 2766 size = None 2767 seed = None 2768 2769 kind = ( 2770 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2771 ) 2772 method = self._parse_var(tokens=(TokenType.ROW,)) 2773 2774 matched_l_paren = self._match(TokenType.L_PAREN) 2775 2776 if self.TABLESAMPLE_CSV: 2777 num = None 2778 expressions = self._parse_csv(self._parse_primary) 2779 else: 2780 expressions = None 2781 num = ( 2782 self._parse_factor() 2783 if self._match(TokenType.NUMBER, advance=False) 2784 else self._parse_primary() 2785 ) 2786 2787 if self._match_text_seq("BUCKET"): 2788 bucket_numerator = self._parse_number() 2789 self._match_text_seq("OUT", "OF") 2790 bucket_denominator = bucket_denominator = self._parse_number() 2791 self._match(TokenType.ON) 2792 bucket_field = self._parse_field() 2793 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2794 percent = num 2795 elif self._match(TokenType.ROWS): 2796 rows = num 2797 elif num: 2798 size = num 2799 2800 if matched_l_paren: 2801 self._match_r_paren() 2802 2803 if self._match(TokenType.L_PAREN): 2804 method = self._parse_var() 2805 seed = self._match(TokenType.COMMA) and self._parse_number() 2806 self._match_r_paren() 2807 elif self._match_texts(("SEED", "REPEATABLE")): 2808 seed = self._parse_wrapped(self._parse_number) 2809 2810 return self.expression( 2811 exp.TableSample, 2812 expressions=expressions, 2813 method=method, 2814 bucket_numerator=bucket_numerator, 2815 bucket_denominator=bucket_denominator, 2816 bucket_field=bucket_field, 2817 percent=percent, 2818 rows=rows, 2819 size=size, 2820 seed=seed, 2821 kind=kind, 2822 ) 2823 2824 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2825 return list(iter(self._parse_pivot, None)) or None 2826 2827 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2828 return list(iter(self._parse_join, None)) or None 2829 2830 # https://duckdb.org/docs/sql/statements/pivot 2831 def 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT / UNPIVOT clause.

        Backtracks (via ``_retreat``) and returns None when the tokens turn out
        not to be a pivot after all, so callers can safely probe with it.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without "(": not actually a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT aggregates may carry aliases, e.g. SUM(x) AS total.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only attach a table alias if another PIVOT/UNPIVOT doesn't follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names: one per (IN value, aggregation)
            # pair, combined per the dialect's PREFIXED_PIVOT_COLUMNS /
            # IDENTIFY_PIVOT_STRINGS settings.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
elements["rollup"].extend(ensure_list(rollup)) 2956 2957 if self._match(TokenType.CUBE): 2958 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2959 elements["cube"].extend(ensure_list(cube)) 2960 2961 if self._match_text_seq("TOTALS"): 2962 totals = True 2963 elements["totals"] = True # type: ignore 2964 2965 if not (grouping_sets or rollup or cube or totals): 2966 break 2967 2968 return self.expression(exp.Group, **elements) # type: ignore 2969 2970 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2971 if not self._match(TokenType.GROUPING_SETS): 2972 return None 2973 2974 return self._parse_wrapped_csv(self._parse_grouping_set) 2975 2976 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2977 if self._match(TokenType.L_PAREN): 2978 grouping_set = self._parse_csv(self._parse_column) 2979 self._match_r_paren() 2980 return self.expression(exp.Tuple, expressions=grouping_set) 2981 2982 return self._parse_column() 2983 2984 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2985 if not skip_having_token and not self._match(TokenType.HAVING): 2986 return None 2987 return self.expression(exp.Having, this=self._parse_conjunction()) 2988 2989 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2990 if not self._match(TokenType.QUALIFY): 2991 return None 2992 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2993 2994 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2995 if skip_start_token: 2996 start = None 2997 elif self._match(TokenType.START_WITH): 2998 start = self._parse_conjunction() 2999 else: 3000 return None 3001 3002 self._match(TokenType.CONNECT_BY) 3003 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3004 exp.Prior, this=self._parse_bitwise() 3005 ) 3006 connect = self._parse_conjunction() 3007 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3008 3009 if not start and self._match(TokenType.START_WITH): 3010 
start = self._parse_conjunction() 3011 3012 return self.expression(exp.Connect, start=start, connect=connect) 3013 3014 def _parse_order( 3015 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3016 ) -> t.Optional[exp.Expression]: 3017 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3018 return this 3019 3020 return self.expression( 3021 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3022 ) 3023 3024 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3025 if not self._match(token): 3026 return None 3027 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3028 3029 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3030 this = parse_method() if parse_method else self._parse_conjunction() 3031 3032 asc = self._match(TokenType.ASC) 3033 desc = self._match(TokenType.DESC) or (asc and False) 3034 3035 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3036 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3037 3038 nulls_first = is_nulls_first or False 3039 explicitly_null_ordered = is_nulls_first or is_nulls_last 3040 3041 if ( 3042 not explicitly_null_ordered 3043 and ( 3044 (not desc and self.NULL_ORDERING == "nulls_are_small") 3045 or (desc and self.NULL_ORDERING != "nulls_are_small") 3046 ) 3047 and self.NULL_ORDERING != "nulls_are_last" 3048 ): 3049 nulls_first = True 3050 3051 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3052 3053 def _parse_limit( 3054 self, this: t.Optional[exp.Expression] = None, top: bool = False 3055 ) -> t.Optional[exp.Expression]: 3056 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3057 comments = self._prev_comments 3058 if top: 3059 limit_paren = self._match(TokenType.L_PAREN) 3060 expression = self._parse_number() 3061 3062 if limit_paren: 3063 self._match_r_paren() 3064 else: 3065 expression = self._parse_term() 
3066 3067 if self._match(TokenType.COMMA): 3068 offset = expression 3069 expression = self._parse_term() 3070 else: 3071 offset = None 3072 3073 limit_exp = self.expression( 3074 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3075 ) 3076 3077 return limit_exp 3078 3079 if self._match(TokenType.FETCH): 3080 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3081 direction = self._prev.text if direction else "FIRST" 3082 3083 count = self._parse_field(tokens=self.FETCH_TOKENS) 3084 percent = self._match(TokenType.PERCENT) 3085 3086 self._match_set((TokenType.ROW, TokenType.ROWS)) 3087 3088 only = self._match_text_seq("ONLY") 3089 with_ties = self._match_text_seq("WITH", "TIES") 3090 3091 if only and with_ties: 3092 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3093 3094 return self.expression( 3095 exp.Fetch, 3096 direction=direction, 3097 count=count, 3098 percent=percent, 3099 with_ties=with_ties, 3100 ) 3101 3102 return this 3103 3104 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3105 if not self._match(TokenType.OFFSET): 3106 return this 3107 3108 count = self._parse_term() 3109 self._match_set((TokenType.ROW, TokenType.ROWS)) 3110 return self.expression(exp.Offset, this=this, expression=count) 3111 3112 def _parse_locks(self) -> t.List[exp.Lock]: 3113 locks = [] 3114 while True: 3115 if self._match_text_seq("FOR", "UPDATE"): 3116 update = True 3117 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3118 "LOCK", "IN", "SHARE", "MODE" 3119 ): 3120 update = False 3121 else: 3122 break 3123 3124 expressions = None 3125 if self._match_text_seq("OF"): 3126 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3127 3128 wait: t.Optional[bool | exp.Expression] = None 3129 if self._match_text_seq("NOWAIT"): 3130 wait = True 3131 elif self._match_text_seq("WAIT"): 3132 wait = self._parse_primary() 3133 elif 
self._match_text_seq("SKIP", "LOCKED"): 3134 wait = False 3135 3136 locks.append( 3137 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3138 ) 3139 3140 return locks 3141 3142 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3143 if not self._match_set(self.SET_OPERATIONS): 3144 return this 3145 3146 token_type = self._prev.token_type 3147 3148 if token_type == TokenType.UNION: 3149 expression = exp.Union 3150 elif token_type == TokenType.EXCEPT: 3151 expression = exp.Except 3152 else: 3153 expression = exp.Intersect 3154 3155 return self.expression( 3156 expression, 3157 this=this, 3158 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3159 by_name=self._match_text_seq("BY", "NAME"), 3160 expression=self._parse_set_operations(self._parse_select(nested=True)), 3161 ) 3162 3163 def _parse_expression(self) -> t.Optional[exp.Expression]: 3164 return self._parse_alias(self._parse_conjunction()) 3165 3166 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3167 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3168 3169 def _parse_equality(self) -> t.Optional[exp.Expression]: 3170 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3171 3172 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3173 return self._parse_tokens(self._parse_range, self.COMPARISON) 3174 3175 def _parse_range(self) -> t.Optional[exp.Expression]: 3176 this = self._parse_bitwise() 3177 negate = self._match(TokenType.NOT) 3178 3179 if self._match_set(self.RANGE_PARSERS): 3180 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3181 if not expression: 3182 return this 3183 3184 this = expression 3185 elif self._match(TokenType.ISNULL): 3186 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3187 3188 # Postgres supports ISNULL and NOTNULL for conditions. 
3189 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3190 if self._match(TokenType.NOTNULL): 3191 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3192 this = self.expression(exp.Not, this=this) 3193 3194 if negate: 3195 this = self.expression(exp.Not, this=this) 3196 3197 if self._match(TokenType.IS): 3198 this = self._parse_is(this) 3199 3200 return this 3201 3202 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3203 index = self._index - 1 3204 negate = self._match(TokenType.NOT) 3205 3206 if self._match_text_seq("DISTINCT", "FROM"): 3207 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3208 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3209 3210 expression = self._parse_null() or self._parse_boolean() 3211 if not expression: 3212 self._retreat(index) 3213 return None 3214 3215 this = self.expression(exp.Is, this=this, expression=expression) 3216 return self.expression(exp.Not, this=this) if negate else this 3217 3218 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3219 unnest = self._parse_unnest(with_alias=False) 3220 if unnest: 3221 this = self.expression(exp.In, this=this, unnest=unnest) 3222 elif self._match(TokenType.L_PAREN): 3223 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3224 3225 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3226 this = self.expression(exp.In, this=this, query=expressions[0]) 3227 else: 3228 this = self.expression(exp.In, this=this, expressions=expressions) 3229 3230 self._match_r_paren(this) 3231 else: 3232 this = self.expression(exp.In, this=this, field=self._parse_field()) 3233 3234 return this 3235 3236 def _parse_between(self, this: exp.Expression) -> exp.Between: 3237 low = self._parse_bitwise() 3238 self._match(TokenType.AND) 3239 high = self._parse_bitwise() 3240 return self.expression(exp.Between, this=this, low=low, 
high=high) 3241 3242 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3243 if not self._match(TokenType.ESCAPE): 3244 return this 3245 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3246 3247 def _parse_interval(self) -> t.Optional[exp.Interval]: 3248 index = self._index 3249 3250 if not self._match(TokenType.INTERVAL): 3251 return None 3252 3253 if self._match(TokenType.STRING, advance=False): 3254 this = self._parse_primary() 3255 else: 3256 this = self._parse_term() 3257 3258 if not this: 3259 self._retreat(index) 3260 return None 3261 3262 unit = self._parse_function() or self._parse_var(any_token=True) 3263 3264 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3265 # each INTERVAL expression into this canonical form so it's easy to transpile 3266 if this and this.is_number: 3267 this = exp.Literal.string(this.name) 3268 elif this and this.is_string: 3269 parts = this.name.split() 3270 3271 if len(parts) == 2: 3272 if unit: 3273 # This is not actually a unit, it's something else (e.g. 
a "window side") 3274 unit = None 3275 self._retreat(self._index - 1) 3276 3277 this = exp.Literal.string(parts[0]) 3278 unit = self.expression(exp.Var, this=parts[1]) 3279 3280 return self.expression(exp.Interval, this=this, unit=unit) 3281 3282 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3283 this = self._parse_term() 3284 3285 while True: 3286 if self._match_set(self.BITWISE): 3287 this = self.expression( 3288 self.BITWISE[self._prev.token_type], 3289 this=this, 3290 expression=self._parse_term(), 3291 ) 3292 elif self._match(TokenType.DQMARK): 3293 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3294 elif self._match_pair(TokenType.LT, TokenType.LT): 3295 this = self.expression( 3296 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3297 ) 3298 elif self._match_pair(TokenType.GT, TokenType.GT): 3299 this = self.expression( 3300 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3301 ) 3302 else: 3303 break 3304 3305 return this 3306 3307 def _parse_term(self) -> t.Optional[exp.Expression]: 3308 return self._parse_tokens(self._parse_factor, self.TERM) 3309 3310 def _parse_factor(self) -> t.Optional[exp.Expression]: 3311 return self._parse_tokens(self._parse_unary, self.FACTOR) 3312 3313 def _parse_unary(self) -> t.Optional[exp.Expression]: 3314 if self._match_set(self.UNARY_PARSERS): 3315 return self.UNARY_PARSERS[self._prev.token_type](self) 3316 return self._parse_at_time_zone(self._parse_type()) 3317 3318 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3319 interval = parse_interval and self._parse_interval() 3320 if interval: 3321 return interval 3322 3323 index = self._index 3324 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3325 this = self._parse_column() 3326 3327 if data_type: 3328 if isinstance(this, exp.Literal): 3329 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3330 if parser: 3331 return parser(self, this, 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type, or return None.

        Heavily backtracking: `index` is taken up front so any dead end can
        `_retreat` and leave the token stream untouched.

        Args:
            check_func: when True, bail out if a parenthesized "type" is followed
                by a string — it's presumably a function call, not a type.
            schema: propagated into nested type parses.
            allow_identifiers: allow a plain identifier to be re-tokenized as a
                type name (and, if supported, as a user-defined type).
        """
        index = self._index

        # Teradata-style SYSUDTLIB. prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text to see if it names a type.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect the remaining dotted parts of the UDT name.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type arguments, e.g. DECIMAL(10, 2) or STRUCT(a INT).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call spelled like a type, e.g. DATE('...').
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket type arguments, e.g. ARRAY<INT> / STRUCT<a: INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        # Disambiguate TYPE(...) from a function call: a following string
        # literal means it was a cast-like call after all.
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat,
        leading-dot number, or a parenthesized subquery/tuple/expression.

        Returns None when the current token starts none of these.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals ('a' 'b') concatenate.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # `.5`-style numbers with no leading zero.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            # Disambiguate: (SELECT ...) subquery vs (a, b) tuple vs (expr) paren.
            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                # Re-attach comments captured from the opening paren.
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
self.FUNCTION_PARSERS.get(upper) 3644 if parser and not anonymous: 3645 this = parser(self) 3646 else: 3647 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3648 3649 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3650 this = self.expression(subquery_predicate, this=self._parse_select()) 3651 self._match_r_paren() 3652 return this 3653 3654 if functions is None: 3655 functions = self.FUNCTIONS 3656 3657 function = functions.get(upper) 3658 3659 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3660 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3661 3662 if function and not anonymous: 3663 func = self.validate_expression(function(args), args) 3664 if not self.NORMALIZE_FUNCTIONS: 3665 func.meta["name"] = this 3666 this = func 3667 else: 3668 this = self.expression(exp.Anonymous, this=this, expressions=args) 3669 3670 self._match_r_paren(this) 3671 return self._parse_window(this) 3672 3673 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3674 return self._parse_column_def(self._parse_id_var()) 3675 3676 def _parse_user_defined_function( 3677 self, kind: t.Optional[TokenType] = None 3678 ) -> t.Optional[exp.Expression]: 3679 this = self._parse_id_var() 3680 3681 while self._match(TokenType.DOT): 3682 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3683 3684 if not self._match(TokenType.L_PAREN): 3685 return this 3686 3687 expressions = self._parse_csv(self._parse_function_parameter) 3688 self._match_r_paren() 3689 return self.expression( 3690 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3691 ) 3692 3693 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3694 literal = self._parse_primary() 3695 if literal: 3696 return self.expression(exp.Introducer, this=token.text, expression=literal) 3697 3698 return self.expression(exp.Identifier, this=token.text) 3699 3700 def _parse_session_parameter(self) -> 
exp.SessionParameter: 3701 kind = None 3702 this = self._parse_id_var() or self._parse_primary() 3703 3704 if this and self._match(TokenType.DOT): 3705 kind = this.name 3706 this = self._parse_var() or self._parse_primary() 3707 3708 return self.expression(exp.SessionParameter, this=this, kind=kind) 3709 3710 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3711 index = self._index 3712 3713 if self._match(TokenType.L_PAREN): 3714 expressions = t.cast( 3715 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3716 ) 3717 3718 if not self._match(TokenType.R_PAREN): 3719 self._retreat(index) 3720 else: 3721 expressions = [self._parse_id_var()] 3722 3723 if self._match_set(self.LAMBDAS): 3724 return self.LAMBDAS[self._prev.token_type](self, expressions) 3725 3726 self._retreat(index) 3727 3728 this: t.Optional[exp.Expression] 3729 3730 if self._match(TokenType.DISTINCT): 3731 this = self.expression( 3732 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3733 ) 3734 else: 3735 this = self._parse_select_or_expression(alias=alias) 3736 3737 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3738 3739 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3740 index = self._index 3741 3742 if not self.errors: 3743 try: 3744 if self._parse_select(nested=True): 3745 return this 3746 except ParseError: 3747 pass 3748 finally: 3749 self.errors.clear() 3750 self._retreat(index) 3751 3752 if not self._match(TokenType.L_PAREN): 3753 return this 3754 3755 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3756 3757 self._match_r_paren() 3758 return self.expression(exp.Schema, this=this, expressions=args) 3759 3760 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3761 return self._parse_column_def(self._parse_field(any_token=True)) 3762 3763 def _parse_column_def(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 3764 # column defs are not really columns, they're identifiers 3765 if isinstance(this, exp.Column): 3766 this = this.this 3767 3768 kind = self._parse_types(schema=True) 3769 3770 if self._match_text_seq("FOR", "ORDINALITY"): 3771 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3772 3773 constraints: t.List[exp.Expression] = [] 3774 3775 if not kind and self._match(TokenType.ALIAS): 3776 constraints.append( 3777 self.expression( 3778 exp.ComputedColumnConstraint, 3779 this=self._parse_conjunction(), 3780 persisted=self._match_text_seq("PERSISTED"), 3781 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3782 ) 3783 ) 3784 3785 while True: 3786 constraint = self._parse_column_constraint() 3787 if not constraint: 3788 break 3789 constraints.append(constraint) 3790 3791 if not kind and not constraints: 3792 return this 3793 3794 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3795 3796 def _parse_auto_increment( 3797 self, 3798 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3799 start = None 3800 increment = None 3801 3802 if self._match(TokenType.L_PAREN, advance=False): 3803 args = self._parse_wrapped_csv(self._parse_bitwise) 3804 start = seq_get(args, 0) 3805 increment = seq_get(args, 1) 3806 elif self._match_text_seq("START"): 3807 start = self._parse_bitwise() 3808 self._match_text_seq("INCREMENT") 3809 increment = self._parse_bitwise() 3810 3811 if start and increment: 3812 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3813 3814 return exp.AutoIncrementColumnConstraint() 3815 3816 def _parse_compress(self) -> exp.CompressColumnConstraint: 3817 if self._match(TokenType.L_PAREN, advance=False): 3818 return self.expression( 3819 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3820 ) 3821 3822 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [( ... )].

        The optional parenthesized suffix may carry START WITH, INCREMENT BY,
        MINVALUE, MAXVALUE and [NO] CYCLE options. When the IDENTITY keyword is
        absent, the parenthesized content is treated as a generation expression.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            # GENERATED BY DEFAULT [ON NULL] AS ... -> this=False
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # GENERATED [ALWAYS] AS ... -> this=True
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: the parens wrap a generation expression
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... constraint (NULL, CASESPECIFIC,
        FOR REPLICATION); returns None when nothing matches."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, dispatching its
        kind through CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # May return the bare name (or None) if no constraint kind follows
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named CONSTRAINT <name> ... clause, or fall back to an
        unnamed schema-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can be followed by several constraint bodies
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose leading keyword is in
        `constraints` (defaults to CONSTRAINT_PARSERS keys)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index_type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings, e.g.
        "ON DELETE CASCADE", "NOT ENFORCED", "DEFERRABLE"."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE/UPDATE) becomes the event name
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> plus its key-constraint options.

        When `match` is True the REFERENCES keyword is required; otherwise the
        caller has already consumed it.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is always None here — the referenced column
        # list, if any, is parsed as part of the schema-qualified table.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token becomes the action verbatim (uppercased)
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook for dialects to override how a single PRIMARY KEY column is parsed
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint (no paren list)
        or as a table-level key with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [ ... ] / { ... } suffixes: slices, array literals, DuckDB
        struct literals, or index (bracket) access on `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:n]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index access: normalize offsets relative to the dialect's base
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[1][2]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a ':' follows (e.g. x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple CASE" form; None for searched CASE
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or the statement form
        IF <cond> THEN <expr> [ELSE <expr>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind so IF can be re-parsed
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; the NEXT
        token has already been consumed by the caller."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) or EXTRACT(<part>, <expr>)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT ...]).

        `strict` selects exp.Cast vs exp.TryCast. Temporal casts with a FORMAT
        clause are rewritten into StrToDate / StrToTime.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's argument list, honoring dialect NULL/strictness flags."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delim, value, ...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls across dialects,
        normalizing them into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing odd arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> {:|,} [VALUE] <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson if a trailing FORMAT JSON follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...), including NULL handling, UNIQUE KEYS,
        RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: dialect decides between natural log and LOG
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (cols) AGAINST ('expr' [modifier]) full-text search."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> [path] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-argument form;
        `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table> [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint call's table arguments into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the operands arrive reversed
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: a CSV of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of an expression: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...).

        With `alias=True` (BigQuery named windows), an AS-defined window spec
        is parsed instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: reference to a named window, no inline spec
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # Hook so dialects can customize PARTITION BY / ORDER BY parsing
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint: UNBOUNDED, CURRENT ROW, or an
        expression, plus an optional PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`; with `explicit=True` an AS
        keyword is required for an alias to be recognized."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # AS (a, b, ...) — multiple aliases
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier; falls back to treating (almost) any token or
        a token from `tokens`/ID_VAR_TOKENS as an unquoted identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder as a fallback."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or a placeholder as a fallback."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or a placeholder as a fallback."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) as an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or a placeholder as a fallback."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, or a placeholder as a fallback."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*), or a placeholder as a fallback."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Dispatch to PLACEHOLDER_PARSERS; rewinds if the parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse EXCEPT (col, ...) or EXCEPT col in a star-modifier context."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse REPLACE (expr, ...) or REPLACE expr in a star-modifier context."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: tokens in `expressions` map to
        the node type combining the accumulated left side with a new operand."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV; parens are required unless `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside ( ... ); parens required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or fall back to a (possibly aliased) expression
        with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append("
".join(mode)) 4810 if not self._match(TokenType.COMMA): 4811 break 4812 4813 return self.expression(exp.Transaction, this=this, modes=modes) 4814 4815 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4816 chain = None 4817 savepoint = None 4818 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4819 4820 self._match_texts({"TRANSACTION", "WORK"}) 4821 4822 if self._match_text_seq("TO"): 4823 self._match_text_seq("SAVEPOINT") 4824 savepoint = self._parse_id_var() 4825 4826 if self._match(TokenType.AND): 4827 chain = not self._match_text_seq("NO") 4828 self._match_text_seq("CHAIN") 4829 4830 if is_rollback: 4831 return self.expression(exp.Rollback, savepoint=savepoint) 4832 4833 return self.expression(exp.Commit, chain=chain) 4834 4835 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4836 if not self._match_text_seq("ADD"): 4837 return None 4838 4839 self._match(TokenType.COLUMN) 4840 exists_column = self._parse_exists(not_=True) 4841 expression = self._parse_field_def() 4842 4843 if expression: 4844 expression.set("exists", exists_column) 4845 4846 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4847 if self._match_texts(("FIRST", "AFTER")): 4848 position = self._prev.text 4849 column_position = self.expression( 4850 exp.ColumnPosition, this=self._parse_column(), position=position 4851 ) 4852 expression.set("position", column_position) 4853 4854 return expression 4855 4856 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4857 drop = self._match(TokenType.DROP) and self._parse_drop() 4858 if drop and not isinstance(drop, exp.Command): 4859 drop.set("kind", drop.args.get("kind", "COLUMN")) 4860 return drop 4861 4862 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4863 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4864 return self.expression( 4865 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4866 ) 4867 4868 def _parse_add_constraint(self) -> exp.AddConstraint: 4869 this = None 4870 kind = self._prev.token_type 4871 4872 if kind == TokenType.CONSTRAINT: 4873 this = self._parse_id_var() 4874 4875 if self._match_text_seq("CHECK"): 4876 expression = self._parse_wrapped(self._parse_conjunction) 4877 enforced = self._match_text_seq("ENFORCED") 4878 4879 return self.expression( 4880 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4881 ) 4882 4883 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4884 expression = self._parse_foreign_key() 4885 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4886 expression = self._parse_primary_key() 4887 else: 4888 expression = None 4889 4890 return self.expression(exp.AddConstraint, this=this, expression=expression) 4891 4892 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4893 index = self._index - 1 4894 4895 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4896 return self._parse_csv(self._parse_add_constraint) 4897 4898 self._retreat(index) 4899 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4900 return self._parse_csv(self._parse_field_def) 4901 4902 return self._parse_csv(self._parse_add_column) 4903 4904 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4905 self._match(TokenType.COLUMN) 4906 column = self._parse_field(any_token=True) 4907 4908 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4909 return self.expression(exp.AlterColumn, this=column, drop=True) 4910 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4911 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4912 4913 self._match_text_seq("SET", "DATA") 4914 return self.expression( 4915 exp.AlterColumn, 4916 this=column, 4917 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4918 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4919 using=self._match(TokenType.USING) and self._parse_conjunction(), 4920 ) 4921 4922 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4923 index = self._index - 1 4924 4925 partition_exists = self._parse_exists() 4926 if self._match(TokenType.PARTITION, advance=False): 4927 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4928 4929 self._retreat(index) 4930 return self._parse_csv(self._parse_drop_column) 4931 4932 def _parse_alter_table_rename(self) -> exp.RenameTable: 4933 self._match_text_seq("TO") 4934 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4935 4936 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4937 start = self._prev 4938 4939 if not self._match(TokenType.TABLE): 4940 return self._parse_as_command(start) 4941 4942 exists = self._parse_exists() 4943 only = self._match_text_seq("ONLY") 4944 this = self._parse_table(schema=True) 4945 4946 if self._next: 4947 self._advance() 4948 4949 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4950 if parser: 4951 actions = ensure_list(parser(self)) 4952 4953 if not self._curr: 4954 return self.expression( 4955 exp.AlterTable, 4956 this=this, 4957 exists=exists, 4958 actions=actions, 4959 only=only, 4960 ) 4961 4962 return self._parse_as_command(start) 4963 4964 def _parse_merge(self) -> exp.Merge: 4965 self._match(TokenType.INTO) 4966 target = self._parse_table() 4967 4968 if target and self._match(TokenType.ALIAS, advance=False): 4969 target.set("alias", self._parse_table_alias()) 4970 4971 self._match(TokenType.USING) 4972 using = self._parse_table() 4973 4974 self._match(TokenType.ON) 4975 on = self._parse_conjunction() 4976 4977 whens = [] 4978 while self._match(TokenType.WHEN): 4979 matched = not self._match(TokenType.NOT) 4980 self._match_text_seq("MATCHED") 4981 source = ( 4982 False 4983 if self._match_text_seq("BY", "TARGET") 4984 else self._match_text_seq("BY", 
"SOURCE") 4985 ) 4986 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4987 4988 self._match(TokenType.THEN) 4989 4990 if self._match(TokenType.INSERT): 4991 _this = self._parse_star() 4992 if _this: 4993 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4994 else: 4995 then = self.expression( 4996 exp.Insert, 4997 this=self._parse_value(), 4998 expression=self._match(TokenType.VALUES) and self._parse_value(), 4999 ) 5000 elif self._match(TokenType.UPDATE): 5001 expressions = self._parse_star() 5002 if expressions: 5003 then = self.expression(exp.Update, expressions=expressions) 5004 else: 5005 then = self.expression( 5006 exp.Update, 5007 expressions=self._match(TokenType.SET) 5008 and self._parse_csv(self._parse_equality), 5009 ) 5010 elif self._match(TokenType.DELETE): 5011 then = self.expression(exp.Var, this=self._prev.text) 5012 else: 5013 then = None 5014 5015 whens.append( 5016 self.expression( 5017 exp.When, 5018 matched=matched, 5019 source=source, 5020 condition=condition, 5021 then=then, 5022 ) 5023 ) 5024 5025 return self.expression( 5026 exp.Merge, 5027 this=target, 5028 using=using, 5029 on=on, 5030 expressions=whens, 5031 ) 5032 5033 def _parse_show(self) -> t.Optional[exp.Expression]: 5034 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5035 if parser: 5036 return parser(self) 5037 return self._parse_as_command(self._prev) 5038 5039 def _parse_set_item_assignment( 5040 self, kind: t.Optional[str] = None 5041 ) -> t.Optional[exp.Expression]: 5042 index = self._index 5043 5044 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5045 return self._parse_set_transaction(global_=kind == "GLOBAL") 5046 5047 left = self._parse_primary() or self._parse_id_var() 5048 assignment_delimiter = self._match_texts(("=", "TO")) 5049 5050 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5051 self._retreat(index) 5052 return None 5053 5054 
right = self._parse_statement() or self._parse_id_var() 5055 this = self.expression(exp.EQ, this=left, expression=right) 5056 5057 return self.expression(exp.SetItem, this=this, kind=kind) 5058 5059 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5060 self._match_text_seq("TRANSACTION") 5061 characteristics = self._parse_csv( 5062 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5063 ) 5064 return self.expression( 5065 exp.SetItem, 5066 expressions=characteristics, 5067 kind="TRANSACTION", 5068 **{"global": global_}, # type: ignore 5069 ) 5070 5071 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5072 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5073 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5074 5075 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5076 index = self._index 5077 set_ = self.expression( 5078 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5079 ) 5080 5081 if self._curr: 5082 self._retreat(index) 5083 return self._parse_as_command(self._prev) 5084 5085 return set_ 5086 5087 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5088 for option in options: 5089 if self._match_text_seq(*option.split(" ")): 5090 return exp.var(option) 5091 return None 5092 5093 def _parse_as_command(self, start: Token) -> exp.Command: 5094 while self._curr: 5095 self._advance() 5096 text = self._find_sql(start, self._prev) 5097 size = len(start.text) 5098 return exp.Command(this=text[:size], expression=text[size:]) 5099 5100 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5101 settings = [] 5102 5103 self._match_l_paren() 5104 kind = self._parse_id_var() 5105 5106 if self._match(TokenType.L_PAREN): 5107 while True: 5108 key = self._parse_id_var() 5109 value = self._parse_primary() 5110 5111 if not key and value is None: 5112 break 5113 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5114 self._match(TokenType.R_PAREN) 5115 5116 self._match_r_paren() 5117 5118 return self.expression( 5119 exp.DictProperty, 5120 this=this, 5121 kind=kind.this if kind else None, 5122 settings=settings, 5123 ) 5124 5125 def _parse_dict_range(self, this: str) -> exp.DictRange: 5126 self._match_l_paren() 5127 has_min = self._match_text_seq("MIN") 5128 if has_min: 5129 min = self._parse_var() or self._parse_primary() 5130 self._match_text_seq("MAX") 5131 max = self._parse_var() or self._parse_primary() 5132 else: 5133 max = self._parse_var() or self._parse_primary() 5134 min = exp.Literal.number(0) 5135 self._match_r_paren() 5136 return self.expression(exp.DictRange, this=this, min=min, max=max) 5137 5138 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5139 index = self._index 5140 expression = self._parse_column() 5141 if not self._match(TokenType.IN): 5142 self._retreat(index - 1) 5143 return None 5144 iterator = self._parse_column() 5145 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5146 return self.expression( 5147 exp.Comprehension, 5148 this=this, 5149 expression=expression, 5150 iterator=iterator, 5151 condition=condition, 5152 ) 5153 5154 def _find_parser( 5155 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5156 ) -> t.Optional[t.Callable]: 5157 if not self._curr: 5158 return None 5159 5160 index = self._index 5161 this = [] 5162 while True: 5163 # The current token might be multiple words 5164 curr = self._curr.text.upper() 5165 key = curr.split(" ") 5166 this.append(curr) 5167 5168 self._advance() 5169 result, trie = in_trie(trie, key) 5170 if result == TrieResult.FAILED: 5171 break 5172 5173 if result == TrieResult.EXISTS: 5174 subparser = parsers[" ".join(this)] 5175 return subparser 5176 5177 self._retreat(index) 5178 return None 5179 5180 def _match(self, token_type, advance=True, expression=None): 
5181 if not self._curr: 5182 return None 5183 5184 if self._curr.token_type == token_type: 5185 if advance: 5186 self._advance() 5187 self._add_comments(expression) 5188 return True 5189 5190 return None 5191 5192 def _match_set(self, types, advance=True): 5193 if not self._curr: 5194 return None 5195 5196 if self._curr.token_type in types: 5197 if advance: 5198 self._advance() 5199 return True 5200 5201 return None 5202 5203 def _match_pair(self, token_type_a, token_type_b, advance=True): 5204 if not self._curr or not self._next: 5205 return None 5206 5207 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5208 if advance: 5209 self._advance(2) 5210 return True 5211 5212 return None 5213 5214 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5215 if not self._match(TokenType.L_PAREN, expression=expression): 5216 self.raise_error("Expecting (") 5217 5218 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5219 if not self._match(TokenType.R_PAREN, expression=expression): 5220 self.raise_error("Expecting )") 5221 5222 def _match_texts(self, texts, advance=True): 5223 if self._curr and self._curr.text.upper() in texts: 5224 if advance: 5225 self._advance() 5226 return True 5227 return False 5228 5229 def _match_text_seq(self, *texts, advance=True): 5230 index = self._index 5231 for text in texts: 5232 if self._curr and self._curr.text.upper() == text: 5233 self._advance() 5234 else: 5235 self._retreat(index) 5236 return False 5237 5238 if not advance: 5239 self._retreat(index) 5240 5241 return True 5242 5243 @t.overload 5244 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5245 ... 5246 5247 @t.overload 5248 def _replace_columns_with_dots( 5249 self, this: t.Optional[exp.Expression] 5250 ) -> t.Optional[exp.Expression]: 5251 ... 
5252 5253 def _replace_columns_with_dots(self, this): 5254 if isinstance(this, exp.Dot): 5255 exp.replace_children(this, self._replace_columns_with_dots) 5256 elif isinstance(this, exp.Column): 5257 exp.replace_children(this, self._replace_columns_with_dots) 5258 table = this.args.get("table") 5259 this = ( 5260 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5261 ) 5262 5263 return this 5264 5265 def _replace_lambda( 5266 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5267 ) -> t.Optional[exp.Expression]: 5268 if not node: 5269 return node 5270 5271 for column in node.find_all(exp.Column): 5272 if column.parts[0].name in lambda_variables: 5273 dot_or_id = column.to_dot() if column.table else column.this 5274 parent = column.parent 5275 5276 while isinstance(parent, exp.Dot): 5277 if not isinstance(parent.parent, exp.Dot): 5278 parent.replace(dot_or_id) 5279 break 5280 parent = parent.parent 5281 else: 5282 if column is node: 5283 node = dot_or_id 5284 else: 5285 column.replace(dot_or_id) 5286 return node 5287 5288 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5289 return [ 5290 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5291 for value in values 5292 if value 5293 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
943 def __init__( 944 self, 945 error_level: t.Optional[ErrorLevel] = None, 946 error_message_context: int = 100, 947 max_errors: int = 3, 948 ): 949 self.error_level = error_level or ErrorLevel.IMMEDIATE 950 self.error_message_context = error_message_context 951 self.max_errors = max_errors 952 self._tokenizer = self.TOKENIZER_CLASS() 953 self.reset()
965 def parse( 966 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 967 ) -> t.List[t.Optional[exp.Expression]]: 968 """ 969 Parses a list of tokens and returns a list of syntax trees, one tree 970 per parsed SQL statement. 971 972 Args: 973 raw_tokens: The list of tokens. 974 sql: The original SQL string, used to produce helpful debug messages. 975 976 Returns: 977 The list of the produced syntax trees. 978 """ 979 return self._parse( 980 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 981 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
983 def parse_into( 984 self, 985 expression_types: exp.IntoType, 986 raw_tokens: t.List[Token], 987 sql: t.Optional[str] = None, 988 ) -> t.List[t.Optional[exp.Expression]]: 989 """ 990 Parses a list of tokens into a given Expression type. If a collection of Expression 991 types is given instead, this method will try to parse the token list into each one 992 of them, stopping at the first for which the parsing succeeds. 993 994 Args: 995 expression_types: The expression type(s) to try and parse the token list into. 996 raw_tokens: The list of tokens. 997 sql: The original SQL string, used to produce helpful debug messages. 998 999 Returns: 1000 The target Expression. 1001 """ 1002 errors = [] 1003 for expression_type in ensure_list(expression_types): 1004 parser = self.EXPRESSION_PARSERS.get(expression_type) 1005 if not parser: 1006 raise TypeError(f"No parser registered for {expression_type}") 1007 1008 try: 1009 return self._parse(parser, raw_tokens, sql) 1010 except ParseError as e: 1011 e.errors[0]["into_expression"] = expression_type 1012 errors.append(e) 1013 1014 raise ParseError( 1015 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1016 errors=merge_errors(errors), 1017 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1054 def check_errors(self) -> None: 1055 """Logs or raises any found errors, depending on the chosen error level setting.""" 1056 if self.error_level == ErrorLevel.WARN: 1057 for error in self.errors: 1058 logger.error(str(error)) 1059 elif self.error_level == ErrorLevel.RAISE and self.errors: 1060 raise ParseError( 1061 concat_messages(self.errors, self.max_errors), 1062 errors=merge_errors(self.errors), 1063 )
Logs or raises any found errors, depending on the chosen error level setting.
1065 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1066 """ 1067 Appends an error in the list of recorded errors or raises it, depending on the chosen 1068 error level setting. 1069 """ 1070 token = token or self._curr or self._prev or Token.string("") 1071 start = token.start 1072 end = token.end + 1 1073 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1074 highlight = self.sql[start:end] 1075 end_context = self.sql[end : end + self.error_message_context] 1076 1077 error = ParseError.new( 1078 f"{message}. Line {token.line}, Col: {token.col}.\n" 1079 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1080 description=message, 1081 line=token.line, 1082 col=token.col, 1083 start_context=start_context, 1084 highlight=highlight, 1085 end_context=end_context, 1086 ) 1087 1088 if self.error_level == ErrorLevel.IMMEDIATE: 1089 raise error 1090 1091 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1093 def expression( 1094 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1095 ) -> E: 1096 """ 1097 Creates a new, validated Expression. 1098 1099 Args: 1100 exp_class: The expression class to instantiate. 1101 comments: An optional list of comments to attach to the expression. 1102 kwargs: The arguments to set for the expression along with their respective values. 1103 1104 Returns: 1105 The target expression. 1106 """ 1107 instance = exp_class(**kwargs) 1108 instance.add_comments(comments) if comments else self._add_comments(instance) 1109 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1116 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1117 """ 1118 Validates an Expression, making sure that all its mandatory arguments are set. 1119 1120 Args: 1121 expression: The expression to validate. 1122 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1123 1124 Returns: 1125 The validated expression. 1126 """ 1127 if self.error_level != ErrorLevel.IGNORE: 1128 for error_message in expression.error_messages(args): 1129 self.raise_error(error_message) 1130 1131 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.