# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.BIGDECIMAL, 182 TokenType.UUID, 183 TokenType.GEOGRAPHY, 184 TokenType.GEOMETRY, 185 TokenType.HLLSKETCH, 186 TokenType.HSTORE, 187 TokenType.PSEUDO_TYPE, 188 TokenType.SUPER, 189 TokenType.SERIAL, 190 TokenType.SMALLSERIAL, 191 TokenType.BIGSERIAL, 192 TokenType.XML, 193 TokenType.YEAR, 194 TokenType.UNIQUEIDENTIFIER, 195 TokenType.USERDEFINED, 196 TokenType.MONEY, 197 TokenType.SMALLMONEY, 198 TokenType.ROWVERSION, 199 TokenType.IMAGE, 200 TokenType.VARIANT, 201 TokenType.OBJECT, 202 TokenType.OBJECT_IDENTIFIER, 203 TokenType.INET, 204 TokenType.IPADDRESS, 205 TokenType.IPPREFIX, 206 TokenType.UNKNOWN, 207 TokenType.NULL, 208 *ENUM_TYPE_TOKENS, 209 *NESTED_TYPE_TOKENS, 210 } 211 212 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 213 TokenType.BIGINT: TokenType.UBIGINT, 214 TokenType.INT: TokenType.UINT, 215 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 216 TokenType.SMALLINT: 
TokenType.USMALLINT, 217 TokenType.TINYINT: TokenType.UTINYINT, 218 } 219 220 SUBQUERY_PREDICATES = { 221 TokenType.ANY: exp.Any, 222 TokenType.ALL: exp.All, 223 TokenType.EXISTS: exp.Exists, 224 TokenType.SOME: exp.Any, 225 } 226 227 RESERVED_KEYWORDS = { 228 *Tokenizer.SINGLE_TOKENS.values(), 229 TokenType.SELECT, 230 } 231 232 DB_CREATABLES = { 233 TokenType.DATABASE, 234 TokenType.SCHEMA, 235 TokenType.TABLE, 236 TokenType.VIEW, 237 TokenType.DICTIONARY, 238 } 239 240 CREATABLES = { 241 TokenType.COLUMN, 242 TokenType.FUNCTION, 243 TokenType.INDEX, 244 TokenType.PROCEDURE, 245 *DB_CREATABLES, 246 } 247 248 # Tokens that can represent identifiers 249 ID_VAR_TOKENS = { 250 TokenType.VAR, 251 TokenType.ANTI, 252 TokenType.APPLY, 253 TokenType.ASC, 254 TokenType.AUTO_INCREMENT, 255 TokenType.BEGIN, 256 TokenType.CACHE, 257 TokenType.CASE, 258 TokenType.COLLATE, 259 TokenType.COMMAND, 260 TokenType.COMMENT, 261 TokenType.COMMIT, 262 TokenType.CONSTRAINT, 263 TokenType.DEFAULT, 264 TokenType.DELETE, 265 TokenType.DESC, 266 TokenType.DESCRIBE, 267 TokenType.DICTIONARY, 268 TokenType.DIV, 269 TokenType.END, 270 TokenType.EXECUTE, 271 TokenType.ESCAPE, 272 TokenType.FALSE, 273 TokenType.FIRST, 274 TokenType.FILTER, 275 TokenType.FORMAT, 276 TokenType.FULL, 277 TokenType.IS, 278 TokenType.ISNULL, 279 TokenType.INTERVAL, 280 TokenType.KEEP, 281 TokenType.KILL, 282 TokenType.LEFT, 283 TokenType.LOAD, 284 TokenType.MERGE, 285 TokenType.NATURAL, 286 TokenType.NEXT, 287 TokenType.OFFSET, 288 TokenType.ORDINALITY, 289 TokenType.OVERLAPS, 290 TokenType.OVERWRITE, 291 TokenType.PARTITION, 292 TokenType.PERCENT, 293 TokenType.PIVOT, 294 TokenType.PRAGMA, 295 TokenType.RANGE, 296 TokenType.REFERENCES, 297 TokenType.RIGHT, 298 TokenType.ROW, 299 TokenType.ROWS, 300 TokenType.SEMI, 301 TokenType.SET, 302 TokenType.SETTINGS, 303 TokenType.SHOW, 304 TokenType.TEMPORARY, 305 TokenType.TOP, 306 TokenType.TRUE, 307 TokenType.UNIQUE, 308 TokenType.UNPIVOT, 309 TokenType.UPDATE, 310 
TokenType.VOLATILE, 311 TokenType.WINDOW, 312 *CREATABLES, 313 *SUBQUERY_PREDICATES, 314 *TYPE_TOKENS, 315 *NO_PAREN_FUNCTIONS, 316 } 317 318 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 319 320 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 321 TokenType.ANTI, 322 TokenType.APPLY, 323 TokenType.ASOF, 324 TokenType.FULL, 325 TokenType.LEFT, 326 TokenType.LOCK, 327 TokenType.NATURAL, 328 TokenType.OFFSET, 329 TokenType.RIGHT, 330 TokenType.SEMI, 331 TokenType.WINDOW, 332 } 333 334 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 335 336 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 337 338 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 339 340 FUNC_TOKENS = { 341 TokenType.COMMAND, 342 TokenType.CURRENT_DATE, 343 TokenType.CURRENT_DATETIME, 344 TokenType.CURRENT_TIMESTAMP, 345 TokenType.CURRENT_TIME, 346 TokenType.CURRENT_USER, 347 TokenType.FILTER, 348 TokenType.FIRST, 349 TokenType.FORMAT, 350 TokenType.GLOB, 351 TokenType.IDENTIFIER, 352 TokenType.INDEX, 353 TokenType.ISNULL, 354 TokenType.ILIKE, 355 TokenType.INSERT, 356 TokenType.LIKE, 357 TokenType.MERGE, 358 TokenType.OFFSET, 359 TokenType.PRIMARY_KEY, 360 TokenType.RANGE, 361 TokenType.REPLACE, 362 TokenType.RLIKE, 363 TokenType.ROW, 364 TokenType.UNNEST, 365 TokenType.VAR, 366 TokenType.LEFT, 367 TokenType.RIGHT, 368 TokenType.DATE, 369 TokenType.DATETIME, 370 TokenType.TABLE, 371 TokenType.TIMESTAMP, 372 TokenType.TIMESTAMPTZ, 373 TokenType.WINDOW, 374 TokenType.XOR, 375 *TYPE_TOKENS, 376 *SUBQUERY_PREDICATES, 377 } 378 379 CONJUNCTION = { 380 TokenType.AND: exp.And, 381 TokenType.OR: exp.Or, 382 } 383 384 EQUALITY = { 385 TokenType.EQ: exp.EQ, 386 TokenType.NEQ: exp.NEQ, 387 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 388 } 389 390 COMPARISON = { 391 TokenType.GT: exp.GT, 392 TokenType.GTE: exp.GTE, 393 TokenType.LT: exp.LT, 394 TokenType.LTE: exp.LTE, 395 } 396 397 BITWISE = { 398 TokenType.AMP: exp.BitwiseAnd, 399 TokenType.CARET: exp.BitwiseXor, 400 TokenType.PIPE: exp.BitwiseOr, 401 
TokenType.DPIPE: exp.DPipe, 402 } 403 404 TERM = { 405 TokenType.DASH: exp.Sub, 406 TokenType.PLUS: exp.Add, 407 TokenType.MOD: exp.Mod, 408 TokenType.COLLATE: exp.Collate, 409 } 410 411 FACTOR = { 412 TokenType.DIV: exp.IntDiv, 413 TokenType.LR_ARROW: exp.Distance, 414 TokenType.SLASH: exp.Div, 415 TokenType.STAR: exp.Mul, 416 } 417 418 TIMES = { 419 TokenType.TIME, 420 TokenType.TIMETZ, 421 } 422 423 TIMESTAMPS = { 424 TokenType.TIMESTAMP, 425 TokenType.TIMESTAMPTZ, 426 TokenType.TIMESTAMPLTZ, 427 *TIMES, 428 } 429 430 SET_OPERATIONS = { 431 TokenType.UNION, 432 TokenType.INTERSECT, 433 TokenType.EXCEPT, 434 } 435 436 JOIN_METHODS = { 437 TokenType.NATURAL, 438 TokenType.ASOF, 439 } 440 441 JOIN_SIDES = { 442 TokenType.LEFT, 443 TokenType.RIGHT, 444 TokenType.FULL, 445 } 446 447 JOIN_KINDS = { 448 TokenType.INNER, 449 TokenType.OUTER, 450 TokenType.CROSS, 451 TokenType.SEMI, 452 TokenType.ANTI, 453 } 454 455 JOIN_HINTS: t.Set[str] = set() 456 457 LAMBDAS = { 458 TokenType.ARROW: lambda self, expressions: self.expression( 459 exp.Lambda, 460 this=self._replace_lambda( 461 self._parse_conjunction(), 462 {node.name for node in expressions}, 463 ), 464 expressions=expressions, 465 ), 466 TokenType.FARROW: lambda self, expressions: self.expression( 467 exp.Kwarg, 468 this=exp.var(expressions[0].name), 469 expression=self._parse_conjunction(), 470 ), 471 } 472 473 COLUMN_OPERATORS = { 474 TokenType.DOT: None, 475 TokenType.DCOLON: lambda self, this, to: self.expression( 476 exp.Cast if self.STRICT_CAST else exp.TryCast, 477 this=this, 478 to=to, 479 ), 480 TokenType.ARROW: lambda self, this, path: self.expression( 481 exp.JSONExtract, 482 this=this, 483 expression=path, 484 ), 485 TokenType.DARROW: lambda self, this, path: self.expression( 486 exp.JSONExtractScalar, 487 this=this, 488 expression=path, 489 ), 490 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 491 exp.JSONBExtract, 492 this=this, 493 expression=path, 494 ), 495 TokenType.DHASH_ARROW: 
lambda self, this, path: self.expression( 496 exp.JSONBExtractScalar, 497 this=this, 498 expression=path, 499 ), 500 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 501 exp.JSONBContains, 502 this=this, 503 expression=key, 504 ), 505 } 506 507 EXPRESSION_PARSERS = { 508 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 509 exp.Column: lambda self: self._parse_column(), 510 exp.Condition: lambda self: self._parse_conjunction(), 511 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 512 exp.Expression: lambda self: self._parse_statement(), 513 exp.From: lambda self: self._parse_from(), 514 exp.Group: lambda self: self._parse_group(), 515 exp.Having: lambda self: self._parse_having(), 516 exp.Identifier: lambda self: self._parse_id_var(), 517 exp.Join: lambda self: self._parse_join(), 518 exp.Lambda: lambda self: self._parse_lambda(), 519 exp.Lateral: lambda self: self._parse_lateral(), 520 exp.Limit: lambda self: self._parse_limit(), 521 exp.Offset: lambda self: self._parse_offset(), 522 exp.Order: lambda self: self._parse_order(), 523 exp.Ordered: lambda self: self._parse_ordered(), 524 exp.Properties: lambda self: self._parse_properties(), 525 exp.Qualify: lambda self: self._parse_qualify(), 526 exp.Returning: lambda self: self._parse_returning(), 527 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 528 exp.Table: lambda self: self._parse_table_parts(), 529 exp.TableAlias: lambda self: self._parse_table_alias(), 530 exp.Where: lambda self: self._parse_where(), 531 exp.Window: lambda self: self._parse_named_window(), 532 exp.With: lambda self: self._parse_with(), 533 "JOIN_TYPE": lambda self: self._parse_join_parts(), 534 } 535 536 STATEMENT_PARSERS = { 537 TokenType.ALTER: lambda self: self._parse_alter(), 538 TokenType.BEGIN: lambda self: self._parse_transaction(), 539 TokenType.CACHE: lambda self: self._parse_cache(), 540 TokenType.COMMIT: lambda self: 
self._parse_commit_or_rollback(), 541 TokenType.COMMENT: lambda self: self._parse_comment(), 542 TokenType.CREATE: lambda self: self._parse_create(), 543 TokenType.DELETE: lambda self: self._parse_delete(), 544 TokenType.DESC: lambda self: self._parse_describe(), 545 TokenType.DESCRIBE: lambda self: self._parse_describe(), 546 TokenType.DROP: lambda self: self._parse_drop(), 547 TokenType.INSERT: lambda self: self._parse_insert(), 548 TokenType.KILL: lambda self: self._parse_kill(), 549 TokenType.LOAD: lambda self: self._parse_load(), 550 TokenType.MERGE: lambda self: self._parse_merge(), 551 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 552 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 553 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 554 TokenType.SET: lambda self: self._parse_set(), 555 TokenType.UNCACHE: lambda self: self._parse_uncache(), 556 TokenType.UPDATE: lambda self: self._parse_update(), 557 TokenType.USE: lambda self: self.expression( 558 exp.Use, 559 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 560 and exp.var(self._prev.text), 561 this=self._parse_table(schema=False), 562 ), 563 } 564 565 UNARY_PARSERS = { 566 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 567 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 568 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 569 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 570 } 571 572 PRIMARY_PARSERS = { 573 TokenType.STRING: lambda self, token: self.expression( 574 exp.Literal, this=token.text, is_string=True 575 ), 576 TokenType.NUMBER: lambda self, token: self.expression( 577 exp.Literal, this=token.text, is_string=False 578 ), 579 TokenType.STAR: lambda self, _: self.expression( 580 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 581 
), 582 TokenType.NULL: lambda self, _: self.expression(exp.Null), 583 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 584 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 585 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 586 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 587 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 588 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 589 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 590 exp.National, this=token.text 591 ), 592 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 593 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 594 } 595 596 PLACEHOLDER_PARSERS = { 597 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 598 TokenType.PARAMETER: lambda self: self._parse_parameter(), 599 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 600 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 601 else None, 602 } 603 604 RANGE_PARSERS = { 605 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 606 TokenType.GLOB: binary_range_parser(exp.Glob), 607 TokenType.ILIKE: binary_range_parser(exp.ILike), 608 TokenType.IN: lambda self, this: self._parse_in(this), 609 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 610 TokenType.IS: lambda self, this: self._parse_is(this), 611 TokenType.LIKE: binary_range_parser(exp.Like), 612 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 613 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 614 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 615 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 616 } 617 618 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 619 "ALGORITHM": lambda 
self: self._parse_property_assignment(exp.AlgorithmProperty), 620 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 621 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 622 "CHARACTER SET": lambda self: self._parse_character_set(), 623 "CHECKSUM": lambda self: self._parse_checksum(), 624 "CLUSTER BY": lambda self: self._parse_cluster(), 625 "CLUSTERED": lambda self: self._parse_clustered_by(), 626 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 627 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 628 "COPY": lambda self: self._parse_copy_property(), 629 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 630 "DEFINER": lambda self: self._parse_definer(), 631 "DETERMINISTIC": lambda self: self.expression( 632 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 633 ), 634 "DISTKEY": lambda self: self._parse_distkey(), 635 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 636 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 637 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 638 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 639 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 640 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 641 "FREESPACE": lambda self: self._parse_freespace(), 642 "HEAP": lambda self: self.expression(exp.HeapProperty), 643 "IMMUTABLE": lambda self: self.expression( 644 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 645 ), 646 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 647 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 648 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 649 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 650 "LIKE": lambda 
self: self._parse_create_like(), 651 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 652 "LOCK": lambda self: self._parse_locking(), 653 "LOCKING": lambda self: self._parse_locking(), 654 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 655 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 656 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 657 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 658 "NO": lambda self: self._parse_no_property(), 659 "ON": lambda self: self._parse_on_property(), 660 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 661 "PARTITION BY": lambda self: self._parse_partitioned_by(), 662 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 663 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 664 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 665 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 666 "RETURNS": lambda self: self._parse_returns(), 667 "ROW": lambda self: self._parse_row(), 668 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 669 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 670 "SETTINGS": lambda self: self.expression( 671 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 672 ), 673 "SORTKEY": lambda self: self._parse_sortkey(), 674 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 675 "STABLE": lambda self: self.expression( 676 exp.StabilityProperty, this=exp.Literal.string("STABLE") 677 ), 678 "STORED": lambda self: self._parse_stored(), 679 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 680 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 681 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 682 "TO": lambda self: self._parse_to_table(), 683 "TRANSIENT": lambda self: 
self.expression(exp.TransientProperty), 684 "TTL": lambda self: self._parse_ttl(), 685 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 686 "VOLATILE": lambda self: self._parse_volatile_property(), 687 "WITH": lambda self: self._parse_with_property(), 688 } 689 690 CONSTRAINT_PARSERS = { 691 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 692 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 693 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 694 "CHARACTER SET": lambda self: self.expression( 695 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 696 ), 697 "CHECK": lambda self: self.expression( 698 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 699 ), 700 "COLLATE": lambda self: self.expression( 701 exp.CollateColumnConstraint, this=self._parse_var() 702 ), 703 "COMMENT": lambda self: self.expression( 704 exp.CommentColumnConstraint, this=self._parse_string() 705 ), 706 "COMPRESS": lambda self: self._parse_compress(), 707 "CLUSTERED": lambda self: self.expression( 708 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 709 ), 710 "NONCLUSTERED": lambda self: self.expression( 711 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 712 ), 713 "DEFAULT": lambda self: self.expression( 714 exp.DefaultColumnConstraint, this=self._parse_bitwise() 715 ), 716 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 717 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 718 "FORMAT": lambda self: self.expression( 719 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 720 ), 721 "GENERATED": lambda self: self._parse_generated_as_identity(), 722 "IDENTITY": lambda self: self._parse_auto_increment(), 723 "INLINE": lambda self: self._parse_inline(), 724 "LIKE": lambda self: self._parse_create_like(), 725 "NOT": lambda self: 
self._parse_not_constraint(), 726 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 727 "ON": lambda self: ( 728 self._match(TokenType.UPDATE) 729 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 730 ) 731 or self.expression(exp.OnProperty, this=self._parse_id_var()), 732 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 733 "PRIMARY KEY": lambda self: self._parse_primary_key(), 734 "REFERENCES": lambda self: self._parse_references(match=False), 735 "TITLE": lambda self: self.expression( 736 exp.TitleColumnConstraint, this=self._parse_var_or_string() 737 ), 738 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 739 "UNIQUE": lambda self: self._parse_unique(), 740 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 741 "WITH": lambda self: self.expression( 742 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 743 ), 744 } 745 746 ALTER_PARSERS = { 747 "ADD": lambda self: self._parse_alter_table_add(), 748 "ALTER": lambda self: self._parse_alter_table_alter(), 749 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 750 "DROP": lambda self: self._parse_alter_table_drop(), 751 "RENAME": lambda self: self._parse_alter_table_rename(), 752 } 753 754 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 755 756 NO_PAREN_FUNCTION_PARSERS = { 757 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 758 "CASE": lambda self: self._parse_case(), 759 "IF": lambda self: self._parse_if(), 760 "NEXT": lambda self: self._parse_next_value_for(), 761 } 762 763 INVALID_FUNC_NAME_TOKENS = { 764 TokenType.IDENTIFIER, 765 TokenType.STRING, 766 } 767 768 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 769 770 FUNCTION_PARSERS = { 771 "ANY_VALUE": lambda self: self._parse_any_value(), 772 "CAST": lambda self: 
self._parse_cast(self.STRICT_CAST), 773 "CONCAT": lambda self: self._parse_concat(), 774 "CONCAT_WS": lambda self: self._parse_concat_ws(), 775 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 776 "DECODE": lambda self: self._parse_decode(), 777 "EXTRACT": lambda self: self._parse_extract(), 778 "JSON_OBJECT": lambda self: self._parse_json_object(), 779 "LOG": lambda self: self._parse_logarithm(), 780 "MATCH": lambda self: self._parse_match_against(), 781 "OPENJSON": lambda self: self._parse_open_json(), 782 "POSITION": lambda self: self._parse_position(), 783 "SAFE_CAST": lambda self: self._parse_cast(False), 784 "STRING_AGG": lambda self: self._parse_string_agg(), 785 "SUBSTRING": lambda self: self._parse_substring(), 786 "TRIM": lambda self: self._parse_trim(), 787 "TRY_CAST": lambda self: self._parse_cast(False), 788 "TRY_CONVERT": lambda self: self._parse_convert(False), 789 } 790 791 QUERY_MODIFIER_PARSERS = { 792 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 793 TokenType.WHERE: lambda self: ("where", self._parse_where()), 794 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 795 TokenType.HAVING: lambda self: ("having", self._parse_having()), 796 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 797 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 798 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 799 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 800 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 801 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 802 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 803 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 804 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 805 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 806 TokenType.CLUSTER_BY: 
lambda self: ( 807 "cluster", 808 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 809 ), 810 TokenType.DISTRIBUTE_BY: lambda self: ( 811 "distribute", 812 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 813 ), 814 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 815 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 816 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 817 } 818 819 SET_PARSERS = { 820 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 821 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 822 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 823 "TRANSACTION": lambda self: self._parse_set_transaction(), 824 } 825 826 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 827 828 TYPE_LITERAL_PARSERS = { 829 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 830 } 831 832 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 833 834 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 835 836 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 837 838 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 839 TRANSACTION_CHARACTERISTICS = { 840 "ISOLATION LEVEL REPEATABLE READ", 841 "ISOLATION LEVEL READ COMMITTED", 842 "ISOLATION LEVEL READ UNCOMMITTED", 843 "ISOLATION LEVEL SERIALIZABLE", 844 "READ WRITE", 845 "READ ONLY", 846 } 847 848 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 849 850 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 851 852 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 853 854 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 855 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 856 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 857 858 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 859 860 
ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 861 862 DISTINCT_TOKENS = {TokenType.DISTINCT} 863 864 NULL_TOKENS = {TokenType.NULL} 865 866 STRICT_CAST = True 867 868 # A NULL arg in CONCAT yields NULL by default 869 CONCAT_NULL_OUTPUTS_STRING = False 870 871 PREFIXED_PIVOT_COLUMNS = False 872 IDENTIFY_PIVOT_STRINGS = False 873 874 LOG_BASE_FIRST = True 875 LOG_DEFAULTS_TO_LN = False 876 877 # Whether or not ADD is present for each column added by ALTER TABLE 878 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 879 880 # Whether or not the table sample clause expects CSV syntax 881 TABLESAMPLE_CSV = False 882 883 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. 884 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 885 886 __slots__ = ( 887 "error_level", 888 "error_message_context", 889 "max_errors", 890 "sql", 891 "errors", 892 "_tokens", 893 "_index", 894 "_curr", 895 "_next", 896 "_prev", 897 "_prev_comments", 898 "_tokenizer", 899 ) 900 901 # Autofilled 902 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 903 INDEX_OFFSET: int = 0 904 UNNEST_COLUMN_ONLY: bool = False 905 ALIAS_POST_TABLESAMPLE: bool = False 906 STRICT_STRING_CONCAT = False 907 SUPPORTS_USER_DEFINED_TYPES = True 908 NORMALIZE_FUNCTIONS = "upper" 909 NULL_ORDERING: str = "nulls_are_small" 910 SHOW_TRIE: t.Dict = {} 911 SET_TRIE: t.Dict = {} 912 FORMAT_MAPPING: t.Dict[str, str] = {} 913 FORMAT_TRIE: t.Dict = {} 914 TIME_MAPPING: t.Dict[str, str] = {} 915 TIME_TRIE: t.Dict = {} 916 917 def __init__( 918 self, 919 error_level: t.Optional[ErrorLevel] = None, 920 error_message_context: int = 100, 921 max_errors: int = 3, 922 ): 923 self.error_level = error_level or ErrorLevel.IMMEDIATE 924 self.error_message_context = error_message_context 925 self.max_errors = max_errors 926 self._tokenizer = self.TOKENIZER_CLASS() 927 self.reset() 928 929 def reset(self): 930 self.sql = "" 931 self.errors = [] 932 self._tokens = [] 933 self._index = 0 934 
self._curr = None 935 self._next = None 936 self._prev = None 937 self._prev_comments = None 938 939 def parse( 940 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 941 ) -> t.List[t.Optional[exp.Expression]]: 942 """ 943 Parses a list of tokens and returns a list of syntax trees, one tree 944 per parsed SQL statement. 945 946 Args: 947 raw_tokens: The list of tokens. 948 sql: The original SQL string, used to produce helpful debug messages. 949 950 Returns: 951 The list of the produced syntax trees. 952 """ 953 return self._parse( 954 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 955 ) 956 957 def parse_into( 958 self, 959 expression_types: exp.IntoType, 960 raw_tokens: t.List[Token], 961 sql: t.Optional[str] = None, 962 ) -> t.List[t.Optional[exp.Expression]]: 963 """ 964 Parses a list of tokens into a given Expression type. If a collection of Expression 965 types is given instead, this method will try to parse the token list into each one 966 of them, stopping at the first for which the parsing succeeds. 967 968 Args: 969 expression_types: The expression type(s) to try and parse the token list into. 970 raw_tokens: The list of tokens. 971 sql: The original SQL string, used to produce helpful debug messages. 972 973 Returns: 974 The target Expression. 
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the target type that was being attempted,
                # so the aggregated error below says what each attempt tried.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits `raw_tokens` into per-statement chunks at semicolons, then runs
        `parse_method` once per chunk, producing one syntax tree per statement.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon ends the current statement; a trailing semicolon
                # doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() increments the index first, so start at -1 to land on token 0.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                # The parse method returned before consuming every token.
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Surround the offending span with up to error_message_context characters
        # of the original SQL on each side.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # \033[4m ... \033[0m underlines the highlighted span on ANSI terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicitly provided comments; otherwise attach any comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the pending comments of the previous token (if any) onto
        # `expression`, consuming them so they are attached only once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.
1097 1098 Returns: 1099 The validated expression. 1100 """ 1101 if self.error_level != ErrorLevel.IGNORE: 1102 for error_message in expression.error_messages(args): 1103 self.raise_error(error_message) 1104 1105 return expression 1106 1107 def _find_sql(self, start: Token, end: Token) -> str: 1108 return self.sql[start.start : end.end + 1] 1109 1110 def _advance(self, times: int = 1) -> None: 1111 self._index += times 1112 self._curr = seq_get(self._tokens, self._index) 1113 self._next = seq_get(self._tokens, self._index + 1) 1114 1115 if self._index > 0: 1116 self._prev = self._tokens[self._index - 1] 1117 self._prev_comments = self._prev.comments 1118 else: 1119 self._prev = None 1120 self._prev_comments = None 1121 1122 def _retreat(self, index: int) -> None: 1123 if index != self._index: 1124 self._advance(index - self._index) 1125 1126 def _parse_command(self) -> exp.Command: 1127 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1128 1129 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1130 start = self._prev 1131 exists = self._parse_exists() if allow_exists else None 1132 1133 self._match(TokenType.ON) 1134 1135 kind = self._match_set(self.CREATABLES) and self._prev 1136 if not kind: 1137 return self._parse_as_command(start) 1138 1139 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1140 this = self._parse_user_defined_function(kind=kind.token_type) 1141 elif kind.token_type == TokenType.TABLE: 1142 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1143 elif kind.token_type == TokenType.COLUMN: 1144 this = self._parse_column() 1145 else: 1146 this = self._parse_id_var() 1147 1148 self._match(TokenType.IS) 1149 1150 return self.expression( 1151 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1152 ) 1153 1154 def _parse_to_table( 1155 self, 1156 ) -> exp.ToTableProperty: 1157 table = self._parse_table_parts(schema=True) 
1158 return self.expression(exp.ToTableProperty, this=table) 1159 1160 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1161 def _parse_ttl(self) -> exp.Expression: 1162 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1163 this = self._parse_bitwise() 1164 1165 if self._match_text_seq("DELETE"): 1166 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1167 if self._match_text_seq("RECOMPRESS"): 1168 return self.expression( 1169 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1170 ) 1171 if self._match_text_seq("TO", "DISK"): 1172 return self.expression( 1173 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1174 ) 1175 if self._match_text_seq("TO", "VOLUME"): 1176 return self.expression( 1177 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1178 ) 1179 1180 return this 1181 1182 expressions = self._parse_csv(_parse_ttl_action) 1183 where = self._parse_where() 1184 group = self._parse_group() 1185 1186 aggregates = None 1187 if group and self._match(TokenType.SET): 1188 aggregates = self._parse_csv(self._parse_set_item) 1189 1190 return self.expression( 1191 exp.MergeTreeTTL, 1192 expressions=expressions, 1193 where=where, 1194 group=group, 1195 aggregates=aggregates, 1196 ) 1197 1198 def _parse_statement(self) -> t.Optional[exp.Expression]: 1199 if self._curr is None: 1200 return None 1201 1202 if self._match_set(self.STATEMENT_PARSERS): 1203 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1204 1205 if self._match_set(Tokenizer.COMMANDS): 1206 return self._parse_command() 1207 1208 expression = self._parse_expression() 1209 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1210 return self._parse_query_modifiers(expression) 1211 1212 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1213 start = self._prev 1214 temporary = self._match(TokenType.TEMPORARY) 1215 
materialized = self._match_text_seq("MATERIALIZED") 1216 1217 kind = self._match_set(self.CREATABLES) and self._prev.text 1218 if not kind: 1219 return self._parse_as_command(start) 1220 1221 return self.expression( 1222 exp.Drop, 1223 comments=start.comments, 1224 exists=exists or self._parse_exists(), 1225 this=self._parse_table(schema=True), 1226 kind=kind, 1227 temporary=temporary, 1228 materialized=materialized, 1229 cascade=self._match_text_seq("CASCADE"), 1230 constraints=self._match_text_seq("CONSTRAINTS"), 1231 purge=self._match_text_seq("PURGE"), 1232 ) 1233 1234 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1235 return ( 1236 self._match_text_seq("IF") 1237 and (not not_ or self._match(TokenType.NOT)) 1238 and self._match(TokenType.EXISTS) 1239 ) 1240 1241 def _parse_create(self) -> exp.Create | exp.Command: 1242 # Note: this can't be None because we've matched a statement parser 1243 start = self._prev 1244 comments = self._prev_comments 1245 1246 replace = start.text.upper() == "REPLACE" or self._match_pair( 1247 TokenType.OR, TokenType.REPLACE 1248 ) 1249 unique = self._match(TokenType.UNIQUE) 1250 1251 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1252 self._advance() 1253 1254 properties = None 1255 create_token = self._match_set(self.CREATABLES) and self._prev 1256 1257 if not create_token: 1258 # exp.Properties.Location.POST_CREATE 1259 properties = self._parse_properties() 1260 create_token = self._match_set(self.CREATABLES) and self._prev 1261 1262 if not properties or not create_token: 1263 return self._parse_as_command(start) 1264 1265 exists = self._parse_exists(not_=True) 1266 this = None 1267 expression: t.Optional[exp.Expression] = None 1268 indexes = None 1269 no_schema_binding = None 1270 begin = None 1271 clone = None 1272 1273 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1274 nonlocal properties 1275 if properties and temp_props: 1276 
properties.expressions.extend(temp_props.expressions) 1277 elif temp_props: 1278 properties = temp_props 1279 1280 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1281 this = self._parse_user_defined_function(kind=create_token.token_type) 1282 1283 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1284 extend_props(self._parse_properties()) 1285 1286 self._match(TokenType.ALIAS) 1287 1288 if self._match(TokenType.COMMAND): 1289 expression = self._parse_as_command(self._prev) 1290 else: 1291 begin = self._match(TokenType.BEGIN) 1292 return_ = self._match_text_seq("RETURN") 1293 expression = self._parse_statement() 1294 1295 if return_: 1296 expression = self.expression(exp.Return, this=expression) 1297 elif create_token.token_type == TokenType.INDEX: 1298 this = self._parse_index(index=self._parse_id_var()) 1299 elif create_token.token_type in self.DB_CREATABLES: 1300 table_parts = self._parse_table_parts(schema=True) 1301 1302 # exp.Properties.Location.POST_NAME 1303 self._match(TokenType.COMMA) 1304 extend_props(self._parse_properties(before=True)) 1305 1306 this = self._parse_schema(this=table_parts) 1307 1308 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1309 extend_props(self._parse_properties()) 1310 1311 self._match(TokenType.ALIAS) 1312 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1313 # exp.Properties.Location.POST_ALIAS 1314 extend_props(self._parse_properties()) 1315 1316 expression = self._parse_ddl_select() 1317 1318 if create_token.token_type == TokenType.TABLE: 1319 # exp.Properties.Location.POST_EXPRESSION 1320 extend_props(self._parse_properties()) 1321 1322 indexes = [] 1323 while True: 1324 index = self._parse_index() 1325 1326 # exp.Properties.Location.POST_INDEX 1327 extend_props(self._parse_properties()) 1328 1329 if not index: 1330 break 1331 else: 1332 self._match(TokenType.COMMA) 1333 indexes.append(index) 1334 elif create_token.token_type == TokenType.VIEW: 
1335 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1336 no_schema_binding = True 1337 1338 shallow = self._match_text_seq("SHALLOW") 1339 1340 if self._match_text_seq("CLONE"): 1341 clone = self._parse_table(schema=True) 1342 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1343 clone_kind = ( 1344 self._match(TokenType.L_PAREN) 1345 and self._match_texts(self.CLONE_KINDS) 1346 and self._prev.text.upper() 1347 ) 1348 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1349 self._match(TokenType.R_PAREN) 1350 clone = self.expression( 1351 exp.Clone, 1352 this=clone, 1353 when=when, 1354 kind=clone_kind, 1355 shallow=shallow, 1356 expression=clone_expression, 1357 ) 1358 1359 return self.expression( 1360 exp.Create, 1361 comments=comments, 1362 this=this, 1363 kind=create_token.text, 1364 replace=replace, 1365 unique=unique, 1366 expression=expression, 1367 exists=exists, 1368 properties=properties, 1369 indexes=indexes, 1370 no_schema_binding=no_schema_binding, 1371 begin=begin, 1372 clone=clone, 1373 ) 1374 1375 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1376 # only used for teradata currently 1377 self._match(TokenType.COMMA) 1378 1379 kwargs = { 1380 "no": self._match_text_seq("NO"), 1381 "dual": self._match_text_seq("DUAL"), 1382 "before": self._match_text_seq("BEFORE"), 1383 "default": self._match_text_seq("DEFAULT"), 1384 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1385 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1386 "after": self._match_text_seq("AFTER"), 1387 "minimum": self._match_texts(("MIN", "MINIMUM")), 1388 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1389 } 1390 1391 if self._match_texts(self.PROPERTY_PARSERS): 1392 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1393 try: 1394 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1395 except TypeError: 1396 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1397 
1398 return None 1399 1400 def _parse_property(self) -> t.Optional[exp.Expression]: 1401 if self._match_texts(self.PROPERTY_PARSERS): 1402 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1403 1404 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1405 return self._parse_character_set(default=True) 1406 1407 if self._match_text_seq("COMPOUND", "SORTKEY"): 1408 return self._parse_sortkey(compound=True) 1409 1410 if self._match_text_seq("SQL", "SECURITY"): 1411 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1412 1413 assignment = self._match_pair( 1414 TokenType.VAR, TokenType.EQ, advance=False 1415 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1416 1417 if assignment: 1418 key = self._parse_var_or_string() 1419 self._match(TokenType.EQ) 1420 return self.expression( 1421 exp.Property, 1422 this=key, 1423 value=self._parse_column() or self._parse_var(any_token=True), 1424 ) 1425 1426 return None 1427 1428 def _parse_stored(self) -> exp.FileFormatProperty: 1429 self._match(TokenType.ALIAS) 1430 1431 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1432 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1433 1434 return self.expression( 1435 exp.FileFormatProperty, 1436 this=self.expression( 1437 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1438 ) 1439 if input_format or output_format 1440 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1441 ) 1442 1443 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1444 self._match(TokenType.EQ) 1445 self._match(TokenType.ALIAS) 1446 return self.expression(exp_class, this=self._parse_field()) 1447 1448 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1449 properties = [] 1450 while True: 1451 if before: 1452 prop = self._parse_property_before() 1453 
else: 1454 prop = self._parse_property() 1455 1456 if not prop: 1457 break 1458 for p in ensure_list(prop): 1459 properties.append(p) 1460 1461 if properties: 1462 return self.expression(exp.Properties, expressions=properties) 1463 1464 return None 1465 1466 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1467 return self.expression( 1468 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1469 ) 1470 1471 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1472 if self._index >= 2: 1473 pre_volatile_token = self._tokens[self._index - 2] 1474 else: 1475 pre_volatile_token = None 1476 1477 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1478 return exp.VolatileProperty() 1479 1480 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1481 1482 def _parse_with_property( 1483 self, 1484 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1485 if self._match(TokenType.L_PAREN, advance=False): 1486 return self._parse_wrapped_csv(self._parse_property) 1487 1488 if self._match_text_seq("JOURNAL"): 1489 return self._parse_withjournaltable() 1490 1491 if self._match_text_seq("DATA"): 1492 return self._parse_withdata(no=False) 1493 elif self._match_text_seq("NO", "DATA"): 1494 return self._parse_withdata(no=True) 1495 1496 if not self._next: 1497 return None 1498 1499 return self._parse_withisolatedloading() 1500 1501 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1502 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1503 self._match(TokenType.EQ) 1504 1505 user = self._parse_id_var() 1506 self._match(TokenType.PARAMETER) 1507 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1508 1509 if not user or not host: 1510 return None 1511 1512 return exp.DefinerProperty(this=f"{user}@{host}") 1513 1514 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1515 
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM = ON | OFF | DEFAULT (tri-state: True/False/None)."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (order)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY not followed by GRANTS: back out of the COPY token too.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
for_all=for_all, 1643 for_insert=for_insert, 1644 for_none=for_none, 1645 ) 1646 1647 def _parse_locking(self) -> exp.LockingProperty: 1648 if self._match(TokenType.TABLE): 1649 kind = "TABLE" 1650 elif self._match(TokenType.VIEW): 1651 kind = "VIEW" 1652 elif self._match(TokenType.ROW): 1653 kind = "ROW" 1654 elif self._match_text_seq("DATABASE"): 1655 kind = "DATABASE" 1656 else: 1657 kind = None 1658 1659 if kind in ("DATABASE", "TABLE", "VIEW"): 1660 this = self._parse_table_parts() 1661 else: 1662 this = None 1663 1664 if self._match(TokenType.FOR): 1665 for_or_in = "FOR" 1666 elif self._match(TokenType.IN): 1667 for_or_in = "IN" 1668 else: 1669 for_or_in = None 1670 1671 if self._match_text_seq("ACCESS"): 1672 lock_type = "ACCESS" 1673 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1674 lock_type = "EXCLUSIVE" 1675 elif self._match_text_seq("SHARE"): 1676 lock_type = "SHARE" 1677 elif self._match_text_seq("READ"): 1678 lock_type = "READ" 1679 elif self._match_text_seq("WRITE"): 1680 lock_type = "WRITE" 1681 elif self._match_text_seq("CHECKSUM"): 1682 lock_type = "CHECKSUM" 1683 else: 1684 lock_type = None 1685 1686 override = self._match_text_seq("OVERRIDE") 1687 1688 return self.expression( 1689 exp.LockingProperty, 1690 this=this, 1691 kind=kind, 1692 for_or_in=for_or_in, 1693 lock_type=lock_type, 1694 override=override, 1695 ) 1696 1697 def _parse_partition_by(self) -> t.List[exp.Expression]: 1698 if self._match(TokenType.PARTITION_BY): 1699 return self._parse_csv(self._parse_conjunction) 1700 return [] 1701 1702 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1703 self._match(TokenType.EQ) 1704 return self.expression( 1705 exp.PartitionedByProperty, 1706 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1707 ) 1708 1709 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1710 if self._match_text_seq("AND", "STATISTICS"): 1711 statistics = True 1712 elif self._match_text_seq("AND", "NO", "STATISTICS"): 
1713 statistics = False 1714 else: 1715 statistics = None 1716 1717 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1718 1719 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1720 if self._match_text_seq("PRIMARY", "INDEX"): 1721 return exp.NoPrimaryIndexProperty() 1722 return None 1723 1724 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1725 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1726 return exp.OnCommitProperty() 1727 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1728 return exp.OnCommitProperty(delete=True) 1729 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1730 1731 def _parse_distkey(self) -> exp.DistKeyProperty: 1732 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1733 1734 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1735 table = self._parse_table(schema=True) 1736 1737 options = [] 1738 while self._match_texts(("INCLUDING", "EXCLUDING")): 1739 this = self._prev.text.upper() 1740 1741 id_var = self._parse_id_var() 1742 if not id_var: 1743 return None 1744 1745 options.append( 1746 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1747 ) 1748 1749 return self.expression(exp.LikeProperty, this=table, expressions=options) 1750 1751 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1752 return self.expression( 1753 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1754 ) 1755 1756 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1757 self._match(TokenType.EQ) 1758 return self.expression( 1759 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1760 ) 1761 1762 def _parse_returns(self) -> exp.ReturnsProperty: 1763 value: t.Optional[exp.Expression] 1764 is_table = self._match(TokenType.TABLE) 1765 1766 if is_table: 1767 if self._match(TokenType.LT): 
1768 value = self.expression( 1769 exp.Schema, 1770 this="TABLE", 1771 expressions=self._parse_csv(self._parse_struct_types), 1772 ) 1773 if not self._match(TokenType.GT): 1774 self.raise_error("Expecting >") 1775 else: 1776 value = self._parse_schema(exp.var("TABLE")) 1777 else: 1778 value = self._parse_types() 1779 1780 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1781 1782 def _parse_describe(self) -> exp.Describe: 1783 kind = self._match_set(self.CREATABLES) and self._prev.text 1784 this = self._parse_table(schema=True) 1785 properties = self._parse_properties() 1786 expressions = properties.expressions if properties else None 1787 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1788 1789 def _parse_insert(self) -> exp.Insert: 1790 comments = ensure_list(self._prev_comments) 1791 overwrite = self._match(TokenType.OVERWRITE) 1792 ignore = self._match(TokenType.IGNORE) 1793 local = self._match_text_seq("LOCAL") 1794 alternative = None 1795 1796 if self._match_text_seq("DIRECTORY"): 1797 this: t.Optional[exp.Expression] = self.expression( 1798 exp.Directory, 1799 this=self._parse_var_or_string(), 1800 local=local, 1801 row_format=self._parse_row_format(match_row=True), 1802 ) 1803 else: 1804 if self._match(TokenType.OR): 1805 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1806 1807 self._match(TokenType.INTO) 1808 comments += ensure_list(self._prev_comments) 1809 self._match(TokenType.TABLE) 1810 this = self._parse_table(schema=True) 1811 1812 returning = self._parse_returning() 1813 1814 return self.expression( 1815 exp.Insert, 1816 comments=comments, 1817 this=this, 1818 by_name=self._match_text_seq("BY", "NAME"), 1819 exists=self._parse_exists(), 1820 partition=self._parse_partition(), 1821 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1822 and self._parse_conjunction(), 1823 expression=self._parse_ddl_select(), 1824 conflict=self._parse_on_conflict(), 
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # KILL [CONNECTION | QUERY] <id>
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... (postgres) or ON DUPLICATE KEY ... (mysql)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each delimiter sub-clause is optional and independent.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; else a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses UPDATE <table> SET ... [FROM ...] [WHERE ...] [ORDER BY ...] [LIMIT ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # A VALUES row: either a parenthesized tuple or a single bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
and self._match_texts(("STRUCT", "VALUE")) 2072 and self._prev.text 2073 ) 2074 2075 if distinct: 2076 distinct = self.expression( 2077 exp.Distinct, 2078 on=self._parse_value() if self._match(TokenType.ON) else None, 2079 ) 2080 2081 if all_ and distinct: 2082 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2083 2084 limit = self._parse_limit(top=True) 2085 projections = self._parse_projections() 2086 2087 this = self.expression( 2088 exp.Select, 2089 kind=kind, 2090 hint=hint, 2091 distinct=distinct, 2092 expressions=projections, 2093 limit=limit, 2094 ) 2095 this.comments = comments 2096 2097 into = self._parse_into() 2098 if into: 2099 this.set("into", into) 2100 2101 if not from_: 2102 from_ = self._parse_from() 2103 2104 if from_: 2105 this.set("from", from_) 2106 2107 this = self._parse_query_modifiers(this) 2108 elif (table or nested) and self._match(TokenType.L_PAREN): 2109 if self._match(TokenType.PIVOT): 2110 this = self._parse_simplified_pivot() 2111 elif self._match(TokenType.FROM): 2112 this = exp.select("*").from_( 2113 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2114 ) 2115 else: 2116 this = self._parse_table() if table else self._parse_select(nested=True) 2117 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2118 2119 self._match_r_paren() 2120 2121 # We return early here so that the UNION isn't attached to the subquery by the 2122 # following call to _parse_set_operations, but instead becomes the parent node 2123 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2124 elif self._match(TokenType.VALUES): 2125 this = self.expression( 2126 exp.Values, 2127 expressions=self._parse_csv(self._parse_value), 2128 alias=self._parse_table_alias(), 2129 ) 2130 elif from_: 2131 this = exp.select("*").from_(from_.this, copy=False) 2132 else: 2133 this = None 2134 2135 return self._parse_set_operations(this) 2136 2137 def _parse_with(self, skip_with_token: bool = False) -> 
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause, or return None if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; some inputs repeat WITH instead.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: alias [ (cols) ] AS ( statement )."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, possibly with a column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesized list turned out not to be column aliases.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery, consuming pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing clauses (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y packs the offset into the Limit node; hoist it.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block (/*+ ... */)."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse a SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target clause."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause, or return None if absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
self._parse_partition_by() 2267 order = self._parse_order() 2268 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2269 2270 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2271 rows = exp.var("ONE ROW PER MATCH") 2272 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2273 text = "ALL ROWS PER MATCH" 2274 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2275 text += f" SHOW EMPTY MATCHES" 2276 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2277 text += f" OMIT EMPTY MATCHES" 2278 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2279 text += f" WITH UNMATCHED ROWS" 2280 rows = exp.var(text) 2281 else: 2282 rows = None 2283 2284 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2285 text = "AFTER MATCH SKIP" 2286 if self._match_text_seq("PAST", "LAST", "ROW"): 2287 text += f" PAST LAST ROW" 2288 elif self._match_text_seq("TO", "NEXT", "ROW"): 2289 text += f" TO NEXT ROW" 2290 elif self._match_text_seq("TO", "FIRST"): 2291 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2292 elif self._match_text_seq("TO", "LAST"): 2293 text += f" TO LAST {self._advance_any().text}" # type: ignore 2294 after = exp.var(text) 2295 else: 2296 after = None 2297 2298 if self._match_text_seq("PATTERN"): 2299 self._match_l_paren() 2300 2301 if not self._curr: 2302 self.raise_error("Expecting )", self._curr) 2303 2304 paren = 1 2305 start = self._curr 2306 2307 while self._curr and paren > 0: 2308 if self._curr.token_type == TokenType.L_PAREN: 2309 paren += 1 2310 if self._curr.token_type == TokenType.R_PAREN: 2311 paren -= 1 2312 2313 end = self._prev 2314 self._advance() 2315 2316 if paren > 0: 2317 self.raise_error("Expecting )", self._curr) 2318 2319 pattern = exp.var(self._find_sql(start, end)) 2320 else: 2321 pattern = None 2322 2323 define = ( 2324 self._parse_csv( 2325 lambda: self.expression( 2326 exp.Alias, 2327 alias=self._parse_id_var(any_token=True), 2328 this=self._match(TokenType.ALIAS) and 
self._parse_conjunction(), 2329 ) 2330 ) 2331 if self._match_text_seq("DEFINE") 2332 else None 2333 ) 2334 2335 self._match_r_paren() 2336 2337 return self.expression( 2338 exp.MatchRecognize, 2339 partition_by=partition, 2340 order=order, 2341 measures=measures, 2342 rows=rows, 2343 after=after, 2344 pattern=pattern, 2345 define=define, 2346 alias=self._parse_table_alias(), 2347 ) 2348 2349 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2350 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2351 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2352 2353 if outer_apply or cross_apply: 2354 this = self._parse_select(table=True) 2355 view = None 2356 outer = not cross_apply 2357 elif self._match(TokenType.LATERAL): 2358 this = self._parse_select(table=True) 2359 view = self._match(TokenType.VIEW) 2360 outer = self._match(TokenType.OUTER) 2361 else: 2362 return None 2363 2364 if not this: 2365 this = ( 2366 self._parse_unnest() 2367 or self._parse_function() 2368 or self._parse_id_var(any_token=False) 2369 ) 2370 2371 while self._match(TokenType.DOT): 2372 this = exp.Dot( 2373 this=this, 2374 expression=self._parse_function() or self._parse_id_var(any_token=False), 2375 ) 2376 2377 if view: 2378 table = self._parse_id_var(any_token=False) 2379 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2380 table_alias: t.Optional[exp.TableAlias] = self.expression( 2381 exp.TableAlias, this=table, columns=columns 2382 ) 2383 elif isinstance(this, exp.Subquery) and this.alias: 2384 # Ensures parity between the Subquery's and the Lateral's "alias" args 2385 table_alias = this.args["alias"].copy() 2386 else: 2387 table_alias = self._parse_table_alias() 2388 2389 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2390 2391 def _parse_join_parts( 2392 self, 2393 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2394 return ( 2395 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a JOIN, if present."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and APPLY), or None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; rewind past any method/side/kind we consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested joins: the ON/USING may belong to this join after inner joins.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index` is pre-parsed when given."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) or MySQL index hints attached to a table."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (catalog, db or table)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name: [catalog.][db.]table."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, values, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect flag: TABLESAMPLE may come before or after the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR SYSTEM_TIME AS OF etc.)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
t.Optional[exp.Unnest]: 2653 if not self._match(TokenType.UNNEST): 2654 return None 2655 2656 expressions = self._parse_wrapped_csv(self._parse_type) 2657 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2658 2659 alias = self._parse_table_alias() if with_alias else None 2660 2661 if alias: 2662 if self.UNNEST_COLUMN_ONLY: 2663 if alias.args.get("columns"): 2664 self.raise_error("Unexpected extra column alias in unnest.") 2665 2666 alias.set("columns", [alias.this]) 2667 alias.set("this", None) 2668 2669 columns = alias.args.get("columns") or [] 2670 if offset and len(expressions) < len(columns): 2671 offset = columns.pop() 2672 2673 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2674 self._match(TokenType.ALIAS) 2675 offset = self._parse_id_var() or exp.to_identifier("offset") 2676 2677 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2678 2679 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2680 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2681 if not is_derived and not self._match(TokenType.VALUES): 2682 return None 2683 2684 expressions = self._parse_csv(self._parse_value) 2685 alias = self._parse_table_alias() 2686 2687 if is_derived: 2688 self._match_r_paren() 2689 2690 return self.expression( 2691 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2692 ) 2693 2694 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2695 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2696 as_modifier and self._match_text_seq("USING", "SAMPLE") 2697 ): 2698 return None 2699 2700 bucket_numerator = None 2701 bucket_denominator = None 2702 bucket_field = None 2703 percent = None 2704 rows = None 2705 size = None 2706 seed = None 2707 2708 kind = ( 2709 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2710 ) 2711 method = 
self._parse_var(tokens=(TokenType.ROW,)) 2712 2713 self._match(TokenType.L_PAREN) 2714 2715 if self.TABLESAMPLE_CSV: 2716 num = None 2717 expressions = self._parse_csv(self._parse_primary) 2718 else: 2719 expressions = None 2720 num = self._parse_primary() 2721 2722 if self._match_text_seq("BUCKET"): 2723 bucket_numerator = self._parse_number() 2724 self._match_text_seq("OUT", "OF") 2725 bucket_denominator = bucket_denominator = self._parse_number() 2726 self._match(TokenType.ON) 2727 bucket_field = self._parse_field() 2728 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2729 percent = num 2730 elif self._match(TokenType.ROWS): 2731 rows = num 2732 elif num: 2733 size = num 2734 2735 self._match(TokenType.R_PAREN) 2736 2737 if self._match(TokenType.L_PAREN): 2738 method = self._parse_var() 2739 seed = self._match(TokenType.COMMA) and self._parse_number() 2740 self._match_r_paren() 2741 elif self._match_texts(("SEED", "REPEATABLE")): 2742 seed = self._parse_wrapped(self._parse_number) 2743 2744 return self.expression( 2745 exp.TableSample, 2746 expressions=expressions, 2747 method=method, 2748 bucket_numerator=bucket_numerator, 2749 bucket_denominator=bucket_denominator, 2750 bucket_field=bucket_field, 2751 percent=percent, 2752 rows=rows, 2753 size=size, 2754 seed=seed, 2755 kind=kind, 2756 ) 2757 2758 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2759 return list(iter(self._parse_pivot, None)) or None 2760 2761 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2762 return list(iter(self._parse_join, None)) or None 2763 2764 # https://duckdb.org/docs/sql/statements/pivot 2765 def _parse_simplified_pivot(self) -> exp.Pivot: 2766 def _parse_on() -> t.Optional[exp.Expression]: 2767 this = self._parse_bitwise() 2768 return self._parse_in(this) if self._match(TokenType.IN) else this 2769 2770 this = self._parse_table() 2771 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2772 using = self._match(TokenType.USING) and 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT(...) clause, or return None if absent."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (overridable per dialect)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS / ROLLUP / CUBE / TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP (...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
return self.expression(exp.Group, **elements) # type: ignore 2903 2904 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2905 if not self._match(TokenType.GROUPING_SETS): 2906 return None 2907 2908 return self._parse_wrapped_csv(self._parse_grouping_set) 2909 2910 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2911 if self._match(TokenType.L_PAREN): 2912 grouping_set = self._parse_csv(self._parse_column) 2913 self._match_r_paren() 2914 return self.expression(exp.Tuple, expressions=grouping_set) 2915 2916 return self._parse_column() 2917 2918 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2919 if not skip_having_token and not self._match(TokenType.HAVING): 2920 return None 2921 return self.expression(exp.Having, this=self._parse_conjunction()) 2922 2923 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2924 if not self._match(TokenType.QUALIFY): 2925 return None 2926 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2927 2928 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2929 if skip_start_token: 2930 start = None 2931 elif self._match(TokenType.START_WITH): 2932 start = self._parse_conjunction() 2933 else: 2934 return None 2935 2936 self._match(TokenType.CONNECT_BY) 2937 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2938 exp.Prior, this=self._parse_bitwise() 2939 ) 2940 connect = self._parse_conjunction() 2941 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2942 2943 if not start and self._match(TokenType.START_WITH): 2944 start = self._parse_conjunction() 2945 2946 return self.expression(exp.Connect, start=start, connect=connect) 2947 2948 def _parse_order( 2949 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2950 ) -> t.Optional[exp.Expression]: 2951 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2952 return this 2953 2954 return self.expression( 2955 exp.Order, 
this=this, expressions=self._parse_csv(self._parse_ordered) 2956 ) 2957 2958 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2959 if not self._match(token): 2960 return None 2961 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2962 2963 def _parse_ordered(self) -> exp.Ordered: 2964 this = self._parse_conjunction() 2965 2966 asc = self._match(TokenType.ASC) 2967 desc = self._match(TokenType.DESC) or (asc and False) 2968 2969 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2970 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2971 2972 nulls_first = is_nulls_first or False 2973 explicitly_null_ordered = is_nulls_first or is_nulls_last 2974 2975 if ( 2976 not explicitly_null_ordered 2977 and ( 2978 (not desc and self.NULL_ORDERING == "nulls_are_small") 2979 or (desc and self.NULL_ORDERING != "nulls_are_small") 2980 ) 2981 and self.NULL_ORDERING != "nulls_are_last" 2982 ): 2983 nulls_first = True 2984 2985 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2986 2987 def _parse_limit( 2988 self, this: t.Optional[exp.Expression] = None, top: bool = False 2989 ) -> t.Optional[exp.Expression]: 2990 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2991 comments = self._prev_comments 2992 if top: 2993 limit_paren = self._match(TokenType.L_PAREN) 2994 expression = self._parse_number() 2995 2996 if limit_paren: 2997 self._match_r_paren() 2998 else: 2999 expression = self._parse_term() 3000 3001 if self._match(TokenType.COMMA): 3002 offset = expression 3003 expression = self._parse_term() 3004 else: 3005 offset = None 3006 3007 limit_exp = self.expression( 3008 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3009 ) 3010 3011 return limit_exp 3012 3013 if self._match(TokenType.FETCH): 3014 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3015 direction = self._prev.text if direction else "FIRST" 3016 3017 count = 
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses (FOR UPDATE / FOR SHARE / ...)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, an expression for WAIT n
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chained onto `this`, recursively."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse one projection expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS NULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
3123 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3124 if self._match(TokenType.NOTNULL): 3125 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3126 this = self.expression(exp.Not, this=this) 3127 3128 if negate: 3129 this = self.expression(exp.Not, this=this) 3130 3131 if self._match(TokenType.IS): 3132 this = self._parse_is(this) 3133 3134 return this 3135 3136 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3137 index = self._index - 1 3138 negate = self._match(TokenType.NOT) 3139 3140 if self._match_text_seq("DISTINCT", "FROM"): 3141 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3142 return self.expression(klass, this=this, expression=self._parse_expression()) 3143 3144 expression = self._parse_null() or self._parse_boolean() 3145 if not expression: 3146 self._retreat(index) 3147 return None 3148 3149 this = self.expression(exp.Is, this=this, expression=expression) 3150 return self.expression(exp.Not, this=this) if negate else this 3151 3152 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3153 unnest = self._parse_unnest(with_alias=False) 3154 if unnest: 3155 this = self.expression(exp.In, this=this, unnest=unnest) 3156 elif self._match(TokenType.L_PAREN): 3157 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3158 3159 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3160 this = self.expression(exp.In, this=this, query=expressions[0]) 3161 else: 3162 this = self.expression(exp.In, this=this, expressions=expressions) 3163 3164 self._match_r_paren(this) 3165 else: 3166 this = self.expression(exp.In, this=this, field=self._parse_field()) 3167 3168 return this 3169 3170 def _parse_between(self, this: exp.Expression) -> exp.Between: 3171 low = self._parse_bitwise() 3172 self._match(TokenType.AND) 3173 high = self._parse_bitwise() 3174 return self.expression(exp.Between, this=this, low=low, 
high=high) 3175 3176 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3177 if not self._match(TokenType.ESCAPE): 3178 return this 3179 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3180 3181 def _parse_interval(self) -> t.Optional[exp.Interval]: 3182 index = self._index 3183 3184 if not self._match(TokenType.INTERVAL): 3185 return None 3186 3187 if self._match(TokenType.STRING, advance=False): 3188 this = self._parse_primary() 3189 else: 3190 this = self._parse_term() 3191 3192 if not this: 3193 self._retreat(index) 3194 return None 3195 3196 unit = self._parse_function() or self._parse_var(any_token=True) 3197 3198 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3199 # each INTERVAL expression into this canonical form so it's easy to transpile 3200 if this and this.is_number: 3201 this = exp.Literal.string(this.name) 3202 elif this and this.is_string: 3203 parts = this.name.split() 3204 3205 if len(parts) == 2: 3206 if unit: 3207 # This is not actually a unit, it's something else (e.g. 
a "window side") 3208 unit = None 3209 self._retreat(self._index - 1) 3210 3211 this = exp.Literal.string(parts[0]) 3212 unit = self.expression(exp.Var, this=parts[1]) 3213 3214 return self.expression(exp.Interval, this=this, unit=unit) 3215 3216 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3217 this = self._parse_term() 3218 3219 while True: 3220 if self._match_set(self.BITWISE): 3221 this = self.expression( 3222 self.BITWISE[self._prev.token_type], 3223 this=this, 3224 expression=self._parse_term(), 3225 ) 3226 elif self._match(TokenType.DQMARK): 3227 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3228 elif self._match_pair(TokenType.LT, TokenType.LT): 3229 this = self.expression( 3230 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3231 ) 3232 elif self._match_pair(TokenType.GT, TokenType.GT): 3233 this = self.expression( 3234 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3235 ) 3236 else: 3237 break 3238 3239 return this 3240 3241 def _parse_term(self) -> t.Optional[exp.Expression]: 3242 return self._parse_tokens(self._parse_factor, self.TERM) 3243 3244 def _parse_factor(self) -> t.Optional[exp.Expression]: 3245 return self._parse_tokens(self._parse_unary, self.FACTOR) 3246 3247 def _parse_unary(self) -> t.Optional[exp.Expression]: 3248 if self._match_set(self.UNARY_PARSERS): 3249 return self.UNARY_PARSERS[self._prev.token_type](self) 3250 return self._parse_at_time_zone(self._parse_type()) 3251 3252 def _parse_type(self) -> t.Optional[exp.Expression]: 3253 interval = self._parse_interval() 3254 if interval: 3255 return interval 3256 3257 index = self._index 3258 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3259 this = self._parse_column() 3260 3261 if data_type: 3262 if isinstance(this, exp.Literal): 3263 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3264 if parser: 3265 return parser(self, this, data_type) 3266 return self.expression(exp.Cast, 
this=this, to=data_type) 3267 if not data_type.expressions: 3268 self._retreat(index) 3269 return self._parse_column() 3270 return self._parse_column_ops(data_type) 3271 3272 return this and self._parse_column_ops(this) 3273 3274 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3275 this = self._parse_type() 3276 if not this: 3277 return None 3278 3279 return self.expression( 3280 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3281 ) 3282 3283 def _parse_types( 3284 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3285 ) -> t.Optional[exp.Expression]: 3286 index = self._index 3287 3288 prefix = self._match_text_seq("SYSUDTLIB", ".") 3289 3290 if not self._match_set(self.TYPE_TOKENS): 3291 identifier = allow_identifiers and self._parse_id_var( 3292 any_token=False, tokens=(TokenType.VAR,) 3293 ) 3294 3295 if identifier: 3296 tokens = self._tokenizer.tokenize(identifier.name) 3297 3298 if len(tokens) != 1: 3299 self.raise_error("Unexpected identifier", self._prev) 3300 3301 if tokens[0].token_type in self.TYPE_TOKENS: 3302 self._prev = tokens[0] 3303 elif self.SUPPORTS_USER_DEFINED_TYPES: 3304 type_name = identifier.name 3305 3306 while self._match(TokenType.DOT): 3307 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3308 3309 return exp.DataType.build(type_name, udt=True) 3310 else: 3311 return None 3312 else: 3313 return None 3314 3315 type_token = self._prev.token_type 3316 3317 if type_token == TokenType.PSEUDO_TYPE: 3318 return self.expression(exp.PseudoType, this=self._prev.text) 3319 3320 if type_token == TokenType.OBJECT_IDENTIFIER: 3321 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3322 3323 nested = type_token in self.NESTED_TYPE_TOKENS 3324 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3325 expressions = None 3326 maybe_func = False 3327 3328 if self._match(TokenType.L_PAREN): 3329 if is_struct: 3330 expressions = 
self._parse_csv(self._parse_struct_types) 3331 elif nested: 3332 expressions = self._parse_csv( 3333 lambda: self._parse_types( 3334 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3335 ) 3336 ) 3337 elif type_token in self.ENUM_TYPE_TOKENS: 3338 expressions = self._parse_csv(self._parse_equality) 3339 else: 3340 expressions = self._parse_csv(self._parse_type_size) 3341 3342 if not expressions or not self._match(TokenType.R_PAREN): 3343 self._retreat(index) 3344 return None 3345 3346 maybe_func = True 3347 3348 this: t.Optional[exp.Expression] = None 3349 values: t.Optional[t.List[exp.Expression]] = None 3350 3351 if nested and self._match(TokenType.LT): 3352 if is_struct: 3353 expressions = self._parse_csv(self._parse_struct_types) 3354 else: 3355 expressions = self._parse_csv( 3356 lambda: self._parse_types( 3357 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3358 ) 3359 ) 3360 3361 if not self._match(TokenType.GT): 3362 self.raise_error("Expecting >") 3363 3364 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3365 values = self._parse_csv(self._parse_conjunction) 3366 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3367 3368 if type_token in self.TIMESTAMPS: 3369 if self._match_text_seq("WITH", "TIME", "ZONE"): 3370 maybe_func = False 3371 tz_type = ( 3372 exp.DataType.Type.TIMETZ 3373 if type_token in self.TIMES 3374 else exp.DataType.Type.TIMESTAMPTZ 3375 ) 3376 this = exp.DataType(this=tz_type, expressions=expressions) 3377 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3378 maybe_func = False 3379 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3380 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3381 maybe_func = False 3382 elif type_token == TokenType.INTERVAL: 3383 unit = self._parse_var() 3384 3385 if self._match_text_seq("TO"): 3386 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3387 else: 3388 span = None 
3389 3390 if span or not unit: 3391 this = self.expression( 3392 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3393 ) 3394 else: 3395 this = self.expression(exp.Interval, unit=unit) 3396 3397 if maybe_func and check_func: 3398 index2 = self._index 3399 peek = self._parse_string() 3400 3401 if not peek: 3402 self._retreat(index) 3403 return None 3404 3405 self._retreat(index2) 3406 3407 if not this: 3408 if self._match_text_seq("UNSIGNED"): 3409 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3410 if not unsigned_type_token: 3411 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3412 3413 type_token = unsigned_type_token or type_token 3414 3415 this = exp.DataType( 3416 this=exp.DataType.Type[type_token.value], 3417 expressions=expressions, 3418 nested=nested, 3419 values=values, 3420 prefix=prefix, 3421 ) 3422 3423 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3424 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3425 3426 return this 3427 3428 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3429 this = self._parse_type() or self._parse_id_var() 3430 self._match(TokenType.COLON) 3431 return self._parse_column_def(this) 3432 3433 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3434 if not self._match_text_seq("AT", "TIME", "ZONE"): 3435 return this 3436 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3437 3438 def _parse_column(self) -> t.Optional[exp.Expression]: 3439 this = self._parse_field() 3440 if isinstance(this, exp.Identifier): 3441 this = self.expression(exp.Column, this=this) 3442 elif not this: 3443 return self._parse_bracket(this) 3444 return self._parse_column_ops(this) 3445 3446 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3447 this = self._parse_bracket(this) 3448 3449 while 
self._match_set(self.COLUMN_OPERATORS): 3450 op_token = self._prev.token_type 3451 op = self.COLUMN_OPERATORS.get(op_token) 3452 3453 if op_token == TokenType.DCOLON: 3454 field = self._parse_types() 3455 if not field: 3456 self.raise_error("Expected type") 3457 elif op and self._curr: 3458 self._advance() 3459 value = self._prev.text 3460 field = ( 3461 exp.Literal.number(value) 3462 if self._prev.token_type == TokenType.NUMBER 3463 else exp.Literal.string(value) 3464 ) 3465 else: 3466 field = self._parse_field(anonymous_func=True, any_token=True) 3467 3468 if isinstance(field, exp.Func): 3469 # bigquery allows function calls like x.y.count(...) 3470 # SAFE.SUBSTR(...) 3471 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3472 this = self._replace_columns_with_dots(this) 3473 3474 if op: 3475 this = op(self, this, field) 3476 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3477 this = self.expression( 3478 exp.Column, 3479 this=field, 3480 table=this.this, 3481 db=this.args.get("table"), 3482 catalog=this.args.get("db"), 3483 ) 3484 else: 3485 this = self.expression(exp.Dot, this=this, expression=field) 3486 this = self._parse_bracket(this) 3487 return this 3488 3489 def _parse_primary(self) -> t.Optional[exp.Expression]: 3490 if self._match_set(self.PRIMARY_PARSERS): 3491 token_type = self._prev.token_type 3492 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3493 3494 if token_type == TokenType.STRING: 3495 expressions = [primary] 3496 while self._match(TokenType.STRING): 3497 expressions.append(exp.Literal.string(self._prev.text)) 3498 3499 if len(expressions) > 1: 3500 return self.expression(exp.Concat, expressions=expressions) 3501 3502 return primary 3503 3504 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3505 return exp.Literal.number(f"0.{self._prev.text}") 3506 3507 if self._match(TokenType.L_PAREN): 3508 comments = self._prev_comments 3509 query = 
self._parse_select() 3510 3511 if query: 3512 expressions = [query] 3513 else: 3514 expressions = self._parse_expressions() 3515 3516 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3517 3518 if isinstance(this, exp.Subqueryable): 3519 this = self._parse_set_operations( 3520 self._parse_subquery(this=this, parse_alias=False) 3521 ) 3522 elif len(expressions) > 1: 3523 this = self.expression(exp.Tuple, expressions=expressions) 3524 else: 3525 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3526 3527 if this: 3528 this.add_comments(comments) 3529 3530 self._match_r_paren(expression=this) 3531 return this 3532 3533 return None 3534 3535 def _parse_field( 3536 self, 3537 any_token: bool = False, 3538 tokens: t.Optional[t.Collection[TokenType]] = None, 3539 anonymous_func: bool = False, 3540 ) -> t.Optional[exp.Expression]: 3541 return ( 3542 self._parse_primary() 3543 or self._parse_function(anonymous=anonymous_func) 3544 or self._parse_id_var(any_token=any_token, tokens=tokens) 3545 ) 3546 3547 def _parse_function( 3548 self, 3549 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3550 anonymous: bool = False, 3551 optional_parens: bool = True, 3552 ) -> t.Optional[exp.Expression]: 3553 if not self._curr: 3554 return None 3555 3556 token_type = self._curr.token_type 3557 this = self._curr.text 3558 upper = this.upper() 3559 3560 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3561 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3562 self._advance() 3563 return parser(self) 3564 3565 if not self._next or self._next.token_type != TokenType.L_PAREN: 3566 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3567 self._advance() 3568 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3569 3570 return None 3571 3572 if token_type not in self.FUNC_TOKENS: 3573 return None 3574 3575 self._advance(2) 3576 3577 parser = self.FUNCTION_PARSERS.get(upper) 3578 if parser and not 
anonymous: 3579 this = parser(self) 3580 else: 3581 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3582 3583 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3584 this = self.expression(subquery_predicate, this=self._parse_select()) 3585 self._match_r_paren() 3586 return this 3587 3588 if functions is None: 3589 functions = self.FUNCTIONS 3590 3591 function = functions.get(upper) 3592 3593 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3594 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3595 3596 if function and not anonymous: 3597 func = self.validate_expression(function(args), args) 3598 if not self.NORMALIZE_FUNCTIONS: 3599 func.meta["name"] = this 3600 this = func 3601 else: 3602 this = self.expression(exp.Anonymous, this=this, expressions=args) 3603 3604 self._match_r_paren(this) 3605 return self._parse_window(this) 3606 3607 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3608 return self._parse_column_def(self._parse_id_var()) 3609 3610 def _parse_user_defined_function( 3611 self, kind: t.Optional[TokenType] = None 3612 ) -> t.Optional[exp.Expression]: 3613 this = self._parse_id_var() 3614 3615 while self._match(TokenType.DOT): 3616 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3617 3618 if not self._match(TokenType.L_PAREN): 3619 return this 3620 3621 expressions = self._parse_csv(self._parse_function_parameter) 3622 self._match_r_paren() 3623 return self.expression( 3624 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3625 ) 3626 3627 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3628 literal = self._parse_primary() 3629 if literal: 3630 return self.expression(exp.Introducer, this=token.text, expression=literal) 3631 3632 return self.expression(exp.Identifier, this=token.text) 3633 3634 def _parse_session_parameter(self) -> exp.SessionParameter: 3635 kind = None 3636 this = 
self._parse_id_var() or self._parse_primary() 3637 3638 if this and self._match(TokenType.DOT): 3639 kind = this.name 3640 this = self._parse_var() or self._parse_primary() 3641 3642 return self.expression(exp.SessionParameter, this=this, kind=kind) 3643 3644 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3645 index = self._index 3646 3647 if self._match(TokenType.L_PAREN): 3648 expressions = t.cast( 3649 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3650 ) 3651 3652 if not self._match(TokenType.R_PAREN): 3653 self._retreat(index) 3654 else: 3655 expressions = [self._parse_id_var()] 3656 3657 if self._match_set(self.LAMBDAS): 3658 return self.LAMBDAS[self._prev.token_type](self, expressions) 3659 3660 self._retreat(index) 3661 3662 this: t.Optional[exp.Expression] 3663 3664 if self._match(TokenType.DISTINCT): 3665 this = self.expression( 3666 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3667 ) 3668 else: 3669 this = self._parse_select_or_expression(alias=alias) 3670 3671 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3672 3673 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3674 index = self._index 3675 3676 if not self.errors: 3677 try: 3678 if self._parse_select(nested=True): 3679 return this 3680 except ParseError: 3681 pass 3682 finally: 3683 self.errors.clear() 3684 self._retreat(index) 3685 3686 if not self._match(TokenType.L_PAREN): 3687 return this 3688 3689 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3690 3691 self._match_r_paren() 3692 return self.expression(exp.Schema, this=this, expressions=args) 3693 3694 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3695 return self._parse_column_def(self._parse_field(any_token=True)) 3696 3697 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3698 # column defs 
are not really columns, they're identifiers 3699 if isinstance(this, exp.Column): 3700 this = this.this 3701 3702 kind = self._parse_types(schema=True) 3703 3704 if self._match_text_seq("FOR", "ORDINALITY"): 3705 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3706 3707 constraints: t.List[exp.Expression] = [] 3708 3709 if not kind and self._match(TokenType.ALIAS): 3710 constraints.append( 3711 self.expression( 3712 exp.ComputedColumnConstraint, 3713 this=self._parse_conjunction(), 3714 persisted=self._match_text_seq("PERSISTED"), 3715 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3716 ) 3717 ) 3718 3719 while True: 3720 constraint = self._parse_column_constraint() 3721 if not constraint: 3722 break 3723 constraints.append(constraint) 3724 3725 if not kind and not constraints: 3726 return this 3727 3728 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3729 3730 def _parse_auto_increment( 3731 self, 3732 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3733 start = None 3734 increment = None 3735 3736 if self._match(TokenType.L_PAREN, advance=False): 3737 args = self._parse_wrapped_csv(self._parse_bitwise) 3738 start = seq_get(args, 0) 3739 increment = seq_get(args, 1) 3740 elif self._match_text_seq("START"): 3741 start = self._parse_bitwise() 3742 self._match_text_seq("INCREMENT") 3743 increment = self._parse_bitwise() 3744 3745 if start and increment: 3746 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3747 3748 return exp.AutoIncrementColumnConstraint() 3749 3750 def _parse_compress(self) -> exp.CompressColumnConstraint: 3751 if self._match(TokenType.L_PAREN, advance=False): 3752 return self.expression( 3753 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3754 ) 3755 3756 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3757 3758 def _parse_generated_as_identity(self) 
-> exp.GeneratedAsIdentityColumnConstraint: 3759 if self._match_text_seq("BY", "DEFAULT"): 3760 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3761 this = self.expression( 3762 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3763 ) 3764 else: 3765 self._match_text_seq("ALWAYS") 3766 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3767 3768 self._match(TokenType.ALIAS) 3769 identity = self._match_text_seq("IDENTITY") 3770 3771 if self._match(TokenType.L_PAREN): 3772 if self._match(TokenType.START_WITH): 3773 this.set("start", self._parse_bitwise()) 3774 if self._match_text_seq("INCREMENT", "BY"): 3775 this.set("increment", self._parse_bitwise()) 3776 if self._match_text_seq("MINVALUE"): 3777 this.set("minvalue", self._parse_bitwise()) 3778 if self._match_text_seq("MAXVALUE"): 3779 this.set("maxvalue", self._parse_bitwise()) 3780 3781 if self._match_text_seq("CYCLE"): 3782 this.set("cycle", True) 3783 elif self._match_text_seq("NO", "CYCLE"): 3784 this.set("cycle", False) 3785 3786 if not identity: 3787 this.set("expression", self._parse_bitwise()) 3788 3789 self._match_r_paren() 3790 3791 return this 3792 3793 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3794 self._match_text_seq("LENGTH") 3795 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3796 3797 def _parse_not_constraint( 3798 self, 3799 ) -> t.Optional[exp.Expression]: 3800 if self._match_text_seq("NULL"): 3801 return self.expression(exp.NotNullColumnConstraint) 3802 if self._match_text_seq("CASESPECIFIC"): 3803 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3804 if self._match_text_seq("FOR", "REPLICATION"): 3805 return self.expression(exp.NotForReplicationColumnConstraint) 3806 return None 3807 3808 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3809 if self._match(TokenType.CONSTRAINT): 3810 this = self._parse_id_var() 3811 else: 3812 this = None 3813 3814 
if self._match_texts(self.CONSTRAINT_PARSERS): 3815 return self.expression( 3816 exp.ColumnConstraint, 3817 this=this, 3818 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3819 ) 3820 3821 return this 3822 3823 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3824 if not self._match(TokenType.CONSTRAINT): 3825 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3826 3827 this = self._parse_id_var() 3828 expressions = [] 3829 3830 while True: 3831 constraint = self._parse_unnamed_constraint() or self._parse_function() 3832 if not constraint: 3833 break 3834 expressions.append(constraint) 3835 3836 return self.expression(exp.Constraint, this=this, expressions=expressions) 3837 3838 def _parse_unnamed_constraint( 3839 self, constraints: t.Optional[t.Collection[str]] = None 3840 ) -> t.Optional[exp.Expression]: 3841 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3842 return None 3843 3844 constraint = self._prev.text.upper() 3845 if constraint not in self.CONSTRAINT_PARSERS: 3846 self.raise_error(f"No parser found for schema constraint {constraint}.") 3847 3848 return self.CONSTRAINT_PARSERS[constraint](self) 3849 3850 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3851 self._match_text_seq("KEY") 3852 return self.expression( 3853 exp.UniqueColumnConstraint, 3854 this=self._parse_schema(self._parse_id_var(any_token=False)), 3855 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3856 ) 3857 3858 def _parse_key_constraint_options(self) -> t.List[str]: 3859 options = [] 3860 while True: 3861 if not self._curr: 3862 break 3863 3864 if self._match(TokenType.ON): 3865 action = None 3866 on = self._advance_any() and self._prev.text 3867 3868 if self._match_text_seq("NO", "ACTION"): 3869 action = "NO ACTION" 3870 elif self._match_text_seq("CASCADE"): 3871 action = "CASCADE" 3872 elif self._match_text_seq("RESTRICT"): 3873 action = "RESTRICT" 3874 elif 
self._match_pair(TokenType.SET, TokenType.NULL): 3875 action = "SET NULL" 3876 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3877 action = "SET DEFAULT" 3878 else: 3879 self.raise_error("Invalid key constraint") 3880 3881 options.append(f"ON {on} {action}") 3882 elif self._match_text_seq("NOT", "ENFORCED"): 3883 options.append("NOT ENFORCED") 3884 elif self._match_text_seq("DEFERRABLE"): 3885 options.append("DEFERRABLE") 3886 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3887 options.append("INITIALLY DEFERRED") 3888 elif self._match_text_seq("NORELY"): 3889 options.append("NORELY") 3890 elif self._match_text_seq("MATCH", "FULL"): 3891 options.append("MATCH FULL") 3892 else: 3893 break 3894 3895 return options 3896 3897 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3898 if match and not self._match(TokenType.REFERENCES): 3899 return None 3900 3901 expressions = None 3902 this = self._parse_table(schema=True) 3903 options = self._parse_key_constraint_options() 3904 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3905 3906 def _parse_foreign_key(self) -> exp.ForeignKey: 3907 expressions = self._parse_wrapped_id_vars() 3908 reference = self._parse_references() 3909 options = {} 3910 3911 while self._match(TokenType.ON): 3912 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3913 self.raise_error("Expected DELETE or UPDATE") 3914 3915 kind = self._prev.text.lower() 3916 3917 if self._match_text_seq("NO", "ACTION"): 3918 action = "NO ACTION" 3919 elif self._match(TokenType.SET): 3920 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3921 action = "SET " + self._prev.text.upper() 3922 else: 3923 self._advance() 3924 action = self._prev.text.upper() 3925 3926 options[kind] = action 3927 3928 return self.expression( 3929 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3930 ) 3931 3932 def _parse_primary_key_part(self) -> 
t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a column-level PRIMARY KEY constraint or a table-level PRIMARY KEY (...) definition."""
        # ASC/DESC may follow PRIMARY KEY; only DESC is recorded (ASC is the default).
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list (and not inside properties) -> column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: subscripts, slices, ARRAY literals, struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no start bound, e.g. x[:y].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indexes relative to the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the keyword form IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all — rewind past the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was consumed by the caller."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse the interior of EXTRACT(part FROM expr); a comma may also separate the operands."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN column])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant -> CastToStrType.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT to a temporal type is normalized into StrToDate / StrToTime.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the interior of CONCAT(...), honoring the dialect's NULL / strictness settings."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse the interior of CONCAT_WS(separator, value, ...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the interior of CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values may be NULL at runtime, so NULL-equality is
                # emulated with an extra `expr IS NULL AND search IS NULL` disjunct.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A trailing unpaired argument is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] k (: | ,) [VALUE] v` pair, as used inside JSON_OBJECT(...)."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when it is followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e.
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...), including NULL handling and UNIQUE KEYS."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) arguments; dialect flags decide argument order and 1-arg meaning."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH (cols) AGAINST ('expr' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column of the WITH (...) schema clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or a LOCATE-style comma-separated argument list."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first operand was the removal set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if
self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a named window reference rather than an inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / expr, plus its side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a parenthesized alias list) after `this`; `explicit` requires AS."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or any token usable as one (`tokens` or ID_VAR_TOKENS)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past any non-reserved token and return it, else None."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Not actually a placeholder -> undo the token consumption.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping empty results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain; `expressions` maps token types to node classes."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional` allows the parens to be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] with comma-separated transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode may be multiple VAR tokens, e.g. "ISOLATION LEVEL ...".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append("
".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed AND [NO] CHAIN is only attached to COMMIT and is dropped
        # for ROLLBACK — confirm this asymmetry is intentional.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] column definition [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Inside ALTER TABLE, a bare DROP defaults to dropping a column.
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT action (CHECK, FOREIGN KEY or PRIMARY KEY)."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] col DROP DEFAULT / SET DEFAULT expr / [SET DATA] TYPE ..."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything not fully understood falls back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse when every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY",
"SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form `name = value` (or `name TO value`)."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Leftover tokens mean the SET statement wasn't fully understood -> raw Command.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return the first matching (possibly multi-word) option as an exp.Var, else None."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: kind plus optional (key value, ...) sub-settings."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a ( [MIN x] MAX y ) dictionary range clause."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # MIN defaults to 0 when only MAX is given.
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        """Parse `this FOR x IN iterable [IF cond]` comprehension syntax, or None on no match."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against `trie`; return the matching parser or None."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match — restore the token position.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
5095 if not self._curr: 5096 return None 5097 5098 if self._curr.token_type == token_type: 5099 if advance: 5100 self._advance() 5101 self._add_comments(expression) 5102 return True 5103 5104 return None 5105 5106 def _match_set(self, types, advance=True): 5107 if not self._curr: 5108 return None 5109 5110 if self._curr.token_type in types: 5111 if advance: 5112 self._advance() 5113 return True 5114 5115 return None 5116 5117 def _match_pair(self, token_type_a, token_type_b, advance=True): 5118 if not self._curr or not self._next: 5119 return None 5120 5121 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5122 if advance: 5123 self._advance(2) 5124 return True 5125 5126 return None 5127 5128 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5129 if not self._match(TokenType.L_PAREN, expression=expression): 5130 self.raise_error("Expecting (") 5131 5132 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5133 if not self._match(TokenType.R_PAREN, expression=expression): 5134 self.raise_error("Expecting )") 5135 5136 def _match_texts(self, texts, advance=True): 5137 if self._curr and self._curr.text.upper() in texts: 5138 if advance: 5139 self._advance() 5140 return True 5141 return False 5142 5143 def _match_text_seq(self, *texts, advance=True): 5144 index = self._index 5145 for text in texts: 5146 if self._curr and self._curr.text.upper() == text: 5147 self._advance() 5148 else: 5149 self._retreat(index) 5150 return False 5151 5152 if not advance: 5153 self._retreat(index) 5154 5155 return True 5156 5157 @t.overload 5158 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5159 ... 5160 5161 @t.overload 5162 def _replace_columns_with_dots( 5163 self, this: t.Optional[exp.Expression] 5164 ) -> t.Optional[exp.Expression]: 5165 ... 
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite ``Column`` nodes into ``Dot`` chains / bare identifiers.

        A column with a ``table`` qualifier becomes ``Dot(this=table, expression=<name>)``;
        an unqualified column collapses to its inner identifier. ``Dot`` nodes are
        traversed so that nested children get rewritten as well.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite nested children first, then transform this node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters inside ``node``.

        Each ``Column`` whose first part names a lambda variable is replaced by
        either a ``Dot`` (when the column is table-qualified) or its bare inner
        identifier. When the column sits inside a ``Dot`` chain, the outermost
        ``Dot`` of that chain is replaced instead.

        Args:
            node: The expression tree to rewrite (may be None).
            lambda_variables: Names of the lambda's parameters.

        Returns:
            The rewritten node (a new root if the root itself was replaced).
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Walk up to the outermost Dot ancestor and replace it in place.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # while/else: no Dot ancestor was replaced above, so the
                    # column itself is swapped out (or becomes the new root).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Wrap each truthy value in ``COALESCE(CAST(value AS TEXT), '')``.

        Falsy entries are dropped; the result is a new list of expressions that
        can never evaluate to NULL.
        """
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument yields a ``StarMap``; otherwise the arguments are
    consumed in (key, value) pairs and wrapped in a ``VarMap`` holding one
    array of keys and one array of values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps a SQL function name to a callable that builds its expression node from
    # the parsed argument list. Every function known to `exp` gets a default
    # `from_arg_list` builder; the explicit entries below override/extend that set.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # NOTE(review): operand order is swapped here (this=args[1], expression=args[0])
        # — confirm this matches the intended GLOB argument order.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT, then take the first 10 characters (the date portion).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without trailing parentheses, keyed by token.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate — confirm intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Tokens for struct-like data types.
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    # Tokens for types that can be parameterized by other types (e.g. ARRAY<INT>).
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    # Tokens for enum types, including sized variants.
    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 } 220 221 SUBQUERY_PREDICATES = { 222 TokenType.ANY: exp.Any, 223 TokenType.ALL: exp.All, 224 TokenType.EXISTS: exp.Exists, 225 TokenType.SOME: exp.Any, 226 } 227 228 RESERVED_KEYWORDS = { 229 *Tokenizer.SINGLE_TOKENS.values(), 230 TokenType.SELECT, 231 } 232 233 DB_CREATABLES = { 234 TokenType.DATABASE, 235 TokenType.SCHEMA, 236 TokenType.TABLE, 237 TokenType.VIEW, 238 TokenType.DICTIONARY, 239 } 240 241 CREATABLES = { 242 TokenType.COLUMN, 243 TokenType.FUNCTION, 244 TokenType.INDEX, 245 TokenType.PROCEDURE, 246 *DB_CREATABLES, 247 } 248 249 # Tokens that can represent identifiers 250 ID_VAR_TOKENS = { 251 TokenType.VAR, 252 TokenType.ANTI, 253 TokenType.APPLY, 254 TokenType.ASC, 255 TokenType.AUTO_INCREMENT, 256 TokenType.BEGIN, 257 TokenType.CACHE, 258 TokenType.CASE, 259 TokenType.COLLATE, 260 TokenType.COMMAND, 261 TokenType.COMMENT, 262 TokenType.COMMIT, 263 TokenType.CONSTRAINT, 264 TokenType.DEFAULT, 265 TokenType.DELETE, 266 TokenType.DESC, 267 TokenType.DESCRIBE, 268 TokenType.DICTIONARY, 269 TokenType.DIV, 270 TokenType.END, 271 TokenType.EXECUTE, 272 TokenType.ESCAPE, 273 TokenType.FALSE, 274 TokenType.FIRST, 275 TokenType.FILTER, 276 TokenType.FORMAT, 277 TokenType.FULL, 278 TokenType.IS, 279 TokenType.ISNULL, 280 TokenType.INTERVAL, 281 TokenType.KEEP, 282 TokenType.KILL, 283 TokenType.LEFT, 284 TokenType.LOAD, 285 TokenType.MERGE, 286 TokenType.NATURAL, 287 TokenType.NEXT, 288 TokenType.OFFSET, 289 TokenType.ORDINALITY, 290 TokenType.OVERLAPS, 291 TokenType.OVERWRITE, 292 TokenType.PARTITION, 293 TokenType.PERCENT, 294 TokenType.PIVOT, 295 TokenType.PRAGMA, 296 TokenType.RANGE, 297 TokenType.REFERENCES, 298 TokenType.RIGHT, 299 TokenType.ROW, 300 TokenType.ROWS, 301 TokenType.SEMI, 302 TokenType.SET, 
303 TokenType.SETTINGS, 304 TokenType.SHOW, 305 TokenType.TEMPORARY, 306 TokenType.TOP, 307 TokenType.TRUE, 308 TokenType.UNIQUE, 309 TokenType.UNPIVOT, 310 TokenType.UPDATE, 311 TokenType.VOLATILE, 312 TokenType.WINDOW, 313 *CREATABLES, 314 *SUBQUERY_PREDICATES, 315 *TYPE_TOKENS, 316 *NO_PAREN_FUNCTIONS, 317 } 318 319 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 320 321 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 322 TokenType.ANTI, 323 TokenType.APPLY, 324 TokenType.ASOF, 325 TokenType.FULL, 326 TokenType.LEFT, 327 TokenType.LOCK, 328 TokenType.NATURAL, 329 TokenType.OFFSET, 330 TokenType.RIGHT, 331 TokenType.SEMI, 332 TokenType.WINDOW, 333 } 334 335 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 336 337 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 338 339 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 340 341 FUNC_TOKENS = { 342 TokenType.COMMAND, 343 TokenType.CURRENT_DATE, 344 TokenType.CURRENT_DATETIME, 345 TokenType.CURRENT_TIMESTAMP, 346 TokenType.CURRENT_TIME, 347 TokenType.CURRENT_USER, 348 TokenType.FILTER, 349 TokenType.FIRST, 350 TokenType.FORMAT, 351 TokenType.GLOB, 352 TokenType.IDENTIFIER, 353 TokenType.INDEX, 354 TokenType.ISNULL, 355 TokenType.ILIKE, 356 TokenType.INSERT, 357 TokenType.LIKE, 358 TokenType.MERGE, 359 TokenType.OFFSET, 360 TokenType.PRIMARY_KEY, 361 TokenType.RANGE, 362 TokenType.REPLACE, 363 TokenType.RLIKE, 364 TokenType.ROW, 365 TokenType.UNNEST, 366 TokenType.VAR, 367 TokenType.LEFT, 368 TokenType.RIGHT, 369 TokenType.DATE, 370 TokenType.DATETIME, 371 TokenType.TABLE, 372 TokenType.TIMESTAMP, 373 TokenType.TIMESTAMPTZ, 374 TokenType.WINDOW, 375 TokenType.XOR, 376 *TYPE_TOKENS, 377 *SUBQUERY_PREDICATES, 378 } 379 380 CONJUNCTION = { 381 TokenType.AND: exp.And, 382 TokenType.OR: exp.Or, 383 } 384 385 EQUALITY = { 386 TokenType.EQ: exp.EQ, 387 TokenType.NEQ: exp.NEQ, 388 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 389 } 390 391 COMPARISON = { 392 TokenType.GT: exp.GT, 393 TokenType.GTE: exp.GTE, 394 
TokenType.LT: exp.LT, 395 TokenType.LTE: exp.LTE, 396 } 397 398 BITWISE = { 399 TokenType.AMP: exp.BitwiseAnd, 400 TokenType.CARET: exp.BitwiseXor, 401 TokenType.PIPE: exp.BitwiseOr, 402 TokenType.DPIPE: exp.DPipe, 403 } 404 405 TERM = { 406 TokenType.DASH: exp.Sub, 407 TokenType.PLUS: exp.Add, 408 TokenType.MOD: exp.Mod, 409 TokenType.COLLATE: exp.Collate, 410 } 411 412 FACTOR = { 413 TokenType.DIV: exp.IntDiv, 414 TokenType.LR_ARROW: exp.Distance, 415 TokenType.SLASH: exp.Div, 416 TokenType.STAR: exp.Mul, 417 } 418 419 TIMES = { 420 TokenType.TIME, 421 TokenType.TIMETZ, 422 } 423 424 TIMESTAMPS = { 425 TokenType.TIMESTAMP, 426 TokenType.TIMESTAMPTZ, 427 TokenType.TIMESTAMPLTZ, 428 *TIMES, 429 } 430 431 SET_OPERATIONS = { 432 TokenType.UNION, 433 TokenType.INTERSECT, 434 TokenType.EXCEPT, 435 } 436 437 JOIN_METHODS = { 438 TokenType.NATURAL, 439 TokenType.ASOF, 440 } 441 442 JOIN_SIDES = { 443 TokenType.LEFT, 444 TokenType.RIGHT, 445 TokenType.FULL, 446 } 447 448 JOIN_KINDS = { 449 TokenType.INNER, 450 TokenType.OUTER, 451 TokenType.CROSS, 452 TokenType.SEMI, 453 TokenType.ANTI, 454 } 455 456 JOIN_HINTS: t.Set[str] = set() 457 458 LAMBDAS = { 459 TokenType.ARROW: lambda self, expressions: self.expression( 460 exp.Lambda, 461 this=self._replace_lambda( 462 self._parse_conjunction(), 463 {node.name for node in expressions}, 464 ), 465 expressions=expressions, 466 ), 467 TokenType.FARROW: lambda self, expressions: self.expression( 468 exp.Kwarg, 469 this=exp.var(expressions[0].name), 470 expression=self._parse_conjunction(), 471 ), 472 } 473 474 COLUMN_OPERATORS = { 475 TokenType.DOT: None, 476 TokenType.DCOLON: lambda self, this, to: self.expression( 477 exp.Cast if self.STRICT_CAST else exp.TryCast, 478 this=this, 479 to=to, 480 ), 481 TokenType.ARROW: lambda self, this, path: self.expression( 482 exp.JSONExtract, 483 this=this, 484 expression=path, 485 ), 486 TokenType.DARROW: lambda self, this, path: self.expression( 487 exp.JSONExtractScalar, 488 this=this, 489 
expression=path, 490 ), 491 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 492 exp.JSONBExtract, 493 this=this, 494 expression=path, 495 ), 496 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 497 exp.JSONBExtractScalar, 498 this=this, 499 expression=path, 500 ), 501 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 502 exp.JSONBContains, 503 this=this, 504 expression=key, 505 ), 506 } 507 508 EXPRESSION_PARSERS = { 509 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 510 exp.Column: lambda self: self._parse_column(), 511 exp.Condition: lambda self: self._parse_conjunction(), 512 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 513 exp.Expression: lambda self: self._parse_statement(), 514 exp.From: lambda self: self._parse_from(), 515 exp.Group: lambda self: self._parse_group(), 516 exp.Having: lambda self: self._parse_having(), 517 exp.Identifier: lambda self: self._parse_id_var(), 518 exp.Join: lambda self: self._parse_join(), 519 exp.Lambda: lambda self: self._parse_lambda(), 520 exp.Lateral: lambda self: self._parse_lateral(), 521 exp.Limit: lambda self: self._parse_limit(), 522 exp.Offset: lambda self: self._parse_offset(), 523 exp.Order: lambda self: self._parse_order(), 524 exp.Ordered: lambda self: self._parse_ordered(), 525 exp.Properties: lambda self: self._parse_properties(), 526 exp.Qualify: lambda self: self._parse_qualify(), 527 exp.Returning: lambda self: self._parse_returning(), 528 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 529 exp.Table: lambda self: self._parse_table_parts(), 530 exp.TableAlias: lambda self: self._parse_table_alias(), 531 exp.Where: lambda self: self._parse_where(), 532 exp.Window: lambda self: self._parse_named_window(), 533 exp.With: lambda self: self._parse_with(), 534 "JOIN_TYPE": lambda self: self._parse_join_parts(), 535 } 536 537 STATEMENT_PARSERS = { 538 TokenType.ALTER: lambda self: 
self._parse_alter(), 539 TokenType.BEGIN: lambda self: self._parse_transaction(), 540 TokenType.CACHE: lambda self: self._parse_cache(), 541 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 542 TokenType.COMMENT: lambda self: self._parse_comment(), 543 TokenType.CREATE: lambda self: self._parse_create(), 544 TokenType.DELETE: lambda self: self._parse_delete(), 545 TokenType.DESC: lambda self: self._parse_describe(), 546 TokenType.DESCRIBE: lambda self: self._parse_describe(), 547 TokenType.DROP: lambda self: self._parse_drop(), 548 TokenType.INSERT: lambda self: self._parse_insert(), 549 TokenType.KILL: lambda self: self._parse_kill(), 550 TokenType.LOAD: lambda self: self._parse_load(), 551 TokenType.MERGE: lambda self: self._parse_merge(), 552 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 553 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 554 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 555 TokenType.SET: lambda self: self._parse_set(), 556 TokenType.UNCACHE: lambda self: self._parse_uncache(), 557 TokenType.UPDATE: lambda self: self._parse_update(), 558 TokenType.USE: lambda self: self.expression( 559 exp.Use, 560 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 561 and exp.var(self._prev.text), 562 this=self._parse_table(schema=False), 563 ), 564 } 565 566 UNARY_PARSERS = { 567 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 568 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 569 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 570 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 571 } 572 573 PRIMARY_PARSERS = { 574 TokenType.STRING: lambda self, token: self.expression( 575 exp.Literal, this=token.text, is_string=True 576 ), 577 TokenType.NUMBER: lambda self, token: self.expression( 578 exp.Literal, 
this=token.text, is_string=False 579 ), 580 TokenType.STAR: lambda self, _: self.expression( 581 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 582 ), 583 TokenType.NULL: lambda self, _: self.expression(exp.Null), 584 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 585 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 586 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 587 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 588 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 589 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 590 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 591 exp.National, this=token.text 592 ), 593 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 594 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 595 } 596 597 PLACEHOLDER_PARSERS = { 598 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 599 TokenType.PARAMETER: lambda self: self._parse_parameter(), 600 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 601 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 602 else None, 603 } 604 605 RANGE_PARSERS = { 606 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 607 TokenType.GLOB: binary_range_parser(exp.Glob), 608 TokenType.ILIKE: binary_range_parser(exp.ILike), 609 TokenType.IN: lambda self, this: self._parse_in(this), 610 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 611 TokenType.IS: lambda self, this: self._parse_is(this), 612 TokenType.LIKE: binary_range_parser(exp.Like), 613 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 614 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 615 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 616 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 617 } 618 619 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 620 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 621 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 622 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 623 "CHARACTER SET": lambda self: self._parse_character_set(), 624 "CHECKSUM": lambda self: self._parse_checksum(), 625 "CLUSTER BY": lambda self: self._parse_cluster(), 626 "CLUSTERED": lambda self: self._parse_clustered_by(), 627 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 628 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 629 "COPY": lambda self: self._parse_copy_property(), 630 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 631 "DEFINER": lambda self: self._parse_definer(), 632 "DETERMINISTIC": lambda self: self.expression( 633 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 634 ), 635 "DISTKEY": lambda self: self._parse_distkey(), 636 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 637 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 638 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 639 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 640 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 641 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 642 "FREESPACE": lambda self: self._parse_freespace(), 643 "HEAP": lambda self: self.expression(exp.HeapProperty), 644 "IMMUTABLE": lambda self: self.expression( 645 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 646 ), 647 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 648 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 649 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 650 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 651 "LIKE": lambda self: self._parse_create_like(), 652 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 653 "LOCK": lambda self: self._parse_locking(), 654 "LOCKING": lambda self: self._parse_locking(), 655 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 656 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 657 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 658 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 659 "NO": lambda self: self._parse_no_property(), 660 "ON": lambda self: self._parse_on_property(), 661 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 662 "PARTITION BY": lambda self: self._parse_partitioned_by(), 663 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 664 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 665 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 666 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 667 "RETURNS": lambda self: self._parse_returns(), 668 "ROW": lambda self: self._parse_row(), 669 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 670 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 671 "SETTINGS": lambda self: self.expression( 672 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 673 ), 674 "SORTKEY": lambda self: self._parse_sortkey(), 675 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 676 "STABLE": lambda self: self.expression( 677 exp.StabilityProperty, this=exp.Literal.string("STABLE") 678 ), 679 "STORED": lambda self: self._parse_stored(), 680 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 681 "TEMP": lambda self: 
self.expression(exp.TemporaryProperty), 682 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 683 "TO": lambda self: self._parse_to_table(), 684 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 685 "TTL": lambda self: self._parse_ttl(), 686 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 687 "VOLATILE": lambda self: self._parse_volatile_property(), 688 "WITH": lambda self: self._parse_with_property(), 689 } 690 691 CONSTRAINT_PARSERS = { 692 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 693 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 694 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 695 "CHARACTER SET": lambda self: self.expression( 696 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 697 ), 698 "CHECK": lambda self: self.expression( 699 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 700 ), 701 "COLLATE": lambda self: self.expression( 702 exp.CollateColumnConstraint, this=self._parse_var() 703 ), 704 "COMMENT": lambda self: self.expression( 705 exp.CommentColumnConstraint, this=self._parse_string() 706 ), 707 "COMPRESS": lambda self: self._parse_compress(), 708 "CLUSTERED": lambda self: self.expression( 709 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 710 ), 711 "NONCLUSTERED": lambda self: self.expression( 712 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 713 ), 714 "DEFAULT": lambda self: self.expression( 715 exp.DefaultColumnConstraint, this=self._parse_bitwise() 716 ), 717 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 718 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 719 "FORMAT": lambda self: self.expression( 720 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 721 ), 722 "GENERATED": lambda self: 
        self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a column constraint; a bare ON <id> is a property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action keyword -> parser producing that action's expression.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions parsed without a parenthesized argument list (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that can never be interpreted as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(1 AS x)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> dedicated parser, for functions whose argument syntax
    # deviates from a plain comma-separated list of expressions.
    # Note: CAST/CONVERT use STRICT_CAST, while the TRY_*/SAFE_* variants pass False.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Clause-introducing token -> (modifier key, parsed clause).
    # Note: FETCH shares the "limit" slot with LIMIT, and FOR/LOCK both map to "locks".
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scope/kind keyword -> parser for the remainder of the item.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty here, populated by dialect subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Data type -> parser used to wrap a literal annotated with that type.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement (e.g. CTAS).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may directly precede VOLATILE when it's a Teradata table option
    # rather than a stability specifier (see _parse_volatile_property).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords (sqlite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS -
    {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Passed to _parse_cast/_parse_convert for CAST/CONVERT (TRY_* variants use False).
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Logarithm parsing dialect flags — consumed by _parse_logarithm.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments.
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Initialize the parser; see the class docstring for argument semantics."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
976 """ 977 errors = [] 978 for expression_type in ensure_list(expression_types): 979 parser = self.EXPRESSION_PARSERS.get(expression_type) 980 if not parser: 981 raise TypeError(f"No parser registered for {expression_type}") 982 983 try: 984 return self._parse(parser, raw_tokens, sql) 985 except ParseError as e: 986 e.errors[0]["into_expression"] = expression_type 987 errors.append(e) 988 989 raise ParseError( 990 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 991 errors=merge_errors(errors), 992 ) from errors[-1] 993 994 def _parse( 995 self, 996 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 997 raw_tokens: t.List[Token], 998 sql: t.Optional[str] = None, 999 ) -> t.List[t.Optional[exp.Expression]]: 1000 self.reset() 1001 self.sql = sql or "" 1002 1003 total = len(raw_tokens) 1004 chunks: t.List[t.List[Token]] = [[]] 1005 1006 for i, token in enumerate(raw_tokens): 1007 if token.token_type == TokenType.SEMICOLON: 1008 if i < total - 1: 1009 chunks.append([]) 1010 else: 1011 chunks[-1].append(token) 1012 1013 expressions = [] 1014 1015 for tokens in chunks: 1016 self._index = -1 1017 self._tokens = tokens 1018 self._advance() 1019 1020 expressions.append(parse_method(self)) 1021 1022 if self._index < len(self._tokens): 1023 self.raise_error("Invalid expression / Unexpected token") 1024 1025 self.check_errors() 1026 1027 return expressions 1028 1029 def check_errors(self) -> None: 1030 """Logs or raises any found errors, depending on the chosen error level setting.""" 1031 if self.error_level == ErrorLevel.WARN: 1032 for error in self.errors: 1033 logger.error(str(error)) 1034 elif self.error_level == ErrorLevel.RAISE and self.errors: 1035 raise ParseError( 1036 concat_messages(self.errors, self.max_errors), 1037 errors=merge_errors(self.errors), 1038 ) 1039 1040 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1041 """ 1042 Appends an error in the list of recorded errors or raises it, depending on 
the chosen 1043 error level setting. 1044 """ 1045 token = token or self._curr or self._prev or Token.string("") 1046 start = token.start 1047 end = token.end + 1 1048 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1049 highlight = self.sql[start:end] 1050 end_context = self.sql[end : end + self.error_message_context] 1051 1052 error = ParseError.new( 1053 f"{message}. Line {token.line}, Col: {token.col}.\n" 1054 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1055 description=message, 1056 line=token.line, 1057 col=token.col, 1058 start_context=start_context, 1059 highlight=highlight, 1060 end_context=end_context, 1061 ) 1062 1063 if self.error_level == ErrorLevel.IMMEDIATE: 1064 raise error 1065 1066 self.errors.append(error) 1067 1068 def expression( 1069 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1070 ) -> E: 1071 """ 1072 Creates a new, validated Expression. 1073 1074 Args: 1075 exp_class: The expression class to instantiate. 1076 comments: An optional list of comments to attach to the expression. 1077 kwargs: The arguments to set for the expression along with their respective values. 1078 1079 Returns: 1080 The target expression. 1081 """ 1082 instance = exp_class(**kwargs) 1083 instance.add_comments(comments) if comments else self._add_comments(instance) 1084 return self.validate_expression(instance) 1085 1086 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1087 if expression and self._prev_comments: 1088 expression.add_comments(self._prev_comments) 1089 self._prev_comments = None 1090 1091 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1092 """ 1093 Validates an Expression, making sure that all its mandatory arguments are set. 1094 1095 Args: 1096 expression: The expression to validate. 1097 args: An optional list of items that was used to instantiate the expression, if it's a Func. 

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor (forward, or backward via _retreat) and refresh the
        # _curr/_next/_prev/_prev_comments convenience pointers.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remainder of the statement as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (expressions, WHERE, GROUP BY, SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: a registered statement parser, a raw command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Not a recognized creatable kind: treat the rest as an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE [OR REPLACE] statement for any creatable kind."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at the various clause positions.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            # Snowflake-style [SHALLOW] CLONE <table> [AT|BEFORE (<kind> => <expr>)]
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifier kwargs that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")
        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property, or return None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS, either INPUTFORMAT/OUTPUTFORMAT strings or a bare format name."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Property of the form `<NAME> [= | AS] <field>`.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into a single exp.Properties node, if any."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is the Teradata table option;
        # elsewhere it is treated as a function stability specifier.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the various WITH ... property forms (wrapped list, JOURNAL, DATA, ...)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY not followed by GRANTS: back off the already-consumed keyword.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # Either `MERGEBLOCKRATIO = <n> [PERCENT]` or the NO/DEFAULT flag form.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: LOCKING <kind> [<name>] FOR|IN <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse RETURNS <type> or RETURNS TABLE [<schema>] (angle-bracket or schema form)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
1769 value = self.expression( 1770 exp.Schema, 1771 this="TABLE", 1772 expressions=self._parse_csv(self._parse_struct_types), 1773 ) 1774 if not self._match(TokenType.GT): 1775 self.raise_error("Expecting >") 1776 else: 1777 value = self._parse_schema(exp.var("TABLE")) 1778 else: 1779 value = self._parse_types() 1780 1781 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1782 1783 def _parse_describe(self) -> exp.Describe: 1784 kind = self._match_set(self.CREATABLES) and self._prev.text 1785 this = self._parse_table(schema=True) 1786 properties = self._parse_properties() 1787 expressions = properties.expressions if properties else None 1788 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1789 1790 def _parse_insert(self) -> exp.Insert: 1791 comments = ensure_list(self._prev_comments) 1792 overwrite = self._match(TokenType.OVERWRITE) 1793 ignore = self._match(TokenType.IGNORE) 1794 local = self._match_text_seq("LOCAL") 1795 alternative = None 1796 1797 if self._match_text_seq("DIRECTORY"): 1798 this: t.Optional[exp.Expression] = self.expression( 1799 exp.Directory, 1800 this=self._parse_var_or_string(), 1801 local=local, 1802 row_format=self._parse_row_format(match_row=True), 1803 ) 1804 else: 1805 if self._match(TokenType.OR): 1806 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1807 1808 self._match(TokenType.INTO) 1809 comments += ensure_list(self._prev_comments) 1810 self._match(TokenType.TABLE) 1811 this = self._parse_table(schema=True) 1812 1813 returning = self._parse_returning() 1814 1815 return self.expression( 1816 exp.Insert, 1817 comments=comments, 1818 this=this, 1819 by_name=self._match_text_seq("BY", "NAME"), 1820 exists=self._parse_exists(), 1821 partition=self._parse_partition(), 1822 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1823 and self._parse_conjunction(), 1824 expression=self._parse_ddl_select(), 1825 conflict=self._parse_on_conflict(), 
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... upsert clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; FORMAT must follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE ... or ROW FORMAT DELIMITED ... properties (Hive)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA ...; any other LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear in two positions, hence the second attempt below.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] ..."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single quoted key/value pair inside parens.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...) if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, with or without enclosing parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Overridable hook for dialect-specific SELECT projection parsing.
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH-prefixed statement, SELECT, parenthesized variant or VALUES."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # SELECT AS STRUCT / AS VALUE -- kind records the variant keyword.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits precede the projection list (top=True).
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; tolerate a repeated WITH too.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> [(<columns>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<column aliases>)]; returns None if neither is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack when the parenthesized list turned out to be empty.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, picking up pivots and an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/... clauses) to this."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT <offset>, <count> packs the offset inside the
                            # Limit node; lift it out into a proper Offset node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
) -> t.Optional[exp.Expression]: 2205 if isinstance(this, self.MODIFIABLES): 2206 for join in iter(self._parse_join, None): 2207 this.append("joins", join) 2208 for lateral in iter(self._parse_lateral, None): 2209 this.append("laterals", lateral) 2210 2211 while True: 2212 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2213 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2214 key, expression = parser(self) 2215 2216 if expression: 2217 this.set(key, expression) 2218 if key == "limit": 2219 offset = expression.args.pop("offset", None) 2220 if offset: 2221 this.set("offset", exp.Offset(expression=offset)) 2222 continue 2223 break 2224 return this 2225 2226 def _parse_hint(self) -> t.Optional[exp.Hint]: 2227 if self._match(TokenType.HINT): 2228 hints = [] 2229 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2230 hints.extend(hint) 2231 2232 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2233 self.raise_error("Expected */ after HINT") 2234 2235 return self.expression(exp.Hint, expressions=hints) 2236 2237 return None 2238 2239 def _parse_into(self) -> t.Optional[exp.Into]: 2240 if not self._match(TokenType.INTO): 2241 return None 2242 2243 temp = self._match(TokenType.TEMPORARY) 2244 unlogged = self._match_text_seq("UNLOGGED") 2245 self._match(TokenType.TABLE) 2246 2247 return self.expression( 2248 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2249 ) 2250 2251 def _parse_from( 2252 self, joins: bool = False, skip_from_token: bool = False 2253 ) -> t.Optional[exp.From]: 2254 if not skip_from_token and not self._match(TokenType.FROM): 2255 return None 2256 2257 return self.expression( 2258 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2259 ) 2260 2261 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2262 if not self._match(TokenType.MATCH_RECOGNIZE): 2263 return None 2264 2265 self._match_l_paren() 2266 2267 partition = 
self._parse_partition_by() 2268 order = self._parse_order() 2269 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2270 2271 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2272 rows = exp.var("ONE ROW PER MATCH") 2273 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2274 text = "ALL ROWS PER MATCH" 2275 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2276 text += f" SHOW EMPTY MATCHES" 2277 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2278 text += f" OMIT EMPTY MATCHES" 2279 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2280 text += f" WITH UNMATCHED ROWS" 2281 rows = exp.var(text) 2282 else: 2283 rows = None 2284 2285 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2286 text = "AFTER MATCH SKIP" 2287 if self._match_text_seq("PAST", "LAST", "ROW"): 2288 text += f" PAST LAST ROW" 2289 elif self._match_text_seq("TO", "NEXT", "ROW"): 2290 text += f" TO NEXT ROW" 2291 elif self._match_text_seq("TO", "FIRST"): 2292 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2293 elif self._match_text_seq("TO", "LAST"): 2294 text += f" TO LAST {self._advance_any().text}" # type: ignore 2295 after = exp.var(text) 2296 else: 2297 after = None 2298 2299 if self._match_text_seq("PATTERN"): 2300 self._match_l_paren() 2301 2302 if not self._curr: 2303 self.raise_error("Expecting )", self._curr) 2304 2305 paren = 1 2306 start = self._curr 2307 2308 while self._curr and paren > 0: 2309 if self._curr.token_type == TokenType.L_PAREN: 2310 paren += 1 2311 if self._curr.token_type == TokenType.R_PAREN: 2312 paren -= 1 2313 2314 end = self._prev 2315 self._advance() 2316 2317 if paren > 0: 2318 self.raise_error("Expecting )", self._curr) 2319 2320 pattern = exp.var(self._find_sql(start, end)) 2321 else: 2322 pattern = None 2323 2324 define = ( 2325 self._parse_csv( 2326 lambda: self.expression( 2327 exp.Alias, 2328 alias=self._parse_id_var(any_token=True), 2329 this=self._match(TokenType.ALIAS) and 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ... or OUTER/CROSS APPLY ... constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over an unnest, function call or (dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a JOIN, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (comma joins and APPLY variants included)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed: undo the speculative method/side/kind matches.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested joins: trailing joins belong to the joined table when they
            # are followed by this join's own ON/USING clause; otherwise backtrack.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; index is pre-parsed when given (CREATE INDEX <name>)."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    # FOR JOIN / FOR ORDER BY / FOR GROUP BY -- take the next token's text.
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One dotted component of a table name; schema=True disallows function calls.
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more ...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause relation: lateral, unnest, values, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects put TABLESAMPLE before the alias, others after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal table suffixes: FOR TIMESTAMP/VERSION AS OF, BETWEEN, etc."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                # In these dialects the alias names the produced column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # An extra trailing column alias names the ordinality/offset column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
t.Optional[exp.Unnest]: 2654 if not self._match(TokenType.UNNEST): 2655 return None 2656 2657 expressions = self._parse_wrapped_csv(self._parse_type) 2658 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2659 2660 alias = self._parse_table_alias() if with_alias else None 2661 2662 if alias: 2663 if self.UNNEST_COLUMN_ONLY: 2664 if alias.args.get("columns"): 2665 self.raise_error("Unexpected extra column alias in unnest.") 2666 2667 alias.set("columns", [alias.this]) 2668 alias.set("this", None) 2669 2670 columns = alias.args.get("columns") or [] 2671 if offset and len(expressions) < len(columns): 2672 offset = columns.pop() 2673 2674 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2675 self._match(TokenType.ALIAS) 2676 offset = self._parse_id_var() or exp.to_identifier("offset") 2677 2678 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2679 2680 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2681 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2682 if not is_derived and not self._match(TokenType.VALUES): 2683 return None 2684 2685 expressions = self._parse_csv(self._parse_value) 2686 alias = self._parse_table_alias() 2687 2688 if is_derived: 2689 self._match_r_paren() 2690 2691 return self.expression( 2692 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2693 ) 2694 2695 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2696 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2697 as_modifier and self._match_text_seq("USING", "SAMPLE") 2698 ): 2699 return None 2700 2701 bucket_numerator = None 2702 bucket_denominator = None 2703 bucket_field = None 2704 percent = None 2705 rows = None 2706 size = None 2707 seed = None 2708 2709 kind = ( 2710 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2711 ) 2712 method = 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        # Collect consecutive PIVOT/UNPIVOT clauses until none match.
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse the simplified PIVOT syntax: PIVOT <table> ON ... USING ... [GROUP BY ...]."""

        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a trailing PIVOT(...)/UNPIVOT(...) clause; returns None and backtracks otherwise."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # An alias only follows the last PIVOT/UNPIVOT in a chain.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names of the pivoted relation.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Overridable hook: each aggregation's alias names its pivot output column.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE / WITH TOTALS forms."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP is recorded as True; plain ROLLUP (...) keeps its columns.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once no additional grouping construct was consumed.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
return self.expression(exp.Group, **elements) # type: ignore 2904 2905 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2906 if not self._match(TokenType.GROUPING_SETS): 2907 return None 2908 2909 return self._parse_wrapped_csv(self._parse_grouping_set) 2910 2911 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2912 if self._match(TokenType.L_PAREN): 2913 grouping_set = self._parse_csv(self._parse_column) 2914 self._match_r_paren() 2915 return self.expression(exp.Tuple, expressions=grouping_set) 2916 2917 return self._parse_column() 2918 2919 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2920 if not skip_having_token and not self._match(TokenType.HAVING): 2921 return None 2922 return self.expression(exp.Having, this=self._parse_conjunction()) 2923 2924 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2925 if not self._match(TokenType.QUALIFY): 2926 return None 2927 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2928 2929 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2930 if skip_start_token: 2931 start = None 2932 elif self._match(TokenType.START_WITH): 2933 start = self._parse_conjunction() 2934 else: 2935 return None 2936 2937 self._match(TokenType.CONNECT_BY) 2938 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2939 exp.Prior, this=self._parse_bitwise() 2940 ) 2941 connect = self._parse_conjunction() 2942 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2943 2944 if not start and self._match(TokenType.START_WITH): 2945 start = self._parse_conjunction() 2946 2947 return self.expression(exp.Connect, start=start, connect=connect) 2948 2949 def _parse_order( 2950 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2951 ) -> t.Optional[exp.Expression]: 2952 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2953 return this 2954 2955 return self.expression( 2956 exp.Order, 
this=this, expressions=self._parse_csv(self._parse_ordered) 2957 ) 2958 2959 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2960 if not self._match(token): 2961 return None 2962 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2963 2964 def _parse_ordered(self) -> exp.Ordered: 2965 this = self._parse_conjunction() 2966 2967 asc = self._match(TokenType.ASC) 2968 desc = self._match(TokenType.DESC) or (asc and False) 2969 2970 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2971 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2972 2973 nulls_first = is_nulls_first or False 2974 explicitly_null_ordered = is_nulls_first or is_nulls_last 2975 2976 if ( 2977 not explicitly_null_ordered 2978 and ( 2979 (not desc and self.NULL_ORDERING == "nulls_are_small") 2980 or (desc and self.NULL_ORDERING != "nulls_are_small") 2981 ) 2982 and self.NULL_ORDERING != "nulls_are_last" 2983 ): 2984 nulls_first = True 2985 2986 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2987 2988 def _parse_limit( 2989 self, this: t.Optional[exp.Expression] = None, top: bool = False 2990 ) -> t.Optional[exp.Expression]: 2991 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2992 comments = self._prev_comments 2993 if top: 2994 limit_paren = self._match(TokenType.L_PAREN) 2995 expression = self._parse_number() 2996 2997 if limit_paren: 2998 self._match_r_paren() 2999 else: 3000 expression = self._parse_term() 3001 3002 if self._match(TokenType.COMMA): 3003 offset = expression 3004 expression = self._parse_term() 3005 else: 3006 offset = None 3007 3008 limit_exp = self.expression( 3009 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3010 ) 3011 3012 return limit_exp 3013 3014 if self._match(TokenType.FETCH): 3015 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3016 direction = self._prev.text if direction else "FIRST" 3017 3018 count = 
self._parse_field(tokens=self.FETCH_TOKENS) 3019 percent = self._match(TokenType.PERCENT) 3020 3021 self._match_set((TokenType.ROW, TokenType.ROWS)) 3022 3023 only = self._match_text_seq("ONLY") 3024 with_ties = self._match_text_seq("WITH", "TIES") 3025 3026 if only and with_ties: 3027 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3028 3029 return self.expression( 3030 exp.Fetch, 3031 direction=direction, 3032 count=count, 3033 percent=percent, 3034 with_ties=with_ties, 3035 ) 3036 3037 return this 3038 3039 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3040 if not self._match(TokenType.OFFSET): 3041 return this 3042 3043 count = self._parse_term() 3044 self._match_set((TokenType.ROW, TokenType.ROWS)) 3045 return self.expression(exp.Offset, this=this, expression=count) 3046 3047 def _parse_locks(self) -> t.List[exp.Lock]: 3048 locks = [] 3049 while True: 3050 if self._match_text_seq("FOR", "UPDATE"): 3051 update = True 3052 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3053 "LOCK", "IN", "SHARE", "MODE" 3054 ): 3055 update = False 3056 else: 3057 break 3058 3059 expressions = None 3060 if self._match_text_seq("OF"): 3061 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3062 3063 wait: t.Optional[bool | exp.Expression] = None 3064 if self._match_text_seq("NOWAIT"): 3065 wait = True 3066 elif self._match_text_seq("WAIT"): 3067 wait = self._parse_primary() 3068 elif self._match_text_seq("SKIP", "LOCKED"): 3069 wait = False 3070 3071 locks.append( 3072 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3073 ) 3074 3075 return locks 3076 3077 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3078 if not self._match_set(self.SET_OPERATIONS): 3079 return this 3080 3081 token_type = self._prev.token_type 3082 3083 if token_type == TokenType.UNION: 3084 expression = exp.Union 3085 
elif token_type == TokenType.EXCEPT: 3086 expression = exp.Except 3087 else: 3088 expression = exp.Intersect 3089 3090 return self.expression( 3091 expression, 3092 this=this, 3093 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3094 by_name=self._match_text_seq("BY", "NAME"), 3095 expression=self._parse_set_operations(self._parse_select(nested=True)), 3096 ) 3097 3098 def _parse_expression(self) -> t.Optional[exp.Expression]: 3099 return self._parse_alias(self._parse_conjunction()) 3100 3101 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3102 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3103 3104 def _parse_equality(self) -> t.Optional[exp.Expression]: 3105 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3106 3107 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3108 return self._parse_tokens(self._parse_range, self.COMPARISON) 3109 3110 def _parse_range(self) -> t.Optional[exp.Expression]: 3111 this = self._parse_bitwise() 3112 negate = self._match(TokenType.NOT) 3113 3114 if self._match_set(self.RANGE_PARSERS): 3115 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3116 if not expression: 3117 return this 3118 3119 this = expression 3120 elif self._match(TokenType.ISNULL): 3121 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3122 3123 # Postgres supports ISNULL and NOTNULL for conditions. 
3124 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3125 if self._match(TokenType.NOTNULL): 3126 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3127 this = self.expression(exp.Not, this=this) 3128 3129 if negate: 3130 this = self.expression(exp.Not, this=this) 3131 3132 if self._match(TokenType.IS): 3133 this = self._parse_is(this) 3134 3135 return this 3136 3137 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3138 index = self._index - 1 3139 negate = self._match(TokenType.NOT) 3140 3141 if self._match_text_seq("DISTINCT", "FROM"): 3142 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3143 return self.expression(klass, this=this, expression=self._parse_expression()) 3144 3145 expression = self._parse_null() or self._parse_boolean() 3146 if not expression: 3147 self._retreat(index) 3148 return None 3149 3150 this = self.expression(exp.Is, this=this, expression=expression) 3151 return self.expression(exp.Not, this=this) if negate else this 3152 3153 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3154 unnest = self._parse_unnest(with_alias=False) 3155 if unnest: 3156 this = self.expression(exp.In, this=this, unnest=unnest) 3157 elif self._match(TokenType.L_PAREN): 3158 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3159 3160 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3161 this = self.expression(exp.In, this=this, query=expressions[0]) 3162 else: 3163 this = self.expression(exp.In, this=this, expressions=expressions) 3164 3165 self._match_r_paren(this) 3166 else: 3167 this = self.expression(exp.In, this=this, field=self._parse_field()) 3168 3169 return this 3170 3171 def _parse_between(self, this: exp.Expression) -> exp.Between: 3172 low = self._parse_bitwise() 3173 self._match(TokenType.AND) 3174 high = self._parse_bitwise() 3175 return self.expression(exp.Between, this=this, low=low, 
high=high) 3176 3177 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3178 if not self._match(TokenType.ESCAPE): 3179 return this 3180 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3181 3182 def _parse_interval(self) -> t.Optional[exp.Interval]: 3183 index = self._index 3184 3185 if not self._match(TokenType.INTERVAL): 3186 return None 3187 3188 if self._match(TokenType.STRING, advance=False): 3189 this = self._parse_primary() 3190 else: 3191 this = self._parse_term() 3192 3193 if not this: 3194 self._retreat(index) 3195 return None 3196 3197 unit = self._parse_function() or self._parse_var(any_token=True) 3198 3199 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3200 # each INTERVAL expression into this canonical form so it's easy to transpile 3201 if this and this.is_number: 3202 this = exp.Literal.string(this.name) 3203 elif this and this.is_string: 3204 parts = this.name.split() 3205 3206 if len(parts) == 2: 3207 if unit: 3208 # This is not actually a unit, it's something else (e.g. 
a "window side") 3209 unit = None 3210 self._retreat(self._index - 1) 3211 3212 this = exp.Literal.string(parts[0]) 3213 unit = self.expression(exp.Var, this=parts[1]) 3214 3215 return self.expression(exp.Interval, this=this, unit=unit) 3216 3217 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3218 this = self._parse_term() 3219 3220 while True: 3221 if self._match_set(self.BITWISE): 3222 this = self.expression( 3223 self.BITWISE[self._prev.token_type], 3224 this=this, 3225 expression=self._parse_term(), 3226 ) 3227 elif self._match(TokenType.DQMARK): 3228 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3229 elif self._match_pair(TokenType.LT, TokenType.LT): 3230 this = self.expression( 3231 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3232 ) 3233 elif self._match_pair(TokenType.GT, TokenType.GT): 3234 this = self.expression( 3235 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3236 ) 3237 else: 3238 break 3239 3240 return this 3241 3242 def _parse_term(self) -> t.Optional[exp.Expression]: 3243 return self._parse_tokens(self._parse_factor, self.TERM) 3244 3245 def _parse_factor(self) -> t.Optional[exp.Expression]: 3246 return self._parse_tokens(self._parse_unary, self.FACTOR) 3247 3248 def _parse_unary(self) -> t.Optional[exp.Expression]: 3249 if self._match_set(self.UNARY_PARSERS): 3250 return self.UNARY_PARSERS[self._prev.token_type](self) 3251 return self._parse_at_time_zone(self._parse_type()) 3252 3253 def _parse_type(self) -> t.Optional[exp.Expression]: 3254 interval = self._parse_interval() 3255 if interval: 3256 return interval 3257 3258 index = self._index 3259 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3260 this = self._parse_column() 3261 3262 if data_type: 3263 if isinstance(this, exp.Literal): 3264 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3265 if parser: 3266 return parser(self, this, data_type) 3267 return self.expression(exp.Cast, 
this=this, to=data_type) 3268 if not data_type.expressions: 3269 self._retreat(index) 3270 return self._parse_column() 3271 return self._parse_column_ops(data_type) 3272 3273 return this and self._parse_column_ops(this) 3274 3275 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3276 this = self._parse_type() 3277 if not this: 3278 return None 3279 3280 return self.expression( 3281 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3282 ) 3283 3284 def _parse_types( 3285 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3286 ) -> t.Optional[exp.Expression]: 3287 index = self._index 3288 3289 prefix = self._match_text_seq("SYSUDTLIB", ".") 3290 3291 if not self._match_set(self.TYPE_TOKENS): 3292 identifier = allow_identifiers and self._parse_id_var( 3293 any_token=False, tokens=(TokenType.VAR,) 3294 ) 3295 3296 if identifier: 3297 tokens = self._tokenizer.tokenize(identifier.name) 3298 3299 if len(tokens) != 1: 3300 self.raise_error("Unexpected identifier", self._prev) 3301 3302 if tokens[0].token_type in self.TYPE_TOKENS: 3303 self._prev = tokens[0] 3304 elif self.SUPPORTS_USER_DEFINED_TYPES: 3305 type_name = identifier.name 3306 3307 while self._match(TokenType.DOT): 3308 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3309 3310 return exp.DataType.build(type_name, udt=True) 3311 else: 3312 return None 3313 else: 3314 return None 3315 3316 type_token = self._prev.token_type 3317 3318 if type_token == TokenType.PSEUDO_TYPE: 3319 return self.expression(exp.PseudoType, this=self._prev.text) 3320 3321 if type_token == TokenType.OBJECT_IDENTIFIER: 3322 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3323 3324 nested = type_token in self.NESTED_TYPE_TOKENS 3325 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3326 expressions = None 3327 maybe_func = False 3328 3329 if self._match(TokenType.L_PAREN): 3330 if is_struct: 3331 expressions = 
self._parse_csv(self._parse_struct_types) 3332 elif nested: 3333 expressions = self._parse_csv( 3334 lambda: self._parse_types( 3335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3336 ) 3337 ) 3338 elif type_token in self.ENUM_TYPE_TOKENS: 3339 expressions = self._parse_csv(self._parse_equality) 3340 else: 3341 expressions = self._parse_csv(self._parse_type_size) 3342 3343 if not expressions or not self._match(TokenType.R_PAREN): 3344 self._retreat(index) 3345 return None 3346 3347 maybe_func = True 3348 3349 this: t.Optional[exp.Expression] = None 3350 values: t.Optional[t.List[exp.Expression]] = None 3351 3352 if nested and self._match(TokenType.LT): 3353 if is_struct: 3354 expressions = self._parse_csv(self._parse_struct_types) 3355 else: 3356 expressions = self._parse_csv( 3357 lambda: self._parse_types( 3358 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3359 ) 3360 ) 3361 3362 if not self._match(TokenType.GT): 3363 self.raise_error("Expecting >") 3364 3365 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3366 values = self._parse_csv(self._parse_conjunction) 3367 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3368 3369 if type_token in self.TIMESTAMPS: 3370 if self._match_text_seq("WITH", "TIME", "ZONE"): 3371 maybe_func = False 3372 tz_type = ( 3373 exp.DataType.Type.TIMETZ 3374 if type_token in self.TIMES 3375 else exp.DataType.Type.TIMESTAMPTZ 3376 ) 3377 this = exp.DataType(this=tz_type, expressions=expressions) 3378 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3379 maybe_func = False 3380 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3381 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3382 maybe_func = False 3383 elif type_token == TokenType.INTERVAL: 3384 unit = self._parse_var() 3385 3386 if self._match_text_seq("TO"): 3387 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3388 else: 3389 span = None 
3390 3391 if span or not unit: 3392 this = self.expression( 3393 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3394 ) 3395 else: 3396 this = self.expression(exp.Interval, unit=unit) 3397 3398 if maybe_func and check_func: 3399 index2 = self._index 3400 peek = self._parse_string() 3401 3402 if not peek: 3403 self._retreat(index) 3404 return None 3405 3406 self._retreat(index2) 3407 3408 if not this: 3409 if self._match_text_seq("UNSIGNED"): 3410 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3411 if not unsigned_type_token: 3412 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3413 3414 type_token = unsigned_type_token or type_token 3415 3416 this = exp.DataType( 3417 this=exp.DataType.Type[type_token.value], 3418 expressions=expressions, 3419 nested=nested, 3420 values=values, 3421 prefix=prefix, 3422 ) 3423 3424 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3425 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3426 3427 return this 3428 3429 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3430 this = self._parse_type() or self._parse_id_var() 3431 self._match(TokenType.COLON) 3432 return self._parse_column_def(this) 3433 3434 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3435 if not self._match_text_seq("AT", "TIME", "ZONE"): 3436 return this 3437 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3438 3439 def _parse_column(self) -> t.Optional[exp.Expression]: 3440 this = self._parse_field() 3441 if isinstance(this, exp.Identifier): 3442 this = self.expression(exp.Column, this=this) 3443 elif not this: 3444 return self._parse_bracket(this) 3445 return self._parse_column_ops(this) 3446 3447 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3448 this = self._parse_bracket(this) 3449 3450 while 
self._match_set(self.COLUMN_OPERATORS): 3451 op_token = self._prev.token_type 3452 op = self.COLUMN_OPERATORS.get(op_token) 3453 3454 if op_token == TokenType.DCOLON: 3455 field = self._parse_types() 3456 if not field: 3457 self.raise_error("Expected type") 3458 elif op and self._curr: 3459 self._advance() 3460 value = self._prev.text 3461 field = ( 3462 exp.Literal.number(value) 3463 if self._prev.token_type == TokenType.NUMBER 3464 else exp.Literal.string(value) 3465 ) 3466 else: 3467 field = self._parse_field(anonymous_func=True, any_token=True) 3468 3469 if isinstance(field, exp.Func): 3470 # bigquery allows function calls like x.y.count(...) 3471 # SAFE.SUBSTR(...) 3472 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3473 this = self._replace_columns_with_dots(this) 3474 3475 if op: 3476 this = op(self, this, field) 3477 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3478 this = self.expression( 3479 exp.Column, 3480 this=field, 3481 table=this.this, 3482 db=this.args.get("table"), 3483 catalog=this.args.get("db"), 3484 ) 3485 else: 3486 this = self.expression(exp.Dot, this=this, expression=field) 3487 this = self._parse_bracket(this) 3488 return this 3489 3490 def _parse_primary(self) -> t.Optional[exp.Expression]: 3491 if self._match_set(self.PRIMARY_PARSERS): 3492 token_type = self._prev.token_type 3493 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3494 3495 if token_type == TokenType.STRING: 3496 expressions = [primary] 3497 while self._match(TokenType.STRING): 3498 expressions.append(exp.Literal.string(self._prev.text)) 3499 3500 if len(expressions) > 1: 3501 return self.expression(exp.Concat, expressions=expressions) 3502 3503 return primary 3504 3505 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3506 return exp.Literal.number(f"0.{self._prev.text}") 3507 3508 if self._match(TokenType.L_PAREN): 3509 comments = self._prev_comments 3510 query = 
self._parse_select() 3511 3512 if query: 3513 expressions = [query] 3514 else: 3515 expressions = self._parse_expressions() 3516 3517 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3518 3519 if isinstance(this, exp.Subqueryable): 3520 this = self._parse_set_operations( 3521 self._parse_subquery(this=this, parse_alias=False) 3522 ) 3523 elif len(expressions) > 1: 3524 this = self.expression(exp.Tuple, expressions=expressions) 3525 else: 3526 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3527 3528 if this: 3529 this.add_comments(comments) 3530 3531 self._match_r_paren(expression=this) 3532 return this 3533 3534 return None 3535 3536 def _parse_field( 3537 self, 3538 any_token: bool = False, 3539 tokens: t.Optional[t.Collection[TokenType]] = None, 3540 anonymous_func: bool = False, 3541 ) -> t.Optional[exp.Expression]: 3542 return ( 3543 self._parse_primary() 3544 or self._parse_function(anonymous=anonymous_func) 3545 or self._parse_id_var(any_token=any_token, tokens=tokens) 3546 ) 3547 3548 def _parse_function( 3549 self, 3550 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3551 anonymous: bool = False, 3552 optional_parens: bool = True, 3553 ) -> t.Optional[exp.Expression]: 3554 if not self._curr: 3555 return None 3556 3557 token_type = self._curr.token_type 3558 this = self._curr.text 3559 upper = this.upper() 3560 3561 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3562 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3563 self._advance() 3564 return parser(self) 3565 3566 if not self._next or self._next.token_type != TokenType.L_PAREN: 3567 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3568 self._advance() 3569 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3570 3571 return None 3572 3573 if token_type not in self.FUNC_TOKENS: 3574 return None 3575 3576 self._advance(2) 3577 3578 parser = self.FUNCTION_PARSERS.get(upper) 3579 if parser and not 
anonymous: 3580 this = parser(self) 3581 else: 3582 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3583 3584 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3585 this = self.expression(subquery_predicate, this=self._parse_select()) 3586 self._match_r_paren() 3587 return this 3588 3589 if functions is None: 3590 functions = self.FUNCTIONS 3591 3592 function = functions.get(upper) 3593 3594 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3595 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3596 3597 if function and not anonymous: 3598 func = self.validate_expression(function(args), args) 3599 if not self.NORMALIZE_FUNCTIONS: 3600 func.meta["name"] = this 3601 this = func 3602 else: 3603 this = self.expression(exp.Anonymous, this=this, expressions=args) 3604 3605 self._match_r_paren(this) 3606 return self._parse_window(this) 3607 3608 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3609 return self._parse_column_def(self._parse_id_var()) 3610 3611 def _parse_user_defined_function( 3612 self, kind: t.Optional[TokenType] = None 3613 ) -> t.Optional[exp.Expression]: 3614 this = self._parse_id_var() 3615 3616 while self._match(TokenType.DOT): 3617 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3618 3619 if not self._match(TokenType.L_PAREN): 3620 return this 3621 3622 expressions = self._parse_csv(self._parse_function_parameter) 3623 self._match_r_paren() 3624 return self.expression( 3625 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3626 ) 3627 3628 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3629 literal = self._parse_primary() 3630 if literal: 3631 return self.expression(exp.Introducer, this=token.text, expression=literal) 3632 3633 return self.expression(exp.Identifier, this=token.text) 3634 3635 def _parse_session_parameter(self) -> exp.SessionParameter: 3636 kind = None 3637 this = 
self._parse_id_var() or self._parse_primary() 3638 3639 if this and self._match(TokenType.DOT): 3640 kind = this.name 3641 this = self._parse_var() or self._parse_primary() 3642 3643 return self.expression(exp.SessionParameter, this=this, kind=kind) 3644 3645 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3646 index = self._index 3647 3648 if self._match(TokenType.L_PAREN): 3649 expressions = t.cast( 3650 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3651 ) 3652 3653 if not self._match(TokenType.R_PAREN): 3654 self._retreat(index) 3655 else: 3656 expressions = [self._parse_id_var()] 3657 3658 if self._match_set(self.LAMBDAS): 3659 return self.LAMBDAS[self._prev.token_type](self, expressions) 3660 3661 self._retreat(index) 3662 3663 this: t.Optional[exp.Expression] 3664 3665 if self._match(TokenType.DISTINCT): 3666 this = self.expression( 3667 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3668 ) 3669 else: 3670 this = self._parse_select_or_expression(alias=alias) 3671 3672 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3673 3674 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3675 index = self._index 3676 3677 if not self.errors: 3678 try: 3679 if self._parse_select(nested=True): 3680 return this 3681 except ParseError: 3682 pass 3683 finally: 3684 self.errors.clear() 3685 self._retreat(index) 3686 3687 if not self._match(TokenType.L_PAREN): 3688 return this 3689 3690 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3691 3692 self._match_r_paren() 3693 return self.expression(exp.Schema, this=this, expressions=args) 3694 3695 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3696 return self._parse_column_def(self._parse_field(any_token=True)) 3697 3698 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3699 # column defs 
are not really columns, they're identifiers 3700 if isinstance(this, exp.Column): 3701 this = this.this 3702 3703 kind = self._parse_types(schema=True) 3704 3705 if self._match_text_seq("FOR", "ORDINALITY"): 3706 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3707 3708 constraints: t.List[exp.Expression] = [] 3709 3710 if not kind and self._match(TokenType.ALIAS): 3711 constraints.append( 3712 self.expression( 3713 exp.ComputedColumnConstraint, 3714 this=self._parse_conjunction(), 3715 persisted=self._match_text_seq("PERSISTED"), 3716 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3717 ) 3718 ) 3719 3720 while True: 3721 constraint = self._parse_column_constraint() 3722 if not constraint: 3723 break 3724 constraints.append(constraint) 3725 3726 if not kind and not constraints: 3727 return this 3728 3729 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3730 3731 def _parse_auto_increment( 3732 self, 3733 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3734 start = None 3735 increment = None 3736 3737 if self._match(TokenType.L_PAREN, advance=False): 3738 args = self._parse_wrapped_csv(self._parse_bitwise) 3739 start = seq_get(args, 0) 3740 increment = seq_get(args, 1) 3741 elif self._match_text_seq("START"): 3742 start = self._parse_bitwise() 3743 self._match_text_seq("INCREMENT") 3744 increment = self._parse_bitwise() 3745 3746 if start and increment: 3747 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3748 3749 return exp.AutoIncrementColumnConstraint() 3750 3751 def _parse_compress(self) -> exp.CompressColumnConstraint: 3752 if self._match(TokenType.L_PAREN, advance=False): 3753 return self.expression( 3754 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3755 ) 3756 3757 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3758 3759 def _parse_generated_as_identity(self) 
-> exp.GeneratedAsIdentityColumnConstraint:
        # GENERATED {BY DEFAULT [ON NULL] | ALWAYS} AS IDENTITY [( <sequence options> )]
        if self._match_text_seq("BY", "DEFAULT"):
            # "BY DEFAULT" may be followed by "ON NULL" (Oracle identity columns)
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Optional sequence options: START WITH, INCREMENT BY, MINVALUE, MAXVALUE, [NO] CYCLE
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — computed column, not an identity sequence
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following a NOT keyword (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) column constraint: [CONSTRAINT <name>] <kind>.

        Returns the bare identifier when a CONSTRAINT name is given without a
        recognized constraint kind following it.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            # The matched keyword selects the specific constraint parser
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT <name> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several constraint bodies in sequence
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT <name> prefix.

        Args:
            constraints: the keywords to accept; defaults to all CONSTRAINT_PARSERS keys.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(<columns>)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <action>, DEFERRABLE, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE / UPDATE) is taken verbatim
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [options] clause.

        Args:
            match: when True, require (and consume) the REFERENCES keyword first.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to whatever single token follows (e.g. CASCADE / RESTRICT)
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Overridable hook: dialects may allow more than a plain field here
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list -> this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed syntax after `this`: subscripts x[...], slices, or {..} structs."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: slice with no start, e.g. x[:n]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes to the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained brackets, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a slice if a colon follows (x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, both function form IF(a, b, c) and statement form IF a THEN b [ELSE c] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind so IF can be re-parsed differently
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)] (NEXT already consumed)."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Give back the NEXT token consumed by the caller
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(<expr> AS <type> [FORMAT <fmt>]) / TRY_CAST when not strict."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(x, 'type-string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat as user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with FORMAT becomes STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONCAT, normalizing NULL handling per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(<delimiter>, <values>...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Only the values are normalized, never the delimiter
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregation into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: must also match when both sides are NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> {: | ,} [VALUE] <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its optional NULL/UNIQUE/RETURNING/ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) honoring the dialect's argument order and LN default."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(<cols>) AGAINST (<string> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<json> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION / LOCATE-style calls into exp.StrPosition.

        Args:
            haystack_first: argument order of the comma form (dialect-dependent).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>): first expr was the chars to trim
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: WINDOW <name> AS (<spec>) [, ...]."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition within a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the modifier is present."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes of `this`: FILTER, WITHIN GROUP, OVER (...).

        Args:
            alias: True when parsing a named window definition (WINDOW x AS (...)),
                in which case there is no OVER keyword.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # Overridable hook so dialects can customize PARTITION BY / ORDER BY parsing
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING|FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        Args:
            explicit: require the AS keyword; without it, `this` is returned as-is.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: explicit set of acceptable token types (defaults to ID_VAR_TOKENS).
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a var token (or any non-reserved / explicitly listed token) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a var or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*), falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. ${name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (?, :name, @var, ...) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined; give the token back
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an EXCEPT column list: EXCEPT (<cols>) or a single bare column."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a REPLACE expression list: REPLACE (<exprs>) or a single bare expression."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments before the separator belong to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: tokens in `expressions` map to node types."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (...); raise if parens are missing unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a (possibly aliased) scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode] ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. "READ ONLY" / "ISOLATION LEVEL ..."
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT ...] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] via the generic DROP parser."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...) [, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD [CONSTRAINT <name>] {CHECK|FOREIGN KEY|PRIMARY KEY} constraint."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD (constraints or column definitions)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP|SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE)
and self._parse_term(), 4834 using=self._match(TokenType.USING) and self._parse_conjunction(), 4835 ) 4836 4837 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4838 index = self._index - 1 4839 4840 partition_exists = self._parse_exists() 4841 if self._match(TokenType.PARTITION, advance=False): 4842 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4843 4844 self._retreat(index) 4845 return self._parse_csv(self._parse_drop_column) 4846 4847 def _parse_alter_table_rename(self) -> exp.RenameTable: 4848 self._match_text_seq("TO") 4849 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4850 4851 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4852 start = self._prev 4853 4854 if not self._match(TokenType.TABLE): 4855 return self._parse_as_command(start) 4856 4857 exists = self._parse_exists() 4858 only = self._match_text_seq("ONLY") 4859 this = self._parse_table(schema=True) 4860 4861 if self._next: 4862 self._advance() 4863 4864 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4865 if parser: 4866 actions = ensure_list(parser(self)) 4867 4868 if not self._curr: 4869 return self.expression( 4870 exp.AlterTable, 4871 this=this, 4872 exists=exists, 4873 actions=actions, 4874 only=only, 4875 ) 4876 4877 return self._parse_as_command(start) 4878 4879 def _parse_merge(self) -> exp.Merge: 4880 self._match(TokenType.INTO) 4881 target = self._parse_table() 4882 4883 if target and self._match(TokenType.ALIAS, advance=False): 4884 target.set("alias", self._parse_table_alias()) 4885 4886 self._match(TokenType.USING) 4887 using = self._parse_table() 4888 4889 self._match(TokenType.ON) 4890 on = self._parse_conjunction() 4891 4892 whens = [] 4893 while self._match(TokenType.WHEN): 4894 matched = not self._match(TokenType.NOT) 4895 self._match_text_seq("MATCHED") 4896 source = ( 4897 False 4898 if self._match_text_seq("BY", "TARGET") 4899 else self._match_text_seq("BY", 
"SOURCE") 4900 ) 4901 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4902 4903 self._match(TokenType.THEN) 4904 4905 if self._match(TokenType.INSERT): 4906 _this = self._parse_star() 4907 if _this: 4908 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4909 else: 4910 then = self.expression( 4911 exp.Insert, 4912 this=self._parse_value(), 4913 expression=self._match(TokenType.VALUES) and self._parse_value(), 4914 ) 4915 elif self._match(TokenType.UPDATE): 4916 expressions = self._parse_star() 4917 if expressions: 4918 then = self.expression(exp.Update, expressions=expressions) 4919 else: 4920 then = self.expression( 4921 exp.Update, 4922 expressions=self._match(TokenType.SET) 4923 and self._parse_csv(self._parse_equality), 4924 ) 4925 elif self._match(TokenType.DELETE): 4926 then = self.expression(exp.Var, this=self._prev.text) 4927 else: 4928 then = None 4929 4930 whens.append( 4931 self.expression( 4932 exp.When, 4933 matched=matched, 4934 source=source, 4935 condition=condition, 4936 then=then, 4937 ) 4938 ) 4939 4940 return self.expression( 4941 exp.Merge, 4942 this=target, 4943 using=using, 4944 on=on, 4945 expressions=whens, 4946 ) 4947 4948 def _parse_show(self) -> t.Optional[exp.Expression]: 4949 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4950 if parser: 4951 return parser(self) 4952 return self._parse_as_command(self._prev) 4953 4954 def _parse_set_item_assignment( 4955 self, kind: t.Optional[str] = None 4956 ) -> t.Optional[exp.Expression]: 4957 index = self._index 4958 4959 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4960 return self._parse_set_transaction(global_=kind == "GLOBAL") 4961 4962 left = self._parse_primary() or self._parse_id_var() 4963 assignment_delimiter = self._match_texts(("=", "TO")) 4964 4965 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 4966 self._retreat(index) 4967 return None 4968 4969 
right = self._parse_statement() or self._parse_id_var() 4970 this = self.expression(exp.EQ, this=left, expression=right) 4971 4972 return self.expression(exp.SetItem, this=this, kind=kind) 4973 4974 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4975 self._match_text_seq("TRANSACTION") 4976 characteristics = self._parse_csv( 4977 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4978 ) 4979 return self.expression( 4980 exp.SetItem, 4981 expressions=characteristics, 4982 kind="TRANSACTION", 4983 **{"global": global_}, # type: ignore 4984 ) 4985 4986 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4987 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4988 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4989 4990 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4991 index = self._index 4992 set_ = self.expression( 4993 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4994 ) 4995 4996 if self._curr: 4997 self._retreat(index) 4998 return self._parse_as_command(self._prev) 4999 5000 return set_ 5001 5002 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5003 for option in options: 5004 if self._match_text_seq(*option.split(" ")): 5005 return exp.var(option) 5006 return None 5007 5008 def _parse_as_command(self, start: Token) -> exp.Command: 5009 while self._curr: 5010 self._advance() 5011 text = self._find_sql(start, self._prev) 5012 size = len(start.text) 5013 return exp.Command(this=text[:size], expression=text[size:]) 5014 5015 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5016 settings = [] 5017 5018 self._match_l_paren() 5019 kind = self._parse_id_var() 5020 5021 if self._match(TokenType.L_PAREN): 5022 while True: 5023 key = self._parse_id_var() 5024 value = self._parse_primary() 5025 5026 if not key and value is None: 5027 break 5028 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5029 self._match(TokenType.R_PAREN) 5030 5031 self._match_r_paren() 5032 5033 return self.expression( 5034 exp.DictProperty, 5035 this=this, 5036 kind=kind.this if kind else None, 5037 settings=settings, 5038 ) 5039 5040 def _parse_dict_range(self, this: str) -> exp.DictRange: 5041 self._match_l_paren() 5042 has_min = self._match_text_seq("MIN") 5043 if has_min: 5044 min = self._parse_var() or self._parse_primary() 5045 self._match_text_seq("MAX") 5046 max = self._parse_var() or self._parse_primary() 5047 else: 5048 max = self._parse_var() or self._parse_primary() 5049 min = exp.Literal.number(0) 5050 self._match_r_paren() 5051 return self.expression(exp.DictRange, this=this, min=min, max=max) 5052 5053 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5054 index = self._index 5055 expression = self._parse_column() 5056 if not self._match(TokenType.IN): 5057 self._retreat(index - 1) 5058 return None 5059 iterator = self._parse_column() 5060 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5061 return self.expression( 5062 exp.Comprehension, 5063 this=this, 5064 expression=expression, 5065 iterator=iterator, 5066 condition=condition, 5067 ) 5068 5069 def _find_parser( 5070 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5071 ) -> t.Optional[t.Callable]: 5072 if not self._curr: 5073 return None 5074 5075 index = self._index 5076 this = [] 5077 while True: 5078 # The current token might be multiple words 5079 curr = self._curr.text.upper() 5080 key = curr.split(" ") 5081 this.append(curr) 5082 5083 self._advance() 5084 result, trie = in_trie(trie, key) 5085 if result == TrieResult.FAILED: 5086 break 5087 5088 if result == TrieResult.EXISTS: 5089 subparser = parsers[" ".join(this)] 5090 return subparser 5091 5092 self._retreat(index) 5093 return None 5094 5095 def _match(self, token_type, advance=True, expression=None): 
5096 if not self._curr: 5097 return None 5098 5099 if self._curr.token_type == token_type: 5100 if advance: 5101 self._advance() 5102 self._add_comments(expression) 5103 return True 5104 5105 return None 5106 5107 def _match_set(self, types, advance=True): 5108 if not self._curr: 5109 return None 5110 5111 if self._curr.token_type in types: 5112 if advance: 5113 self._advance() 5114 return True 5115 5116 return None 5117 5118 def _match_pair(self, token_type_a, token_type_b, advance=True): 5119 if not self._curr or not self._next: 5120 return None 5121 5122 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5123 if advance: 5124 self._advance(2) 5125 return True 5126 5127 return None 5128 5129 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5130 if not self._match(TokenType.L_PAREN, expression=expression): 5131 self.raise_error("Expecting (") 5132 5133 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5134 if not self._match(TokenType.R_PAREN, expression=expression): 5135 self.raise_error("Expecting )") 5136 5137 def _match_texts(self, texts, advance=True): 5138 if self._curr and self._curr.text.upper() in texts: 5139 if advance: 5140 self._advance() 5141 return True 5142 return False 5143 5144 def _match_text_seq(self, *texts, advance=True): 5145 index = self._index 5146 for text in texts: 5147 if self._curr and self._curr.text.upper() == text: 5148 self._advance() 5149 else: 5150 self._retreat(index) 5151 return False 5152 5153 if not advance: 5154 self._retreat(index) 5155 5156 return True 5157 5158 @t.overload 5159 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5160 ... 5161 5162 @t.overload 5163 def _replace_columns_with_dots( 5164 self, this: t.Optional[exp.Expression] 5165 ) -> t.Optional[exp.Expression]: 5166 ... 
5167 5168 def _replace_columns_with_dots(self, this): 5169 if isinstance(this, exp.Dot): 5170 exp.replace_children(this, self._replace_columns_with_dots) 5171 elif isinstance(this, exp.Column): 5172 exp.replace_children(this, self._replace_columns_with_dots) 5173 table = this.args.get("table") 5174 this = ( 5175 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5176 ) 5177 5178 return this 5179 5180 def _replace_lambda( 5181 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5182 ) -> t.Optional[exp.Expression]: 5183 if not node: 5184 return node 5185 5186 for column in node.find_all(exp.Column): 5187 if column.parts[0].name in lambda_variables: 5188 dot_or_id = column.to_dot() if column.table else column.this 5189 parent = column.parent 5190 5191 while isinstance(parent, exp.Dot): 5192 if not isinstance(parent.parent, exp.Dot): 5193 parent.replace(dot_or_id) 5194 break 5195 parent = parent.parent 5196 else: 5197 if column is node: 5198 node = dot_or_id 5199 else: 5200 column.replace(dot_or_id) 5201 return node 5202 5203 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5204 return [ 5205 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5206 for value in values 5207 if value 5208 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Args:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
918 def __init__( 919 self, 920 error_level: t.Optional[ErrorLevel] = None, 921 error_message_context: int = 100, 922 max_errors: int = 3, 923 ): 924 self.error_level = error_level or ErrorLevel.IMMEDIATE 925 self.error_message_context = error_message_context 926 self.max_errors = max_errors 927 self._tokenizer = self.TOKENIZER_CLASS() 928 self.reset()
940 def parse( 941 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 942 ) -> t.List[t.Optional[exp.Expression]]: 943 """ 944 Parses a list of tokens and returns a list of syntax trees, one tree 945 per parsed SQL statement. 946 947 Args: 948 raw_tokens: The list of tokens. 949 sql: The original SQL string, used to produce helpful debug messages. 950 951 Returns: 952 The list of the produced syntax trees. 953 """ 954 return self._parse( 955 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 956 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Args:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
958 def parse_into( 959 self, 960 expression_types: exp.IntoType, 961 raw_tokens: t.List[Token], 962 sql: t.Optional[str] = None, 963 ) -> t.List[t.Optional[exp.Expression]]: 964 """ 965 Parses a list of tokens into a given Expression type. If a collection of Expression 966 types is given instead, this method will try to parse the token list into each one 967 of them, stopping at the first for which the parsing succeeds. 968 969 Args: 970 expression_types: The expression type(s) to try and parse the token list into. 971 raw_tokens: The list of tokens. 972 sql: The original SQL string, used to produce helpful debug messages. 973 974 Returns: 975 The target Expression. 976 """ 977 errors = [] 978 for expression_type in ensure_list(expression_types): 979 parser = self.EXPRESSION_PARSERS.get(expression_type) 980 if not parser: 981 raise TypeError(f"No parser registered for {expression_type}") 982 983 try: 984 return self._parse(parser, raw_tokens, sql) 985 except ParseError as e: 986 e.errors[0]["into_expression"] = expression_type 987 errors.append(e) 988 989 raise ParseError( 990 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 991 errors=merge_errors(errors), 992 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Args:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1029 def check_errors(self) -> None: 1030 """Logs or raises any found errors, depending on the chosen error level setting.""" 1031 if self.error_level == ErrorLevel.WARN: 1032 for error in self.errors: 1033 logger.error(str(error)) 1034 elif self.error_level == ErrorLevel.RAISE and self.errors: 1035 raise ParseError( 1036 concat_messages(self.errors, self.max_errors), 1037 errors=merge_errors(self.errors), 1038 )
Logs or raises any found errors, depending on the chosen error level setting.
1040 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1041 """ 1042 Appends an error in the list of recorded errors or raises it, depending on the chosen 1043 error level setting. 1044 """ 1045 token = token or self._curr or self._prev or Token.string("") 1046 start = token.start 1047 end = token.end + 1 1048 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1049 highlight = self.sql[start:end] 1050 end_context = self.sql[end : end + self.error_message_context] 1051 1052 error = ParseError.new( 1053 f"{message}. Line {token.line}, Col: {token.col}.\n" 1054 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1055 description=message, 1056 line=token.line, 1057 col=token.col, 1058 start_context=start_context, 1059 highlight=highlight, 1060 end_context=end_context, 1061 ) 1062 1063 if self.error_level == ErrorLevel.IMMEDIATE: 1064 raise error 1065 1066 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1068 def expression( 1069 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1070 ) -> E: 1071 """ 1072 Creates a new, validated Expression. 1073 1074 Args: 1075 exp_class: The expression class to instantiate. 1076 comments: An optional list of comments to attach to the expression. 1077 kwargs: The arguments to set for the expression along with their respective values. 1078 1079 Returns: 1080 The target expression. 1081 """ 1082 instance = exp_class(**kwargs) 1083 instance.add_comments(comments) if comments else self._add_comments(instance) 1084 return self.validate_expression(instance)
Creates a new, validated Expression.
Args:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1091 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1092 """ 1093 Validates an Expression, making sure that all its mandatory arguments are set. 1094 1095 Args: 1096 expression: The expression to validate. 1097 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1098 1099 Returns: 1100 The validated expression. 1101 """ 1102 if self.error_level != ErrorLevel.IGNORE: 1103 for error_message in expression.error_messages(args): 1104 self.raise_error(error_message) 1105 1106 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Args:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.