sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
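    # Illustrative sketch (not part of the original source): a Parser is normally
    # fed tokens produced by the Tokenizer, yielding one syntax tree per statement.
    #
    #   parser = Parser(error_level=ErrorLevel.RAISE)
    #   trees = parser.parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    #   # trees is a list with one expression tree per semicolon-separated statement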
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
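    # The operator tables below (CONJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM,
    # FACTOR) drive a classic precedence-climbing parse: each table feeds one
    # _parse_* level, and each level binds tighter than the one before it.
    # Illustrative sketch (not in the original source):
    #
    #   "a OR b = 1 + 2 * 3" nests as Or(a, EQ(b, Add(1, Mul(2, 3)))), because
    #   FACTOR (*) binds tighter than TERM (+), which binds tighter than
    #   EQUALITY (=), which binds tighter than CONJUNCTION (OR).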
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
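    # STATEMENT_PARSERS below dispatches on a statement's first token, e.g. a
    # CREATE token routes to _parse_create. Illustrative sketch (mirroring the
    # dispatch that _parse_statement performs further down):
    #
    #   if self._match_set(self.STATEMENT_PARSERS):
    #       return self.STATEMENT_PARSERS[self._prev.token_type](self)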
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
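    # PLACEHOLDER_PARSERS below recognizes bind-parameter syntax. Illustrative
    # sketch (not in the original source): a bare "?" becomes exp.Placeholder,
    # while a ":name" or ":1" marker becomes exp.Placeholder(this="name"),
    # provided the token after the colon is a number or identifier-like token.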
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
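    # Illustrative sketch (not in the original source): given
    # "CREATE TABLE t (x INT) ENGINE=InnoDB", the ENGINE entry above routes to
    # _parse_property_assignment(exp.EngineProperty), and the resulting
    # exp.EngineProperty lands in the Create node's "properties" argument.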
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }
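    # Illustrative sketch (not in the original source): a column definition like
    # "x INT NOT NULL DEFAULT 0" is handled by looking up each keyword here, e.g.
    # NOT routes to _parse_not_constraint (yielding a NotNullColumnConstraint) and
    # DEFAULT yields DefaultColumnConstraint(this=exp.Literal.number(0)).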
"INLINE": lambda self: self._parse_inline(), 719 "LIKE": lambda self: self._parse_create_like(), 720 "NOT": lambda self: self._parse_not_constraint(), 721 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 722 "ON": lambda self: ( 723 self._match(TokenType.UPDATE) 724 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 725 ) 726 or self.expression(exp.OnProperty, this=self._parse_id_var()), 727 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 728 "PRIMARY KEY": lambda self: self._parse_primary_key(), 729 "REFERENCES": lambda self: self._parse_references(match=False), 730 "TITLE": lambda self: self.expression( 731 exp.TitleColumnConstraint, this=self._parse_var_or_string() 732 ), 733 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 734 "UNIQUE": lambda self: self._parse_unique(), 735 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 736 "WITH": lambda self: self.expression( 737 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 738 ), 739 } 740 741 ALTER_PARSERS = { 742 "ADD": lambda self: self._parse_alter_table_add(), 743 "ALTER": lambda self: self._parse_alter_table_alter(), 744 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 745 "DROP": lambda self: self._parse_alter_table_drop(), 746 "RENAME": lambda self: self._parse_alter_table_rename(), 747 } 748 749 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 750 751 NO_PAREN_FUNCTION_PARSERS = { 752 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 753 "CASE": lambda self: self._parse_case(), 754 "IF": lambda self: self._parse_if(), 755 "NEXT": lambda self: self._parse_next_value_for(), 756 } 757 758 INVALID_FUNC_NAME_TOKENS = { 759 TokenType.IDENTIFIER, 760 TokenType.STRING, 761 } 762 763 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 764 765 FUNCTION_PARSERS = { 766 "ANY_VALUE": lambda self: self._parse_any_value(), 767 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 768 "CONCAT": lambda self: self._parse_concat(), 769 "CONCAT_WS": lambda self: self._parse_concat_ws(), 770 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 771 "DECODE": lambda self: self._parse_decode(), 772 "EXTRACT": lambda self: self._parse_extract(), 773 "JSON_OBJECT": lambda self: self._parse_json_object(), 774 "LOG": lambda self: self._parse_logarithm(), 775 "MATCH": lambda self: self._parse_match_against(), 776 "OPENJSON": lambda self: self._parse_open_json(), 777 "POSITION": lambda self: self._parse_position(), 778 "SAFE_CAST": lambda self: self._parse_cast(False), 779 "STRING_AGG": lambda self: self._parse_string_agg(), 780 "SUBSTRING": lambda self: self._parse_substring(), 781 "TRIM": lambda self: self._parse_trim(), 782 "TRY_CAST": lambda self: self._parse_cast(False), 783 "TRY_CONVERT": lambda self: self._parse_convert(False), 784 } 785 786 QUERY_MODIFIER_PARSERS = { 787 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 788 TokenType.WHERE: lambda self: ("where", self._parse_where()), 789 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 790 TokenType.HAVING: lambda self: ("having", self._parse_having()), 791 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 792 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 793 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
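    # Dialects customize parsing by subclassing Parser and overriding these
    # class-level flags and tables. Illustrative sketch with a hypothetical
    # dialect (not part of the original source):
    #
    #   class MyParser(Parser):
    #       STRICT_CAST = False  # parse CAST as the more lenient TryCast
    #       FUNCTIONS = {
    #           **Parser.FUNCTIONS,
    #           "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #       }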
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
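    # Illustrative sketch (not in the original source): parse_into looks the
    # target type up in EXPRESSION_PARSERS, so a bare fragment can be parsed
    # without its surrounding statement:
    #
    #   tokens = Tokenizer().tokenize("x > 5 AND y < 10")
    #   condition = Parser().parse_into(exp.Condition, tokens)[0]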
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
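    # Illustrative sketch (not in the original source): parse methods build nodes
    # through expression() rather than instantiating them directly, so pending
    # comments are attached and mandatory args are validated in one place, e.g.
    #
    #   self.expression(exp.Not, this=self._parse_equality())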
    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this
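        # Illustrative sketch (not in the original source): per the ClickHouse docs
        # linked above, this handles clauses such as
        #   TTL d + INTERVAL 1 MONTH DELETE, d + INTERVAL 1 WEEK TO VOLUME 'slow'
        # where each comma-separated action becomes an exp.MergeTreeTTLAction.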
        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())
            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")
property '{self._prev.text}'") 1385 1386 return None 1387 1388 def _parse_property(self) -> t.Optional[exp.Expression]: 1389 if self._match_texts(self.PROPERTY_PARSERS): 1390 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1391 1392 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1393 return self._parse_character_set(default=True) 1394 1395 if self._match_text_seq("COMPOUND", "SORTKEY"): 1396 return self._parse_sortkey(compound=True) 1397 1398 if self._match_text_seq("SQL", "SECURITY"): 1399 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1400 1401 assignment = self._match_pair( 1402 TokenType.VAR, TokenType.EQ, advance=False 1403 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1404 1405 if assignment: 1406 key = self._parse_var_or_string() 1407 self._match(TokenType.EQ) 1408 return self.expression( 1409 exp.Property, 1410 this=key, 1411 value=self._parse_column() or self._parse_var(any_token=True), 1412 ) 1413 1414 return None 1415 1416 def _parse_stored(self) -> exp.FileFormatProperty: 1417 self._match(TokenType.ALIAS) 1418 1419 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1420 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1421 1422 return self.expression( 1423 exp.FileFormatProperty, 1424 this=self.expression( 1425 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1426 ) 1427 if input_format or output_format 1428 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1429 ) 1430 1431 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1432 self._match(TokenType.EQ) 1433 self._match(TokenType.ALIAS) 1434 return self.expression(exp_class, this=self._parse_field()) 1435 1436 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1437 properties = [] 1438 while True: 1439 if before: 1440 prop = self._parse_property_before() 1441 else: 1442 prop = self._parse_property() 1443 1444 if not prop: 1445 break 1446 for p in ensure_list(prop): 1447 properties.append(p) 1448 1449 if properties: 1450 return self.expression(exp.Properties, expressions=properties) 1451 1452 return None 1453 1454 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1455 return self.expression( 1456 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1457 ) 1458 1459 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1460 if self._index >= 2: 1461 pre_volatile_token = self._tokens[self._index - 2] 1462 else: 1463 pre_volatile_token = None 1464 1465 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1466 return exp.VolatileProperty() 1467 1468 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1469 1470 def _parse_with_property( 1471 self, 1472 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1473 if self._match(TokenType.L_PAREN, advance=False): 1474 return self._parse_wrapped_csv(self._parse_property) 1475 1476 if self._match_text_seq("JOURNAL"): 1477 return self._parse_withjournaltable() 1478 1479 if self._match_text_seq("DATA"): 1480 return self._parse_withdata(no=False) 1481 elif self._match_text_seq("NO", "DATA"): 1482 return self._parse_withdata(no=True) 1483 1484 if not self._next: 1485 return None 1486 1487 return self._parse_withisolatedloading() 1488 1489 # 
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1718 1719 def _parse_distkey(self) -> exp.DistKeyProperty: 1720 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1721 1722 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1723 table = self._parse_table(schema=True) 1724 1725 options = [] 1726 while self._match_texts(("INCLUDING", "EXCLUDING")): 1727 this = self._prev.text.upper() 1728 1729 id_var = self._parse_id_var() 1730 if not id_var: 1731 return None 1732 1733 options.append( 1734 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1735 ) 1736 1737 return self.expression(exp.LikeProperty, this=table, expressions=options) 1738 1739 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1740 return self.expression( 1741 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1742 ) 1743 1744 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1745 self._match(TokenType.EQ) 1746 return self.expression( 1747 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1748 ) 1749 1750 def _parse_returns(self) -> exp.ReturnsProperty: 1751 value: t.Optional[exp.Expression] 1752 is_table = self._match(TokenType.TABLE) 1753 1754 if is_table: 1755 if self._match(TokenType.LT): 1756 value = self.expression( 1757 exp.Schema, 1758 this="TABLE", 1759 expressions=self._parse_csv(self._parse_struct_types), 1760 ) 1761 if not self._match(TokenType.GT): 1762 self.raise_error("Expecting >") 1763 else: 1764 value = self._parse_schema(exp.var("TABLE")) 1765 else: 1766 value = self._parse_types() 1767 1768 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1769 1770 def _parse_describe(self) -> exp.Describe: 1771 kind = self._match_set(self.CREATABLES) and self._prev.text 1772 this = self._parse_table(schema=True) 1773 properties = self._parse_properties() 1774 expressions = properties.expressions if properties else None 1775 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1776 1777 def _parse_insert(self) -> exp.Insert: 1778 comments = ensure_list(self._prev_comments) 1779 overwrite = self._match(TokenType.OVERWRITE) 1780 ignore = self._match(TokenType.IGNORE) 1781 local = self._match_text_seq("LOCAL") 1782 alternative = None 1783 1784 if self._match_text_seq("DIRECTORY"): 1785 this: t.Optional[exp.Expression] = self.expression( 1786 exp.Directory, 1787 this=self._parse_var_or_string(), 1788 local=local, 1789 row_format=self._parse_row_format(match_row=True), 1790 ) 1791 else: 1792 if self._match(TokenType.OR): 1793 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1794 1795 self._match(TokenType.INTO) 1796 comments += ensure_list(self._prev_comments) 1797 self._match(TokenType.TABLE) 1798 this = self._parse_table(schema=True) 1799 1800 returning = self._parse_returning() 1801 1802 return self.expression( 1803 exp.Insert, 1804 comments=comments, 1805 this=this, 1806 by_name=self._match_text_seq("BY", "NAME"), 1807 exists=self._parse_exists(), 1808 partition=self._parse_partition(), 1809 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1810 and self._parse_conjunction(), 1811 expression=self._parse_ddl_select(), 1812 conflict=self._parse_on_conflict(), 1813 returning=returning or self._parse_returning(), 1814 overwrite=overwrite, 1815 alternative=alternative, 1816 ignore=ignore, 1817 ) 1818 1819 def 
_parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1820 conflict = self._match_text_seq("ON", "CONFLICT") 1821 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1822 1823 if not conflict and not duplicate: 1824 return None 1825 1826 nothing = None 1827 expressions = None 1828 key = None 1829 constraint = None 1830 1831 if conflict: 1832 if self._match_text_seq("ON", "CONSTRAINT"): 1833 constraint = self._parse_id_var() 1834 else: 1835 key = self._parse_csv(self._parse_value) 1836 1837 self._match_text_seq("DO") 1838 if self._match_text_seq("NOTHING"): 1839 nothing = True 1840 else: 1841 self._match(TokenType.UPDATE) 1842 self._match(TokenType.SET) 1843 expressions = self._parse_csv(self._parse_equality) 1844 1845 return self.expression( 1846 exp.OnConflict, 1847 duplicate=duplicate, 1848 expressions=expressions, 1849 nothing=nothing, 1850 key=key, 1851 constraint=constraint, 1852 ) 1853 1854 def _parse_returning(self) -> t.Optional[exp.Returning]: 1855 if not self._match(TokenType.RETURNING): 1856 return None 1857 return self.expression( 1858 exp.Returning, 1859 expressions=self._parse_csv(self._parse_expression), 1860 into=self._match(TokenType.INTO) and self._parse_table_part(), 1861 ) 1862 1863 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1864 if not self._match(TokenType.FORMAT): 1865 return None 1866 return self._parse_row_format() 1867 1868 def _parse_row_format( 1869 self, match_row: bool = False 1870 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1871 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1872 return None 1873 1874 if self._match_text_seq("SERDE"): 1875 this = self._parse_string() 1876 1877 serde_properties = None 1878 if self._match(TokenType.SERDE_PROPERTIES): 1879 serde_properties = self.expression( 1880 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1881 ) 1882 1883 return self.expression( 1884 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1885 ) 1886 1887 self._match_text_seq("DELIMITED") 1888 1889 kwargs = {} 1890 1891 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1892 kwargs["fields"] = self._parse_string() 1893 if self._match_text_seq("ESCAPED", "BY"): 1894 kwargs["escaped"] = self._parse_string() 1895 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1896 kwargs["collection_items"] = self._parse_string() 1897 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1898 kwargs["map_keys"] = self._parse_string() 1899 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1900 kwargs["lines"] = self._parse_string() 1901 if self._match_text_seq("NULL", "DEFINED", "AS"): 1902 kwargs["null"] = self._parse_string() 1903 1904 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1905 1906 def _parse_load(self) -> exp.LoadData | exp.Command: 1907 if self._match_text_seq("DATA"): 1908 local = self._match_text_seq("LOCAL") 1909 self._match_text_seq("INPATH") 1910 inpath = self._parse_string() 1911 overwrite = self._match(TokenType.OVERWRITE) 1912 self._match_pair(TokenType.INTO, TokenType.TABLE) 1913 1914 return self.expression( 1915 exp.LoadData, 1916 this=self._parse_table(schema=True), 1917 local=local, 1918 overwrite=overwrite, 1919 inpath=inpath, 1920 partition=self._parse_partition(), 1921 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1922 serde=self._match_text_seq("SERDE") and self._parse_string(), 1923 ) 
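# Anything other than LOAD DATA (e.g. PostgreSQL's LOAD 'libname') falls
# through below and is preserved verbatim as an exp.Command.
#
# Illustrative usage, as a sketch (Hive's LOAD DATA syntax):
#
#     import sqlglot
#     node = sqlglot.parse_one(
#         "LOAD DATA LOCAL INPATH '/files/x.csv' OVERWRITE INTO TABLE t",
#         read="hive",
#     )
#     assert isinstance(node, sqlglot.exp.LoadData)
#     assert node.args["local"] and node.args["overwrite"]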
1924 return self._parse_as_command(self._prev) 1925 1926 def _parse_delete(self) -> exp.Delete: 1927 # This handles MySQL's "Multiple-Table Syntax" 1928 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1929 tables = None 1930 comments = self._prev_comments 1931 if not self._match(TokenType.FROM, advance=False): 1932 tables = self._parse_csv(self._parse_table) or None 1933 1934 returning = self._parse_returning() 1935 1936 return self.expression( 1937 exp.Delete, 1938 comments=comments, 1939 tables=tables, 1940 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1941 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1942 where=self._parse_where(), 1943 returning=returning or self._parse_returning(), 1944 limit=self._parse_limit(), 1945 ) 1946 1947 def _parse_update(self) -> exp.Update: 1948 comments = self._prev_comments 1949 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1950 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1951 returning = self._parse_returning() 1952 return self.expression( 1953 exp.Update, 1954 comments=comments, 1955 **{ # type: ignore 1956 "this": this, 1957 "expressions": expressions, 1958 "from": self._parse_from(joins=True), 1959 "where": self._parse_where(), 1960 "returning": returning or self._parse_returning(), 1961 "order": self._parse_order(), 1962 "limit": self._parse_limit(), 1963 }, 1964 ) 1965 1966 def _parse_uncache(self) -> exp.Uncache: 1967 if not self._match(TokenType.TABLE): 1968 self.raise_error("Expecting TABLE after UNCACHE") 1969 1970 return self.expression( 1971 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1972 ) 1973 1974 def _parse_cache(self) -> exp.Cache: 1975 lazy = self._match_text_seq("LAZY") 1976 self._match(TokenType.TABLE) 1977 table = self._parse_table(schema=True) 1978 1979 options = [] 1980 if self._match_text_seq("OPTIONS"): 1981 self._match_l_paren() 1982 k = self._parse_string() 1983 self._match(TokenType.EQ) 1984 v = self._parse_string() 1985 options = [k, v] 1986 self._match_r_paren() 1987 1988 self._match(TokenType.ALIAS) 1989 return self.expression( 1990 exp.Cache, 1991 this=table, 1992 lazy=lazy, 1993 options=options, 1994 expression=self._parse_select(nested=True), 1995 ) 1996 1997 def _parse_partition(self) -> t.Optional[exp.Partition]: 1998 if not self._match(TokenType.PARTITION): 1999 return None 2000 2001 return self.expression( 2002 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2003 ) 2004 2005 def _parse_value(self) -> exp.Tuple: 2006 if self._match(TokenType.L_PAREN): 2007 expressions = self._parse_csv(self._parse_conjunction) 2008 self._match_r_paren() 2009 return self.expression(exp.Tuple, expressions=expressions) 2010 2011 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
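# For example, VALUES 1, 2 parses as two one-column rows, while
# VALUES (1, 2) is a single row with two columns.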
2012 # https://prestodb.io/docs/current/sql/values.html 2013 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2014 2015 def _parse_projections(self) -> t.List[exp.Expression]: 2016 return self._parse_expressions() 2017 2018 def _parse_select( 2019 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2020 ) -> t.Optional[exp.Expression]: 2021 cte = self._parse_with() 2022 2023 if cte: 2024 this = self._parse_statement() 2025 2026 if not this: 2027 self.raise_error("Failed to parse any statement following CTE") 2028 return cte 2029 2030 if "with" in this.arg_types: 2031 this.set("with", cte) 2032 else: 2033 self.raise_error(f"{this.key} does not support CTE") 2034 this = cte 2035 2036 return this 2037 2038 # duckdb supports leading with FROM x 2039 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2040 2041 if self._match(TokenType.SELECT): 2042 comments = self._prev_comments 2043 2044 hint = self._parse_hint() 2045 all_ = self._match(TokenType.ALL) 2046 distinct = self._match_set(self.DISTINCT_TOKENS) 2047 2048 kind = ( 2049 self._match(TokenType.ALIAS) 2050 and self._match_texts(("STRUCT", "VALUE")) 2051 and self._prev.text 2052 ) 2053 2054 if distinct: 2055 distinct = self.expression( 2056 exp.Distinct, 2057 on=self._parse_value() if self._match(TokenType.ON) else None, 2058 ) 2059 2060 if all_ and distinct: 2061 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2062 2063 limit = self._parse_limit(top=True) 2064 projections = self._parse_projections() 2065 2066 this = self.expression( 2067 exp.Select, 2068 kind=kind, 2069 hint=hint, 2070 distinct=distinct, 2071 expressions=projections, 2072 limit=limit, 2073 ) 2074 this.comments = comments 2075 2076 into = self._parse_into() 2077 if into: 2078 this.set("into", into) 2079 2080 if not from_: 2081 from_ = self._parse_from() 2082 2083 if from_: 2084 this.set("from", from_) 2085 2086 this = self._parse_query_modifiers(this) 2087 elif (table or nested) and self._match(TokenType.L_PAREN): 2088 if self._match(TokenType.PIVOT): 2089 this = self._parse_simplified_pivot() 2090 elif self._match(TokenType.FROM): 2091 this = exp.select("*").from_( 2092 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2093 ) 2094 else: 2095 this = self._parse_table() if table else self._parse_select(nested=True) 2096 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2097 2098 self._match_r_paren() 2099 2100 # We return early here so that the UNION isn't attached to the subquery by the 2101 # following call to _parse_set_operations, but instead becomes the parent node 2102 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2103 elif self._match(TokenType.VALUES): 2104 this = self.expression( 2105 exp.Values, 2106 expressions=self._parse_csv(self._parse_value), 2107 alias=self._parse_table_alias(), 2108 ) 2109 elif from_: 2110 this = exp.select("*").from_(from_.this, copy=False) 2111 else: 2112 this = None 2113 2114 return self._parse_set_operations(this) 2115 2116 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2117 if not skip_with_token and not self._match(TokenType.WITH): 2118 return None 2119 2120 comments = self._prev_comments 2121 recursive = self._match(TokenType.RECURSIVE) 2122 2123 expressions = [] 2124 while True: 2125 expressions.append(self._parse_cte()) 2126 2127 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2128 break 2129 else: 2130 
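# A redundant WITH keyword after the comma is consumed here, so
# "WITH a AS (...), WITH b AS (...)" yields the same tree as the
# standard "WITH a AS (...), b AS (...)".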
self._match(TokenType.WITH) 2131 2132 return self.expression( 2133 exp.With, comments=comments, expressions=expressions, recursive=recursive 2134 ) 2135 2136 def _parse_cte(self) -> exp.CTE: 2137 alias = self._parse_table_alias() 2138 if not alias or not alias.this: 2139 self.raise_error("Expected CTE to have alias") 2140 2141 self._match(TokenType.ALIAS) 2142 return self.expression( 2143 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2144 ) 2145 2146 def _parse_table_alias( 2147 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2148 ) -> t.Optional[exp.TableAlias]: 2149 any_token = self._match(TokenType.ALIAS) 2150 alias = ( 2151 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2152 or self._parse_string_as_identifier() 2153 ) 2154 2155 index = self._index 2156 if self._match(TokenType.L_PAREN): 2157 columns = self._parse_csv(self._parse_function_parameter) 2158 self._match_r_paren() if columns else self._retreat(index) 2159 else: 2160 columns = None 2161 2162 if not alias and not columns: 2163 return None 2164 2165 return self.expression(exp.TableAlias, this=alias, columns=columns) 2166 2167 def _parse_subquery( 2168 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2169 ) -> t.Optional[exp.Subquery]: 2170 if not this: 2171 return None 2172 2173 return self.expression( 2174 exp.Subquery, 2175 this=this, 2176 pivots=self._parse_pivots(), 2177 alias=self._parse_table_alias() if parse_alias else None, 2178 ) 2179 2180 def _parse_query_modifiers( 2181 self, this: t.Optional[exp.Expression] 2182 ) -> t.Optional[exp.Expression]: 2183 if isinstance(this, self.MODIFIABLES): 2184 for join in iter(self._parse_join, None): 2185 this.append("joins", join) 2186 for lateral in iter(self._parse_lateral, None): 2187 this.append("laterals", lateral) 2188 2189 while True: 2190 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2191 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2192 key, expression = parser(self) 2193 2194 if expression: 2195 this.set(key, expression) 2196 if key == "limit": 2197 offset = expression.args.pop("offset", None) 2198 if offset: 2199 this.set("offset", exp.Offset(expression=offset)) 2200 continue 2201 break 2202 return this 2203 2204 def _parse_hint(self) -> t.Optional[exp.Hint]: 2205 if self._match(TokenType.HINT): 2206 hints = [] 2207 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2208 hints.extend(hint) 2209 2210 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2211 self.raise_error("Expected */ after HINT") 2212 2213 return self.expression(exp.Hint, expressions=hints) 2214 2215 return None 2216 2217 def _parse_into(self) -> t.Optional[exp.Into]: 2218 if not self._match(TokenType.INTO): 2219 return None 2220 2221 temp = self._match(TokenType.TEMPORARY) 2222 unlogged = self._match_text_seq("UNLOGGED") 2223 self._match(TokenType.TABLE) 2224 2225 return self.expression( 2226 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2227 ) 2228 2229 def _parse_from( 2230 self, joins: bool = False, skip_from_token: bool = False 2231 ) -> t.Optional[exp.From]: 2232 if not skip_from_token and not self._match(TokenType.FROM): 2233 return None 2234 2235 return self.expression( 2236 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2237 ) 2238 2239 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2240 if not self._match(TokenType.MATCH_RECOGNIZE): 2241 return None 2242 2243 
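# Parses a MATCH_RECOGNIZE clause (Snowflake/Oracle-style row pattern
# matching), e.g. an illustrative query such as:
#
#     SELECT * FROM ticks MATCH_RECOGNIZE (
#       PARTITION BY symbol ORDER BY ts
#       MEASURES FIRST(ts) AS start_ts
#       ONE ROW PER MATCH
#       AFTER MATCH SKIP PAST LAST ROW
#       PATTERN (A B+)
#       DEFINE B AS price < PREV(price)
#     ) AS mr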
self._match_l_paren() 2244 2245 partition = self._parse_partition_by() 2246 order = self._parse_order() 2247 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2248 2249 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2250 rows = exp.var("ONE ROW PER MATCH") 2251 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2252 text = "ALL ROWS PER MATCH" 2253 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2254 text += " SHOW EMPTY MATCHES" 2255 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2256 text += " OMIT EMPTY MATCHES" 2257 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2258 text += " WITH UNMATCHED ROWS" 2259 rows = exp.var(text) 2260 else: 2261 rows = None 2262 2263 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2264 text = "AFTER MATCH SKIP" 2265 if self._match_text_seq("PAST", "LAST", "ROW"): 2266 text += " PAST LAST ROW" 2267 elif self._match_text_seq("TO", "NEXT", "ROW"): 2268 text += " TO NEXT ROW" 2269 elif self._match_text_seq("TO", "FIRST"): 2270 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2271 elif self._match_text_seq("TO", "LAST"): 2272 text += f" TO LAST {self._advance_any().text}" # type: ignore 2273 after = exp.var(text) 2274 else: 2275 after = None 2276 2277 if self._match_text_seq("PATTERN"): 2278 self._match_l_paren() 2279 2280 if not self._curr: 2281 self.raise_error("Expecting )", self._curr) 2282 2283 paren = 1 2284 start = self._curr 2285 2286 while self._curr and paren > 0: 2287 if self._curr.token_type == TokenType.L_PAREN: 2288 paren += 1 2289 if self._curr.token_type == TokenType.R_PAREN: 2290 paren -= 1 2291 2292 end = self._prev 2293 self._advance() 2294 2295 if paren > 0: 2296 self.raise_error("Expecting )", self._curr) 2297 2298 pattern = exp.var(self._find_sql(start, end)) 2299 else: 2300 pattern = None 2301 2302 define = ( 2303 self._parse_csv( 2304 lambda: self.expression( 2305 exp.Alias, 2306 alias=self._parse_id_var(any_token=True), 2307 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2308 ) 2309 ) 2310 if self._match_text_seq("DEFINE") 2311 else None 2312 ) 2313 2314 self._match_r_paren() 2315 2316 return self.expression( 2317 exp.MatchRecognize, 2318 partition_by=partition, 2319 order=order, 2320 measures=measures, 2321 rows=rows, 2322 after=after, 2323 pattern=pattern, 2324 define=define, 2325 alias=self._parse_table_alias(), 2326 ) 2327 2328 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2329 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2330 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2331 2332 if outer_apply or cross_apply: 2333 this = self._parse_select(table=True) 2334 view = None 2335 outer = not cross_apply 2336 elif self._match(TokenType.LATERAL): 2337 this = self._parse_select(table=True) 2338 view = self._match(TokenType.VIEW) 2339 outer = self._match(TokenType.OUTER) 2340 else: 2341 return None 2342 2343 if not this: 2344 this = ( 2345 self._parse_unnest() 2346 or self._parse_function() 2347 or self._parse_id_var(any_token=False) 2348 ) 2349 2350 while self._match(TokenType.DOT): 2351 this = exp.Dot( 2352 this=this, 2353 expression=self._parse_function() or self._parse_id_var(any_token=False), 2354 ) 2355 2356 if view: 2357 table = self._parse_id_var(any_token=False) 2358 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2359 table_alias: t.Optional[exp.TableAlias] = self.expression( 2360 exp.TableAlias, this=table, columns=columns 2361 ) 2362 elif
isinstance(this, exp.Subquery) and this.alias: 2363 # Ensures parity between the Subquery's and the Lateral's "alias" args 2364 table_alias = this.args["alias"].copy() 2365 else: 2366 table_alias = self._parse_table_alias() 2367 2368 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2369 2370 def _parse_join_parts( 2371 self, 2372 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2373 return ( 2374 self._match_set(self.JOIN_METHODS) and self._prev, 2375 self._match_set(self.JOIN_SIDES) and self._prev, 2376 self._match_set(self.JOIN_KINDS) and self._prev, 2377 ) 2378 2379 def _parse_join( 2380 self, skip_join_token: bool = False, parse_bracket: bool = False 2381 ) -> t.Optional[exp.Join]: 2382 if self._match(TokenType.COMMA): 2383 return self.expression(exp.Join, this=self._parse_table()) 2384 2385 index = self._index 2386 method, side, kind = self._parse_join_parts() 2387 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2388 join = self._match(TokenType.JOIN) 2389 2390 if not skip_join_token and not join: 2391 self._retreat(index) 2392 kind = None 2393 method = None 2394 side = None 2395 2396 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2397 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2398 2399 if not skip_join_token and not join and not outer_apply and not cross_apply: 2400 return None 2401 2402 if outer_apply: 2403 side = Token(TokenType.LEFT, "LEFT") 2404 2405 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2406 2407 if method: 2408 kwargs["method"] = method.text 2409 if side: 2410 kwargs["side"] = side.text 2411 if kind: 2412 kwargs["kind"] = kind.text 2413 if hint: 2414 kwargs["hint"] = hint 2415 2416 if self._match(TokenType.ON): 2417 kwargs["on"] = self._parse_conjunction() 2418 elif self._match(TokenType.USING): 2419 kwargs["using"] = self._parse_wrapped_id_vars() 2420 elif not (kind and kind.token_type == TokenType.CROSS): 2421 index = self._index 2422 joins = self._parse_joins() 2423 2424 if joins and self._match(TokenType.ON): 2425 kwargs["on"] = self._parse_conjunction() 2426 elif joins and self._match(TokenType.USING): 2427 kwargs["using"] = self._parse_wrapped_id_vars() 2428 else: 2429 joins = None 2430 self._retreat(index) 2431 2432 kwargs["this"].set("joins", joins) 2433 2434 comments = [c for token in (method, side, kind) if token for c in token.comments] 2435 return self.expression(exp.Join, comments=comments, **kwargs) 2436 2437 def _parse_index( 2438 self, 2439 index: t.Optional[exp.Expression] = None, 2440 ) -> t.Optional[exp.Index]: 2441 if index: 2442 unique = None 2443 primary = None 2444 amp = None 2445 2446 self._match(TokenType.ON) 2447 self._match(TokenType.TABLE) # hive 2448 table = self._parse_table_parts(schema=True) 2449 else: 2450 unique = self._match(TokenType.UNIQUE) 2451 primary = self._match_text_seq("PRIMARY") 2452 amp = self._match_text_seq("AMP") 2453 2454 if not self._match(TokenType.INDEX): 2455 return None 2456 2457 index = self._parse_id_var() 2458 table = None 2459 2460 using = self._parse_field() if self._match(TokenType.USING) else None 2461 2462 if self._match(TokenType.L_PAREN, advance=False): 2463 columns = self._parse_wrapped_csv(self._parse_ordered) 2464 else: 2465 columns = None 2466 2467 return self.expression( 2468 exp.Index, 2469 this=index, 2470 table=table, 2471 using=using, 2472 columns=columns, 2473 unique=unique, 2474 primary=primary, 2475 amp=amp, 2476 
partition_by=self._parse_partition_by(), 2477 ) 2478 2479 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2480 hints: t.List[exp.Expression] = [] 2481 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2482 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2483 hints.append( 2484 self.expression( 2485 exp.WithTableHint, 2486 expressions=self._parse_csv( 2487 lambda: self._parse_function() or self._parse_var(any_token=True) 2488 ), 2489 ) 2490 ) 2491 self._match_r_paren() 2492 else: 2493 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2494 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2495 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2496 2497 self._match_texts({"INDEX", "KEY"}) 2498 if self._match(TokenType.FOR): 2499 hint.set("target", self._advance_any() and self._prev.text.upper()) 2500 2501 hint.set("expressions", self._parse_wrapped_id_vars()) 2502 hints.append(hint) 2503 2504 return hints or None 2505 2506 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2507 return ( 2508 (not schema and self._parse_function(optional_parens=False)) 2509 or self._parse_id_var(any_token=False) 2510 or self._parse_string_as_identifier() 2511 or self._parse_placeholder() 2512 ) 2513 2514 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2515 catalog = None 2516 db = None 2517 table = self._parse_table_part(schema=schema) 2518 2519 while self._match(TokenType.DOT): 2520 if catalog: 2521 # This allows nesting the table in arbitrarily many dot expressions if needed 2522 table = self.expression( 2523 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2524 ) 2525 else: 2526 catalog = db 2527 db = table 2528 table = self._parse_table_part(schema=schema) 2529 2530 if not table: 2531 self.raise_error(f"Expected table name but got {self._curr}") 2532 2533 return self.expression( 2534 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2535 ) 2536 2537 def _parse_table( 2538 self, 2539 schema: bool = False, 2540 joins: bool = False, 2541 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2542 parse_bracket: bool = False, 2543 ) -> t.Optional[exp.Expression]: 2544 lateral = self._parse_lateral() 2545 if lateral: 2546 return lateral 2547 2548 unnest = self._parse_unnest() 2549 if unnest: 2550 return unnest 2551 2552 values = self._parse_derived_table_values() 2553 if values: 2554 return values 2555 2556 subquery = self._parse_select(table=True) 2557 if subquery: 2558 if not subquery.args.get("pivots"): 2559 subquery.set("pivots", self._parse_pivots()) 2560 return subquery 2561 2562 bracket = parse_bracket and self._parse_bracket(None) 2563 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2564 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2565 2566 if schema: 2567 return self._parse_schema(this=this) 2568 2569 version = self._parse_version() 2570 2571 if version: 2572 this.set("version", version) 2573 2574 if self.ALIAS_POST_TABLESAMPLE: 2575 table_sample = self._parse_table_sample() 2576 2577 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2578 if alias: 2579 this.set("alias", alias) 2580 2581 this.set("hints", self._parse_table_hints()) 2582 2583 if not this.args.get("pivots"): 2584 this.set("pivots", self._parse_pivots()) 2585 2586 if not self.ALIAS_POST_TABLESAMPLE: 2587 table_sample = self._parse_table_sample() 2588 2589 if 
table_sample: 2590 table_sample.set("this", this) 2591 this = table_sample 2592 2593 if joins: 2594 for join in iter(self._parse_join, None): 2595 this.append("joins", join) 2596 2597 return this 2598 2599 def _parse_version(self) -> t.Optional[exp.Version]: 2600 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2601 this = "TIMESTAMP" 2602 elif self._match(TokenType.VERSION_SNAPSHOT): 2603 this = "VERSION" 2604 else: 2605 return None 2606 2607 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2608 kind = self._prev.text.upper() 2609 start = self._parse_bitwise() 2610 self._match_texts(("TO", "AND")) 2611 end = self._parse_bitwise() 2612 expression: t.Optional[exp.Expression] = self.expression( 2613 exp.Tuple, expressions=[start, end] 2614 ) 2615 elif self._match_text_seq("CONTAINED", "IN"): 2616 kind = "CONTAINED IN" 2617 expression = self.expression( 2618 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2619 ) 2620 elif self._match(TokenType.ALL): 2621 kind = "ALL" 2622 expression = None 2623 else: 2624 self._match_text_seq("AS", "OF") 2625 kind = "AS OF" 2626 expression = self._parse_type() 2627 2628 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2629 2630 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2631 if not self._match(TokenType.UNNEST): 2632 return None 2633 2634 expressions = self._parse_wrapped_csv(self._parse_type) 2635 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2636 2637 alias = self._parse_table_alias() if with_alias else None 2638 2639 if alias and self.UNNEST_COLUMN_ONLY: 2640 if alias.args.get("columns"): 2641 self.raise_error("Unexpected extra column alias in unnest.") 2642 2643 alias.set("columns", [alias.this]) 2644 alias.set("this", None) 2645 2646 offset = None 2647 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2648 self._match(TokenType.ALIAS) 2649 offset = self._parse_id_var() or exp.to_identifier("offset") 2650 2651 return self.expression( 2652 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2653 ) 2654 2655 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2656 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2657 if not is_derived and not self._match(TokenType.VALUES): 2658 return None 2659 2660 expressions = self._parse_csv(self._parse_value) 2661 alias = self._parse_table_alias() 2662 2663 if is_derived: 2664 self._match_r_paren() 2665 2666 return self.expression( 2667 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2668 ) 2669 2670 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2671 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2672 as_modifier and self._match_text_seq("USING", "SAMPLE") 2673 ): 2674 return None 2675 2676 bucket_numerator = None 2677 bucket_denominator = None 2678 bucket_field = None 2679 percent = None 2680 rows = None 2681 size = None 2682 seed = None 2683 2684 kind = ( 2685 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2686 ) 2687 method = self._parse_var(tokens=(TokenType.ROW,)) 2688 2689 self._match(TokenType.L_PAREN) 2690 2691 if self.TABLESAMPLE_CSV: 2692 num = None 2693 expressions = self._parse_csv(self._parse_primary) 2694 else: 2695 expressions = None 2696 num = self._parse_number() 2697 2698 if self._match_text_seq("BUCKET"): 2699 bucket_numerator = self._parse_number() 2700 self._match_text_seq("OUT", "OF") 2701 
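# Hive-style bucket sampling, e.g. TABLESAMPLE (BUCKET 3 OUT OF 16 ON rand())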
bucket_denominator = self._parse_number() 2702 self._match(TokenType.ON) 2703 bucket_field = self._parse_field() 2704 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2705 percent = num 2706 elif self._match(TokenType.ROWS): 2707 rows = num 2708 elif num: 2709 size = num 2710 2711 self._match(TokenType.R_PAREN) 2712 2713 if self._match(TokenType.L_PAREN): 2714 method = self._parse_var() 2715 seed = self._match(TokenType.COMMA) and self._parse_number() 2716 self._match_r_paren() 2717 elif self._match_texts(("SEED", "REPEATABLE")): 2718 seed = self._parse_wrapped(self._parse_number) 2719 2720 return self.expression( 2721 exp.TableSample, 2722 expressions=expressions, 2723 method=method, 2724 bucket_numerator=bucket_numerator, 2725 bucket_denominator=bucket_denominator, 2726 bucket_field=bucket_field, 2727 percent=percent, 2728 rows=rows, 2729 size=size, 2730 seed=seed, 2731 kind=kind, 2732 ) 2733 2734 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2735 return list(iter(self._parse_pivot, None)) or None 2736 2737 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2738 return list(iter(self._parse_join, None)) or None 2739 2740 # https://duckdb.org/docs/sql/statements/pivot 2741 def _parse_simplified_pivot(self) -> exp.Pivot: 2742 def _parse_on() -> t.Optional[exp.Expression]: 2743 this = self._parse_bitwise() 2744 return self._parse_in(this) if self._match(TokenType.IN) else this 2745 2746 this = self._parse_table() 2747 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2748 using = self._match(TokenType.USING) and self._parse_csv( 2749 lambda: self._parse_alias(self._parse_function()) 2750 ) 2751 group = self._parse_group() 2752 return self.expression( 2753 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2754 ) 2755 2756 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2757 index = self._index 2758 include_nulls = None 2759 2760 if self._match(TokenType.PIVOT): 2761 unpivot = False 2762 elif self._match(TokenType.UNPIVOT): 2763 unpivot = True 2764 2765 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2766 if self._match_text_seq("INCLUDE", "NULLS"): 2767 include_nulls = True 2768 elif self._match_text_seq("EXCLUDE", "NULLS"): 2769 include_nulls = False 2770 else: 2771 return None 2772 2773 expressions = [] 2774 field = None 2775 2776 if not self._match(TokenType.L_PAREN): 2777 self._retreat(index) 2778 return None 2779 2780 if unpivot: 2781 expressions = self._parse_csv(self._parse_column) 2782 else: 2783 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2784 2785 if not expressions: 2786 self.raise_error("Failed to parse PIVOT's aggregation list") 2787 2788 if not self._match(TokenType.FOR): 2789 self.raise_error("Expecting FOR") 2790 2791 value = self._parse_column() 2792 2793 if not self._match(TokenType.IN): 2794 self.raise_error("Expecting IN") 2795 2796 field = self._parse_in(value, alias=True) 2797 2798 self._match_r_paren() 2799 2800 pivot = self.expression( 2801 exp.Pivot, 2802 expressions=expressions, 2803 field=field, 2804 unpivot=unpivot, 2805 include_nulls=include_nulls, 2806 ) 2807 2808 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2809 pivot.set("alias", self._parse_table_alias()) 2810 2811 if not unpivot: 2812 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2813 2814 columns: t.List[exp.Expression] = [] 2815 for fld in
pivot.args["field"].expressions: 2816 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2817 for name in names: 2818 if self.PREFIXED_PIVOT_COLUMNS: 2819 name = f"{name}_{field_name}" if name else field_name 2820 else: 2821 name = f"{field_name}_{name}" if name else field_name 2822 2823 columns.append(exp.to_identifier(name)) 2824 2825 pivot.set("columns", columns) 2826 2827 return pivot 2828 2829 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2830 return [agg.alias for agg in aggregations] 2831 2832 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2833 if not skip_where_token and not self._match(TokenType.WHERE): 2834 return None 2835 2836 return self.expression( 2837 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2838 ) 2839 2840 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2841 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2842 return None 2843 2844 elements = defaultdict(list) 2845 2846 if self._match(TokenType.ALL): 2847 return self.expression(exp.Group, all=True) 2848 2849 while True: 2850 expressions = self._parse_csv(self._parse_conjunction) 2851 if expressions: 2852 elements["expressions"].extend(expressions) 2853 2854 grouping_sets = self._parse_grouping_sets() 2855 if grouping_sets: 2856 elements["grouping_sets"].extend(grouping_sets) 2857 2858 rollup = None 2859 cube = None 2860 totals = None 2861 2862 with_ = self._match(TokenType.WITH) 2863 if self._match(TokenType.ROLLUP): 2864 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2865 elements["rollup"].extend(ensure_list(rollup)) 2866 2867 if self._match(TokenType.CUBE): 2868 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2869 elements["cube"].extend(ensure_list(cube)) 2870 2871 if self._match_text_seq("TOTALS"): 2872 totals = True 2873 elements["totals"] = True # type: ignore 2874 2875 if not (grouping_sets or rollup or cube or totals): 2876 break 2877 2878 return self.expression(exp.Group, **elements) # type: ignore 2879 2880 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2881 if not self._match(TokenType.GROUPING_SETS): 2882 return None 2883 2884 return self._parse_wrapped_csv(self._parse_grouping_set) 2885 2886 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2887 if self._match(TokenType.L_PAREN): 2888 grouping_set = self._parse_csv(self._parse_column) 2889 self._match_r_paren() 2890 return self.expression(exp.Tuple, expressions=grouping_set) 2891 2892 return self._parse_column() 2893 2894 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2895 if not skip_having_token and not self._match(TokenType.HAVING): 2896 return None 2897 return self.expression(exp.Having, this=self._parse_conjunction()) 2898 2899 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2900 if not self._match(TokenType.QUALIFY): 2901 return None 2902 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2903 2904 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2905 if skip_start_token: 2906 start = None 2907 elif self._match(TokenType.START_WITH): 2908 start = self._parse_conjunction() 2909 else: 2910 return None 2911 2912 self._match(TokenType.CONNECT_BY) 2913 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2914 exp.Prior, this=self._parse_bitwise() 2915 ) 2916 connect = self._parse_conjunction() 2917 
self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2918 2919 if not start and self._match(TokenType.START_WITH): 2920 start = self._parse_conjunction() 2921 2922 return self.expression(exp.Connect, start=start, connect=connect) 2923 2924 def _parse_order( 2925 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2926 ) -> t.Optional[exp.Expression]: 2927 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2928 return this 2929 2930 return self.expression( 2931 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2932 ) 2933 2934 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2935 if not self._match(token): 2936 return None 2937 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2938 2939 def _parse_ordered(self) -> exp.Ordered: 2940 this = self._parse_conjunction() 2941 self._match(TokenType.ASC) 2942 2943 is_desc = self._match(TokenType.DESC) 2944 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2945 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2946 desc = is_desc or False 2947 asc = not desc 2948 nulls_first = is_nulls_first or False 2949 explicitly_null_ordered = is_nulls_first or is_nulls_last 2950 2951 if ( 2952 not explicitly_null_ordered 2953 and ( 2954 (asc and self.NULL_ORDERING == "nulls_are_small") 2955 or (desc and self.NULL_ORDERING != "nulls_are_small") 2956 ) 2957 and self.NULL_ORDERING != "nulls_are_last" 2958 ): 2959 nulls_first = True 2960 2961 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2962 2963 def _parse_limit( 2964 self, this: t.Optional[exp.Expression] = None, top: bool = False 2965 ) -> t.Optional[exp.Expression]: 2966 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2967 comments = self._prev_comments 2968 if top: 2969 limit_paren = self._match(TokenType.L_PAREN) 2970 expression = self._parse_number() 2971 2972 if limit_paren: 2973 self._match_r_paren() 2974 else: 2975 expression = self._parse_term() 2976 2977 if self._match(TokenType.COMMA): 2978 offset = expression 2979 expression = self._parse_term() 2980 else: 2981 offset = None 2982 2983 limit_exp = self.expression( 2984 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2985 ) 2986 2987 return limit_exp 2988 2989 if self._match(TokenType.FETCH): 2990 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2991 direction = self._prev.text if direction else "FIRST" 2992 2993 count = self._parse_field(tokens=self.FETCH_TOKENS) 2994 percent = self._match(TokenType.PERCENT) 2995 2996 self._match_set((TokenType.ROW, TokenType.ROWS)) 2997 2998 only = self._match_text_seq("ONLY") 2999 with_ties = self._match_text_seq("WITH", "TIES") 3000 3001 if only and with_ties: 3002 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3003 3004 return self.expression( 3005 exp.Fetch, 3006 direction=direction, 3007 count=count, 3008 percent=percent, 3009 with_ties=with_ties, 3010 ) 3011 3012 return this 3013 3014 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3015 if not self._match(TokenType.OFFSET): 3016 return this 3017 3018 count = self._parse_term() 3019 self._match_set((TokenType.ROW, TokenType.ROWS)) 3020 return self.expression(exp.Offset, this=this, expression=count) 3021 3022 def _parse_locks(self) -> t.List[exp.Lock]: 3023 locks = [] 3024 while True: 3025 if self._match_text_seq("FOR", "UPDATE"): 3026 update = True 3027 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3028 "LOCK", "IN", "SHARE", "MODE" 3029 ): 3030 update = False 3031 else: 3032 break 3033 3034 expressions = None 3035 if self._match_text_seq("OF"): 3036 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3037 3038 wait: t.Optional[bool | exp.Expression] = None 3039 if self._match_text_seq("NOWAIT"): 3040 wait = True 3041 elif self._match_text_seq("WAIT"): 3042 wait = self._parse_primary() 3043 elif self._match_text_seq("SKIP", "LOCKED"): 3044 wait = False 3045 3046 locks.append( 3047 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3048 ) 3049 3050 return locks 3051 3052 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3053 if not self._match_set(self.SET_OPERATIONS): 3054 return this 3055 3056 token_type = self._prev.token_type 3057 3058 if token_type == TokenType.UNION: 3059 expression = exp.Union 3060 elif token_type == TokenType.EXCEPT: 3061 expression = exp.Except 3062 else: 3063 expression = exp.Intersect 3064 3065 return self.expression( 3066 expression, 3067 this=this, 3068 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3069 by_name=self._match_text_seq("BY", "NAME"), 3070 expression=self._parse_set_operations(self._parse_select(nested=True)), 3071 ) 3072 3073 def _parse_expression(self) -> t.Optional[exp.Expression]: 3074 return self._parse_alias(self._parse_conjunction()) 3075 3076 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3077 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3078 3079 def _parse_equality(self) -> t.Optional[exp.Expression]: 3080 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3081 3082 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3083 return self._parse_tokens(self._parse_range, self.COMPARISON) 3084 3085 def _parse_range(self) -> t.Optional[exp.Expression]: 3086 this = self._parse_bitwise() 3087 negate = self._match(TokenType.NOT) 3088 3089 if self._match_set(self.RANGE_PARSERS): 3090 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3091 if not expression: 3092 return this 3093 3094 this = expression 3095 elif self._match(TokenType.ISNULL): 3096 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3097 3098 # Postgres supports ISNULL and NOTNULL for conditions. 
3099 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3100 if self._match(TokenType.NOTNULL): 3101 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3102 this = self.expression(exp.Not, this=this) 3103 3104 if negate: 3105 this = self.expression(exp.Not, this=this) 3106 3107 if self._match(TokenType.IS): 3108 this = self._parse_is(this) 3109 3110 return this 3111 3112 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3113 index = self._index - 1 3114 negate = self._match(TokenType.NOT) 3115 3116 if self._match_text_seq("DISTINCT", "FROM"): 3117 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3118 return self.expression(klass, this=this, expression=self._parse_expression()) 3119 3120 expression = self._parse_null() or self._parse_boolean() 3121 if not expression: 3122 self._retreat(index) 3123 return None 3124 3125 this = self.expression(exp.Is, this=this, expression=expression) 3126 return self.expression(exp.Not, this=this) if negate else this 3127 3128 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3129 unnest = self._parse_unnest(with_alias=False) 3130 if unnest: 3131 this = self.expression(exp.In, this=this, unnest=unnest) 3132 elif self._match(TokenType.L_PAREN): 3133 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3134 3135 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3136 this = self.expression(exp.In, this=this, query=expressions[0]) 3137 else: 3138 this = self.expression(exp.In, this=this, expressions=expressions) 3139 3140 self._match_r_paren(this) 3141 else: 3142 this = self.expression(exp.In, this=this, field=self._parse_field()) 3143 3144 return this 3145 3146 def _parse_between(self, this: exp.Expression) -> exp.Between: 3147 low = self._parse_bitwise() 3148 self._match(TokenType.AND) 3149 high = self._parse_bitwise() 3150 return self.expression(exp.Between, this=this, low=low, high=high) 3151 3152 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3153 if not self._match(TokenType.ESCAPE): 3154 return this 3155 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3156 3157 def _parse_interval(self) -> t.Optional[exp.Interval]: 3158 index = self._index 3159 3160 if not self._match(TokenType.INTERVAL): 3161 return None 3162 3163 if self._match(TokenType.STRING, advance=False): 3164 this = self._parse_primary() 3165 else: 3166 this = self._parse_term() 3167 3168 if not this: 3169 self._retreat(index) 3170 return None 3171 3172 unit = self._parse_function() or self._parse_var(any_token=True) 3173 3174 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3175 # each INTERVAL expression into this canonical form so it's easy to transpile 3176 if this and this.is_number: 3177 this = exp.Literal.string(this.name) 3178 elif this and this.is_string: 3179 parts = this.name.split() 3180 3181 if len(parts) == 2: 3182 if unit: 3183 # This is not actually a unit, it's something else (e.g. 
a "window side") 3184 unit = None 3185 self._retreat(self._index - 1) 3186 3187 this = exp.Literal.string(parts[0]) 3188 unit = self.expression(exp.Var, this=parts[1]) 3189 3190 return self.expression(exp.Interval, this=this, unit=unit) 3191 3192 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3193 this = self._parse_term() 3194 3195 while True: 3196 if self._match_set(self.BITWISE): 3197 this = self.expression( 3198 self.BITWISE[self._prev.token_type], 3199 this=this, 3200 expression=self._parse_term(), 3201 ) 3202 elif self._match(TokenType.DQMARK): 3203 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3204 elif self._match_pair(TokenType.LT, TokenType.LT): 3205 this = self.expression( 3206 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3207 ) 3208 elif self._match_pair(TokenType.GT, TokenType.GT): 3209 this = self.expression( 3210 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3211 ) 3212 else: 3213 break 3214 3215 return this 3216 3217 def _parse_term(self) -> t.Optional[exp.Expression]: 3218 return self._parse_tokens(self._parse_factor, self.TERM) 3219 3220 def _parse_factor(self) -> t.Optional[exp.Expression]: 3221 return self._parse_tokens(self._parse_unary, self.FACTOR) 3222 3223 def _parse_unary(self) -> t.Optional[exp.Expression]: 3224 if self._match_set(self.UNARY_PARSERS): 3225 return self.UNARY_PARSERS[self._prev.token_type](self) 3226 return self._parse_at_time_zone(self._parse_type()) 3227 3228 def _parse_type(self) -> t.Optional[exp.Expression]: 3229 interval = self._parse_interval() 3230 if interval: 3231 return interval 3232 3233 index = self._index 3234 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3235 this = self._parse_column() 3236 3237 if data_type: 3238 if isinstance(this, exp.Literal): 3239 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3240 if parser: 3241 return parser(self, this, data_type) 3242 return self.expression(exp.Cast, this=this, to=data_type) 3243 if not data_type.expressions: 3244 self._retreat(index) 3245 return self._parse_column() 3246 return self._parse_column_ops(data_type) 3247 3248 return this 3249 3250 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3251 this = self._parse_type() 3252 if not this: 3253 return None 3254 3255 return self.expression( 3256 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3257 ) 3258 3259 def _parse_types( 3260 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3261 ) -> t.Optional[exp.Expression]: 3262 index = self._index 3263 3264 prefix = self._match_text_seq("SYSUDTLIB", ".") 3265 3266 if not self._match_set(self.TYPE_TOKENS): 3267 identifier = allow_identifiers and self._parse_id_var( 3268 any_token=False, tokens=(TokenType.VAR,) 3269 ) 3270 3271 if identifier: 3272 tokens = self._tokenizer.tokenize(identifier.name) 3273 3274 if len(tokens) != 1: 3275 self.raise_error("Unexpected identifier", self._prev) 3276 3277 if tokens[0].token_type in self.TYPE_TOKENS: 3278 self._prev = tokens[0] 3279 elif self.SUPPORTS_USER_DEFINED_TYPES: 3280 return exp.DataType.build(identifier.name, udt=True) 3281 else: 3282 return None 3283 else: 3284 return None 3285 3286 type_token = self._prev.token_type 3287 3288 if type_token == TokenType.PSEUDO_TYPE: 3289 return self.expression(exp.PseudoType, this=self._prev.text) 3290 3291 if type_token == TokenType.OBJECT_IDENTIFIER: 3292 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3293 3294 
nested = type_token in self.NESTED_TYPE_TOKENS 3295 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3296 expressions = None 3297 maybe_func = False 3298 3299 if self._match(TokenType.L_PAREN): 3300 if is_struct: 3301 expressions = self._parse_csv(self._parse_struct_types) 3302 elif nested: 3303 expressions = self._parse_csv( 3304 lambda: self._parse_types( 3305 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3306 ) 3307 ) 3308 elif type_token in self.ENUM_TYPE_TOKENS: 3309 expressions = self._parse_csv(self._parse_equality) 3310 else: 3311 expressions = self._parse_csv(self._parse_type_size) 3312 3313 if not expressions or not self._match(TokenType.R_PAREN): 3314 self._retreat(index) 3315 return None 3316 3317 maybe_func = True 3318 3319 this: t.Optional[exp.Expression] = None 3320 values: t.Optional[t.List[exp.Expression]] = None 3321 3322 if nested and self._match(TokenType.LT): 3323 if is_struct: 3324 expressions = self._parse_csv(self._parse_struct_types) 3325 else: 3326 expressions = self._parse_csv( 3327 lambda: self._parse_types( 3328 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3329 ) 3330 ) 3331 3332 if not self._match(TokenType.GT): 3333 self.raise_error("Expecting >") 3334 3335 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3336 values = self._parse_csv(self._parse_conjunction) 3337 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3338 3339 if type_token in self.TIMESTAMPS: 3340 if self._match_text_seq("WITH", "TIME", "ZONE"): 3341 maybe_func = False 3342 tz_type = ( 3343 exp.DataType.Type.TIMETZ 3344 if type_token in self.TIMES 3345 else exp.DataType.Type.TIMESTAMPTZ 3346 ) 3347 this = exp.DataType(this=tz_type, expressions=expressions) 3348 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3349 maybe_func = False 3350 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3351 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3352 maybe_func = False 3353 elif type_token == TokenType.INTERVAL: 3354 unit = self._parse_var() 3355 3356 if self._match_text_seq("TO"): 3357 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3358 else: 3359 span = None 3360 3361 if span or not unit: 3362 this = self.expression( 3363 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3364 ) 3365 else: 3366 this = self.expression(exp.Interval, unit=unit) 3367 3368 if maybe_func and check_func: 3369 index2 = self._index 3370 peek = self._parse_string() 3371 3372 if not peek: 3373 self._retreat(index) 3374 return None 3375 3376 self._retreat(index2) 3377 3378 if not this: 3379 if self._match_text_seq("UNSIGNED"): 3380 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3381 if not unsigned_type_token: 3382 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3383 3384 type_token = unsigned_type_token or type_token 3385 3386 this = exp.DataType( 3387 this=exp.DataType.Type[type_token.value], 3388 expressions=expressions, 3389 nested=nested, 3390 values=values, 3391 prefix=prefix, 3392 ) 3393 3394 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3395 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3396 3397 return this 3398 3399 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3400 this = self._parse_type() or self._parse_id_var() 3401 self._match(TokenType.COLON) 3402 return self._parse_column_def(this) 3403 3404 def _parse_at_time_zone(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3405 if not self._match_text_seq("AT", "TIME", "ZONE"): 3406 return this 3407 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3408 3409 def _parse_column(self) -> t.Optional[exp.Expression]: 3410 this = self._parse_field() 3411 if isinstance(this, exp.Identifier): 3412 this = self.expression(exp.Column, this=this) 3413 elif not this: 3414 return self._parse_bracket(this) 3415 return self._parse_column_ops(this) 3416 3417 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3418 this = self._parse_bracket(this) 3419 3420 while self._match_set(self.COLUMN_OPERATORS): 3421 op_token = self._prev.token_type 3422 op = self.COLUMN_OPERATORS.get(op_token) 3423 3424 if op_token == TokenType.DCOLON: 3425 field = self._parse_types() 3426 if not field: 3427 self.raise_error("Expected type") 3428 elif op and self._curr: 3429 self._advance() 3430 value = self._prev.text 3431 field = ( 3432 exp.Literal.number(value) 3433 if self._prev.token_type == TokenType.NUMBER 3434 else exp.Literal.string(value) 3435 ) 3436 else: 3437 field = self._parse_field(anonymous_func=True, any_token=True) 3438 3439 if isinstance(field, exp.Func): 3440 # bigquery allows function calls like x.y.count(...) 3441 # SAFE.SUBSTR(...) 3442 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3443 this = self._replace_columns_with_dots(this) 3444 3445 if op: 3446 this = op(self, this, field) 3447 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3448 this = self.expression( 3449 exp.Column, 3450 this=field, 3451 table=this.this, 3452 db=this.args.get("table"), 3453 catalog=this.args.get("db"), 3454 ) 3455 else: 3456 this = self.expression(exp.Dot, this=this, expression=field) 3457 this = self._parse_bracket(this) 3458 return this 3459 3460 def _parse_primary(self) -> t.Optional[exp.Expression]: 3461 if self._match_set(self.PRIMARY_PARSERS): 3462 token_type = self._prev.token_type 3463 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3464 3465 if token_type == TokenType.STRING: 3466 expressions = [primary] 3467 while self._match(TokenType.STRING): 3468 expressions.append(exp.Literal.string(self._prev.text)) 3469 3470 if len(expressions) > 1: 3471 return self.expression(exp.Concat, expressions=expressions) 3472 3473 return primary 3474 3475 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3476 return exp.Literal.number(f"0.{self._prev.text}") 3477 3478 if self._match(TokenType.L_PAREN): 3479 comments = self._prev_comments 3480 query = self._parse_select() 3481 3482 if query: 3483 expressions = [query] 3484 else: 3485 expressions = self._parse_expressions() 3486 3487 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3488 3489 if isinstance(this, exp.Subqueryable): 3490 this = self._parse_set_operations( 3491 self._parse_subquery(this=this, parse_alias=False) 3492 ) 3493 elif len(expressions) > 1: 3494 this = self.expression(exp.Tuple, expressions=expressions) 3495 else: 3496 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3497 3498 if this: 3499 this.add_comments(comments) 3500 3501 self._match_r_paren(expression=this) 3502 return this 3503 3504 return None 3505 3506 def _parse_field( 3507 self, 3508 any_token: bool = False, 3509 tokens: t.Optional[t.Collection[TokenType]] = None, 3510 anonymous_func: bool = False, 3511 ) -> t.Optional[exp.Expression]: 3512 return ( 3513 self._parse_primary() 
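# literals first, then function calls, then plain identifiers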
3514 or self._parse_function(anonymous=anonymous_func) 3515 or self._parse_id_var(any_token=any_token, tokens=tokens) 3516 ) 3517 3518 def _parse_function( 3519 self, 3520 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3521 anonymous: bool = False, 3522 optional_parens: bool = True, 3523 ) -> t.Optional[exp.Expression]: 3524 if not self._curr: 3525 return None 3526 3527 token_type = self._curr.token_type 3528 this = self._curr.text 3529 upper = this.upper() 3530 3531 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3532 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3533 self._advance() 3534 return parser(self) 3535 3536 if not self._next or self._next.token_type != TokenType.L_PAREN: 3537 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3538 self._advance() 3539 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3540 3541 return None 3542 3543 if token_type not in self.FUNC_TOKENS: 3544 return None 3545 3546 self._advance(2) 3547 3548 parser = self.FUNCTION_PARSERS.get(upper) 3549 if parser and not anonymous: 3550 this = parser(self) 3551 else: 3552 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3553 3554 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3555 this = self.expression(subquery_predicate, this=self._parse_select()) 3556 self._match_r_paren() 3557 return this 3558 3559 if functions is None: 3560 functions = self.FUNCTIONS 3561 3562 function = functions.get(upper) 3563 3564 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3565 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3566 3567 if function and not anonymous: 3568 func = self.validate_expression(function(args), args) 3569 if not self.NORMALIZE_FUNCTIONS: 3570 func.meta["name"] = this 3571 this = func 3572 else: 3573 this = self.expression(exp.Anonymous, this=this, expressions=args) 3574 3575 self._match_r_paren(this) 3576 return self._parse_window(this) 3577 3578 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3579 return self._parse_column_def(self._parse_id_var()) 3580 3581 def _parse_user_defined_function( 3582 self, kind: t.Optional[TokenType] = None 3583 ) -> t.Optional[exp.Expression]: 3584 this = self._parse_id_var() 3585 3586 while self._match(TokenType.DOT): 3587 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3588 3589 if not self._match(TokenType.L_PAREN): 3590 return this 3591 3592 expressions = self._parse_csv(self._parse_function_parameter) 3593 self._match_r_paren() 3594 return self.expression( 3595 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3596 ) 3597 3598 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3599 literal = self._parse_primary() 3600 if literal: 3601 return self.expression(exp.Introducer, this=token.text, expression=literal) 3602 3603 return self.expression(exp.Identifier, this=token.text) 3604 3605 def _parse_session_parameter(self) -> exp.SessionParameter: 3606 kind = None 3607 this = self._parse_id_var() or self._parse_primary() 3608 3609 if this and self._match(TokenType.DOT): 3610 kind = this.name 3611 this = self._parse_var() or self._parse_primary() 3612 3613 return self.expression(exp.SessionParameter, this=this, kind=kind) 3614 3615 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3616 index = self._index 3617 3618 if self._match(TokenType.L_PAREN): 3619 expressions = t.cast( 3620 t.List[t.Optional[exp.Expression]], 
self._parse_csv(self._parse_id_var) 3621 ) 3622 3623 if not self._match(TokenType.R_PAREN): 3624 self._retreat(index) 3625 else: 3626 expressions = [self._parse_id_var()] 3627 3628 if self._match_set(self.LAMBDAS): 3629 return self.LAMBDAS[self._prev.token_type](self, expressions) 3630 3631 self._retreat(index) 3632 3633 this: t.Optional[exp.Expression] 3634 3635 if self._match(TokenType.DISTINCT): 3636 this = self.expression( 3637 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3638 ) 3639 else: 3640 this = self._parse_select_or_expression(alias=alias) 3641 3642 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3643 3644 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3645 index = self._index 3646 3647 if not self.errors: 3648 try: 3649 if self._parse_select(nested=True): 3650 return this 3651 except ParseError: 3652 pass 3653 finally: 3654 self.errors.clear() 3655 self._retreat(index) 3656 3657 if not self._match(TokenType.L_PAREN): 3658 return this 3659 3660 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3661 3662 self._match_r_paren() 3663 return self.expression(exp.Schema, this=this, expressions=args) 3664 3665 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3666 return self._parse_column_def(self._parse_field(any_token=True)) 3667 3668 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3669 # column defs are not really columns, they're identifiers 3670 if isinstance(this, exp.Column): 3671 this = this.this 3672 3673 kind = self._parse_types(schema=True) 3674 3675 if self._match_text_seq("FOR", "ORDINALITY"): 3676 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3677 3678 constraints: t.List[exp.Expression] = [] 3679 3680 if not kind and self._match(TokenType.ALIAS): 3681 constraints.append( 3682 self.expression( 3683 exp.ComputedColumnConstraint, 3684 this=self._parse_conjunction(), 3685 persisted=self._match_text_seq("PERSISTED"), 3686 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3687 ) 3688 ) 3689 3690 while True: 3691 constraint = self._parse_column_constraint() 3692 if not constraint: 3693 break 3694 constraints.append(constraint) 3695 3696 if not kind and not constraints: 3697 return this 3698 3699 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3700 3701 def _parse_auto_increment( 3702 self, 3703 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3704 start = None 3705 increment = None 3706 3707 if self._match(TokenType.L_PAREN, advance=False): 3708 args = self._parse_wrapped_csv(self._parse_bitwise) 3709 start = seq_get(args, 0) 3710 increment = seq_get(args, 1) 3711 elif self._match_text_seq("START"): 3712 start = self._parse_bitwise() 3713 self._match_text_seq("INCREMENT") 3714 increment = self._parse_bitwise() 3715 3716 if start and increment: 3717 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3718 3719 return exp.AutoIncrementColumnConstraint() 3720 3721 def _parse_compress(self) -> exp.CompressColumnConstraint: 3722 if self._match(TokenType.L_PAREN, advance=False): 3723 return self.expression( 3724 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3725 ) 3726 3727 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3728 3729 def _parse_generated_as_identity(self) -> 
exp.GeneratedAsIdentityColumnConstraint: 3730 if self._match_text_seq("BY", "DEFAULT"): 3731 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3732 this = self.expression( 3733 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3734 ) 3735 else: 3736 self._match_text_seq("ALWAYS") 3737 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3738 3739 self._match(TokenType.ALIAS) 3740 identity = self._match_text_seq("IDENTITY") 3741 3742 if self._match(TokenType.L_PAREN): 3743 if self._match(TokenType.START_WITH): 3744 this.set("start", self._parse_bitwise()) 3745 if self._match_text_seq("INCREMENT", "BY"): 3746 this.set("increment", self._parse_bitwise()) 3747 if self._match_text_seq("MINVALUE"): 3748 this.set("minvalue", self._parse_bitwise()) 3749 if self._match_text_seq("MAXVALUE"): 3750 this.set("maxvalue", self._parse_bitwise()) 3751 3752 if self._match_text_seq("CYCLE"): 3753 this.set("cycle", True) 3754 elif self._match_text_seq("NO", "CYCLE"): 3755 this.set("cycle", False) 3756 3757 if not identity: 3758 this.set("expression", self._parse_bitwise()) 3759 3760 self._match_r_paren() 3761 3762 return this 3763 3764 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3765 self._match_text_seq("LENGTH") 3766 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3767 3768 def _parse_not_constraint( 3769 self, 3770 ) -> t.Optional[exp.Expression]: 3771 if self._match_text_seq("NULL"): 3772 return self.expression(exp.NotNullColumnConstraint) 3773 if self._match_text_seq("CASESPECIFIC"): 3774 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3775 if self._match_text_seq("FOR", "REPLICATION"): 3776 return self.expression(exp.NotForReplicationColumnConstraint) 3777 return None 3778 3779 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3780 if self._match(TokenType.CONSTRAINT): 3781 this = self._parse_id_var() 3782 else: 3783 this = None 3784 3785 if self._match_texts(self.CONSTRAINT_PARSERS): 3786 return self.expression( 3787 exp.ColumnConstraint, 3788 this=this, 3789 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3790 ) 3791 3792 return this 3793 3794 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3795 if not self._match(TokenType.CONSTRAINT): 3796 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3797 3798 this = self._parse_id_var() 3799 expressions = [] 3800 3801 while True: 3802 constraint = self._parse_unnamed_constraint() or self._parse_function() 3803 if not constraint: 3804 break 3805 expressions.append(constraint) 3806 3807 return self.expression(exp.Constraint, this=this, expressions=expressions) 3808 3809 def _parse_unnamed_constraint( 3810 self, constraints: t.Optional[t.Collection[str]] = None 3811 ) -> t.Optional[exp.Expression]: 3812 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3813 return None 3814 3815 constraint = self._prev.text.upper() 3816 if constraint not in self.CONSTRAINT_PARSERS: 3817 self.raise_error(f"No parser found for schema constraint {constraint}.") 3818 3819 return self.CONSTRAINT_PARSERS[constraint](self) 3820 3821 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3822 self._match_text_seq("KEY") 3823 return self.expression( 3824 exp.UniqueColumnConstraint, 3825 this=self._parse_schema(self._parse_id_var(any_token=False)), 3826 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3827 ) 3828 3829 def 
_parse_key_constraint_options(self) -> t.List[str]: 3830 options = [] 3831 while True: 3832 if not self._curr: 3833 break 3834 3835 if self._match(TokenType.ON): 3836 action = None 3837 on = self._advance_any() and self._prev.text 3838 3839 if self._match_text_seq("NO", "ACTION"): 3840 action = "NO ACTION" 3841 elif self._match_text_seq("CASCADE"): 3842 action = "CASCADE" 3843 elif self._match_pair(TokenType.SET, TokenType.NULL): 3844 action = "SET NULL" 3845 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3846 action = "SET DEFAULT" 3847 else: 3848 self.raise_error("Invalid key constraint") 3849 3850 options.append(f"ON {on} {action}") 3851 elif self._match_text_seq("NOT", "ENFORCED"): 3852 options.append("NOT ENFORCED") 3853 elif self._match_text_seq("DEFERRABLE"): 3854 options.append("DEFERRABLE") 3855 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3856 options.append("INITIALLY DEFERRED") 3857 elif self._match_text_seq("NORELY"): 3858 options.append("NORELY") 3859 elif self._match_text_seq("MATCH", "FULL"): 3860 options.append("MATCH FULL") 3861 else: 3862 break 3863 3864 return options 3865 3866 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3867 if match and not self._match(TokenType.REFERENCES): 3868 return None 3869 3870 expressions = None 3871 this = self._parse_table(schema=True) 3872 options = self._parse_key_constraint_options() 3873 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3874 3875 def _parse_foreign_key(self) -> exp.ForeignKey: 3876 expressions = self._parse_wrapped_id_vars() 3877 reference = self._parse_references() 3878 options = {} 3879 3880 while self._match(TokenType.ON): 3881 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3882 self.raise_error("Expected DELETE or UPDATE") 3883 3884 kind = self._prev.text.lower() 3885 3886 if self._match_text_seq("NO", "ACTION"): 3887 action = "NO ACTION" 3888 elif self._match(TokenType.SET): 3889 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3890 action = "SET " + self._prev.text.upper() 3891 else: 3892 self._advance() 3893 action = self._prev.text.upper() 3894 3895 options[kind] = action 3896 3897 return self.expression( 3898 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3899 ) 3900 3901 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3902 return self._parse_field() 3903 3904 def _parse_primary_key( 3905 self, wrapped_optional: bool = False, in_props: bool = False 3906 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3907 desc = ( 3908 self._match_set((TokenType.ASC, TokenType.DESC)) 3909 and self._prev.token_type == TokenType.DESC 3910 ) 3911 3912 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3913 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3914 3915 expressions = self._parse_wrapped_csv( 3916 self._parse_primary_key_part, optional=wrapped_optional 3917 ) 3918 options = self._parse_key_constraint_options() 3919 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3920 3921 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3922 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3923 return this 3924 3925 bracket_kind = self._prev.token_type 3926 3927 if self._match(TokenType.COLON): 3928 expressions: t.List[exp.Expression] = [ 3929 self.expression(exp.Slice, expression=self._parse_conjunction()) 3930 ] 3931 else: 3932 expressions 
= self._parse_csv( 3933 lambda: self._parse_slice( 3934 self._parse_alias(self._parse_conjunction(), explicit=True) 3935 ) 3936 ) 3937 3938 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3939 if bracket_kind == TokenType.L_BRACE: 3940 this = self.expression(exp.Struct, expressions=expressions) 3941 elif not this or this.name.upper() == "ARRAY": 3942 this = self.expression(exp.Array, expressions=expressions) 3943 else: 3944 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3945 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3946 3947 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3948 self.raise_error("Expected ]") 3949 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3950 self.raise_error("Expected }") 3951 3952 self._add_comments(this) 3953 return self._parse_bracket(this) 3954 3955 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3956 if self._match(TokenType.COLON): 3957 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3958 return this 3959 3960 def _parse_case(self) -> t.Optional[exp.Expression]: 3961 ifs = [] 3962 default = None 3963 3964 comments = self._prev_comments 3965 expression = self._parse_conjunction() 3966 3967 while self._match(TokenType.WHEN): 3968 this = self._parse_conjunction() 3969 self._match(TokenType.THEN) 3970 then = self._parse_conjunction() 3971 ifs.append(self.expression(exp.If, this=this, true=then)) 3972 3973 if self._match(TokenType.ELSE): 3974 default = self._parse_conjunction() 3975 3976 if not self._match(TokenType.END): 3977 self.raise_error("Expected END after CASE", self._prev) 3978 3979 return self._parse_window( 3980 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3981 ) 3982 3983 def _parse_if(self) -> t.Optional[exp.Expression]: 3984 if self._match(TokenType.L_PAREN): 3985 args = self._parse_csv(self._parse_conjunction) 3986 this = self.validate_expression(exp.If.from_arg_list(args), args) 3987 self._match_r_paren() 3988 else: 3989 index = self._index - 1 3990 condition = self._parse_conjunction() 3991 3992 if not condition: 3993 self._retreat(index) 3994 return None 3995 3996 self._match(TokenType.THEN) 3997 true = self._parse_conjunction() 3998 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3999 self._match(TokenType.END) 4000 this = self.expression(exp.If, this=condition, true=true, false=false) 4001 4002 return self._parse_window(this) 4003 4004 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4005 if not self._match_text_seq("VALUE", "FOR"): 4006 self._retreat(self._index - 1) 4007 return None 4008 4009 return self.expression( 4010 exp.NextValueFor, 4011 this=self._parse_column(), 4012 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4013 ) 4014 4015 def _parse_extract(self) -> exp.Extract: 4016 this = self._parse_function() or self._parse_var() or self._parse_type() 4017 4018 if self._match(TokenType.FROM): 4019 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4020 4021 if not self._match(TokenType.COMMA): 4022 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4023 4024 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4025 4026 def _parse_any_value(self) -> exp.AnyValue: 4027 this = self._parse_lambda() 4028 is_max = None 4029 having = None 4030 
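# The HAVING branch below covers BigQuery-style calls such as
# ANY_VALUE(name HAVING MAX score): `max` records whether MAX or MIN was
# requested and `having` holds the ranking expression.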
4031 if self._match(TokenType.HAVING): 4032 self._match_texts(("MAX", "MIN")) 4033 is_max = self._prev.text == "MAX" 4034 having = self._parse_column() 4035 4036 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4037 4038 def _parse_cast(self, strict: bool) -> exp.Expression: 4039 this = self._parse_conjunction() 4040 4041 if not self._match(TokenType.ALIAS): 4042 if self._match(TokenType.COMMA): 4043 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4044 4045 self.raise_error("Expected AS after CAST") 4046 4047 fmt = None 4048 to = self._parse_types() 4049 4050 if not to: 4051 self.raise_error("Expected TYPE after CAST") 4052 elif isinstance(to, exp.Identifier): 4053 to = exp.DataType.build(to.name, udt=True) 4054 elif to.this == exp.DataType.Type.CHAR: 4055 if self._match(TokenType.CHARACTER_SET): 4056 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4057 elif self._match(TokenType.FORMAT): 4058 fmt_string = self._parse_string() 4059 fmt = self._parse_at_time_zone(fmt_string) 4060 4061 if to.this in exp.DataType.TEMPORAL_TYPES: 4062 this = self.expression( 4063 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4064 this=this, 4065 format=exp.Literal.string( 4066 format_time( 4067 fmt_string.this if fmt_string else "", 4068 self.FORMAT_MAPPING or self.TIME_MAPPING, 4069 self.FORMAT_TRIE or self.TIME_TRIE, 4070 ) 4071 ), 4072 ) 4073 4074 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4075 this.set("zone", fmt.args["zone"]) 4076 4077 return this 4078 4079 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4080 4081 def _parse_concat(self) -> t.Optional[exp.Expression]: 4082 args = self._parse_csv(self._parse_conjunction) 4083 if self.CONCAT_NULL_OUTPUTS_STRING: 4084 args = self._ensure_string_if_null(args) 4085 4086 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4087 # we find such a call we replace it with its argument. 4088 if len(args) == 1: 4089 return args[0] 4090 4091 return self.expression( 4092 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4093 ) 4094 4095 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4096 args = self._parse_csv(self._parse_conjunction) 4097 if len(args) < 2: 4098 return self.expression(exp.ConcatWs, expressions=args) 4099 delim, *values = args 4100 if self.CONCAT_NULL_OUTPUTS_STRING: 4101 values = self._ensure_string_if_null(values) 4102 4103 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4104 4105 def _parse_string_agg(self) -> exp.Expression: 4106 if self._match(TokenType.DISTINCT): 4107 args: t.List[t.Optional[exp.Expression]] = [ 4108 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4109 ] 4110 if self._match(TokenType.COMMA): 4111 args.extend(self._parse_csv(self._parse_conjunction)) 4112 else: 4113 args = self._parse_csv(self._parse_conjunction) # type: ignore 4114 4115 index = self._index 4116 if not self._match(TokenType.R_PAREN) and args: 4117 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4118 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4119 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4120 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4121 4122 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4123 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4124 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4125 if not self._match_text_seq("WITHIN", "GROUP"): 4126 self._retreat(index) 4127 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4128 4129 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4130 order = self._parse_order(this=seq_get(args, 0)) 4131 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4132 4133 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4134 this = self._parse_bitwise() 4135 4136 if self._match(TokenType.USING): 4137 to: t.Optional[exp.Expression] = self.expression( 4138 exp.CharacterSet, this=self._parse_var() 4139 ) 4140 elif self._match(TokenType.COMMA): 4141 to = self._parse_types() 4142 else: 4143 to = None 4144 4145 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4146 4147 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4148 """ 4149 There are generally two variants of the DECODE function: 4150 4151 - DECODE(bin, charset) 4152 - DECODE(expression, search, result [, search, result] ... [, default]) 4153 4154 The second variant will always be parsed into a CASE expression. Note that NULL 4155 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4156 instead of relying on pattern matching. 
4157 """ 4158 args = self._parse_csv(self._parse_conjunction) 4159 4160 if len(args) < 3: 4161 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4162 4163 expression, *expressions = args 4164 if not expression: 4165 return None 4166 4167 ifs = [] 4168 for search, result in zip(expressions[::2], expressions[1::2]): 4169 if not search or not result: 4170 return None 4171 4172 if isinstance(search, exp.Literal): 4173 ifs.append( 4174 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4175 ) 4176 elif isinstance(search, exp.Null): 4177 ifs.append( 4178 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4179 ) 4180 else: 4181 cond = exp.or_( 4182 exp.EQ(this=expression.copy(), expression=search), 4183 exp.and_( 4184 exp.Is(this=expression.copy(), expression=exp.Null()), 4185 exp.Is(this=search.copy(), expression=exp.Null()), 4186 copy=False, 4187 ), 4188 copy=False, 4189 ) 4190 ifs.append(exp.If(this=cond, true=result)) 4191 4192 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4193 4194 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4195 self._match_text_seq("KEY") 4196 key = self._parse_column() 4197 self._match_set((TokenType.COLON, TokenType.COMMA)) 4198 self._match_text_seq("VALUE") 4199 value = self._parse_bitwise() 4200 4201 if not key and not value: 4202 return None 4203 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4204 4205 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4206 if not this or not self._match_text_seq("FORMAT", "JSON"): 4207 return this 4208 4209 return self.expression(exp.FormatJson, this=this) 4210 4211 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4212 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4213 for value in values: 4214 if self._match_text_seq(value, "ON", on): 4215 return f"{value} ON {on}" 4216 4217 return None 4218 4219 def _parse_json_object(self) -> exp.JSONObject: 4220 star = self._parse_star() 4221 expressions = ( 4222 [star] 4223 if star 4224 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4225 ) 4226 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4227 4228 unique_keys = None 4229 if self._match_text_seq("WITH", "UNIQUE"): 4230 unique_keys = True 4231 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4232 unique_keys = False 4233 4234 self._match_text_seq("KEYS") 4235 4236 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4237 self._parse_type() 4238 ) 4239 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4240 4241 return self.expression( 4242 exp.JSONObject, 4243 expressions=expressions, 4244 null_handling=null_handling, 4245 unique_keys=unique_keys, 4246 return_type=return_type, 4247 encoding=encoding, 4248 ) 4249 4250 def _parse_logarithm(self) -> exp.Func: 4251 # Default argument order is base, expression 4252 args = self._parse_csv(self._parse_range) 4253 4254 if len(args) > 1: 4255 if not self.LOG_BASE_FIRST: 4256 args.reverse() 4257 return exp.Log.from_arg_list(args) 4258 4259 return self.expression( 4260 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4261 ) 4262 4263 def _parse_match_against(self) -> exp.MatchAgainst: 4264 expressions = self._parse_csv(self._parse_column) 4265 4266 self._match_text_seq(")", "AGAINST", "(") 4267 4268 this = self._parse_string() 4269 4270 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4271 modifier = "IN NATURAL LANGUAGE MODE" 4272 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4273 modifier = f"{modifier} WITH QUERY EXPANSION" 4274 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4275 modifier = "IN BOOLEAN MODE" 4276 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4277 modifier = "WITH QUERY EXPANSION" 4278 else: 4279 modifier = None 4280 4281 return self.expression( 4282 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4283 ) 4284 4285 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4286 def _parse_open_json(self) -> exp.OpenJSON: 4287 this = self._parse_bitwise() 4288 path = self._match(TokenType.COMMA) and self._parse_string() 4289 4290 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4291 this = self._parse_field(any_token=True) 4292 kind = self._parse_types() 4293 path = self._parse_string() 4294 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4295 4296 return self.expression( 4297 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4298 ) 4299 4300 expressions = None 4301 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4302 self._match_l_paren() 4303 expressions = self._parse_csv(_parse_open_json_column_def) 4304 4305 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4306 4307 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4308 args = self._parse_csv(self._parse_bitwise) 4309 4310 if self._match(TokenType.IN): 4311 return self.expression( 4312 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4313 ) 4314 4315 if haystack_first: 4316 haystack = seq_get(args, 0) 4317 needle = seq_get(args, 1) 4318 else: 4319 needle = seq_get(args, 0) 
4320 haystack = seq_get(args, 1) 4321 4322 return self.expression( 4323 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4324 ) 4325 4326 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4327 args = self._parse_csv(self._parse_table) 4328 return exp.JoinHint(this=func_name.upper(), expressions=args) 4329 4330 def _parse_substring(self) -> exp.Substring: 4331 # Postgres supports the form: substring(string [from int] [for int]) 4332 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4333 4334 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4335 4336 if self._match(TokenType.FROM): 4337 args.append(self._parse_bitwise()) 4338 if self._match(TokenType.FOR): 4339 args.append(self._parse_bitwise()) 4340 4341 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4342 4343 def _parse_trim(self) -> exp.Trim: 4344 # https://www.w3resource.com/sql/character-functions/trim.php 4345 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4346 4347 position = None 4348 collation = None 4349 4350 if self._match_texts(self.TRIM_TYPES): 4351 position = self._prev.text.upper() 4352 4353 expression = self._parse_bitwise() 4354 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4355 this = self._parse_bitwise() 4356 else: 4357 this = expression 4358 expression = None 4359 4360 if self._match(TokenType.COLLATE): 4361 collation = self._parse_bitwise() 4362 4363 return self.expression( 4364 exp.Trim, this=this, position=position, expression=expression, collation=collation 4365 ) 4366 4367 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4368 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4369 4370 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4371 return self._parse_window(self._parse_id_var(), alias=True) 4372 4373 def _parse_respect_or_ignore_nulls( 4374 self, this: t.Optional[exp.Expression] 4375 ) -> t.Optional[exp.Expression]: 4376 if self._match_text_seq("IGNORE", "NULLS"): 4377 return self.expression(exp.IgnoreNulls, this=this) 4378 if self._match_text_seq("RESPECT", "NULLS"): 4379 return self.expression(exp.RespectNulls, this=this) 4380 return this 4381 4382 def _parse_window( 4383 self, this: t.Optional[exp.Expression], alias: bool = False 4384 ) -> t.Optional[exp.Expression]: 4385 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4386 self._match(TokenType.WHERE) 4387 this = self.expression( 4388 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4389 ) 4390 self._match_r_paren() 4391 4392 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4393 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4394 if self._match_text_seq("WITHIN", "GROUP"): 4395 order = self._parse_wrapped(self._parse_order) 4396 this = self.expression(exp.WithinGroup, this=this, expression=order) 4397 4398 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4399 # Some dialects choose to implement and some do not. 4400 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4401 4402 # There is some code above in _parse_lambda that handles 4403 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4404 4405 # The below changes handle 4406 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
4407 4408 # Oracle allows both formats 4409 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4410 # and Snowflake chose to do the same for familiarity 4411 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4412 this = self._parse_respect_or_ignore_nulls(this) 4413 4414 # bigquery select from window x AS (partition by ...) 4415 if alias: 4416 over = None 4417 self._match(TokenType.ALIAS) 4418 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4419 return this 4420 else: 4421 over = self._prev.text.upper() 4422 4423 if not self._match(TokenType.L_PAREN): 4424 return self.expression( 4425 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4426 ) 4427 4428 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4429 4430 first = self._match(TokenType.FIRST) 4431 if self._match_text_seq("LAST"): 4432 first = False 4433 4434 partition, order = self._parse_partition_and_order() 4435 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4436 4437 if kind: 4438 self._match(TokenType.BETWEEN) 4439 start = self._parse_window_spec() 4440 self._match(TokenType.AND) 4441 end = self._parse_window_spec() 4442 4443 spec = self.expression( 4444 exp.WindowSpec, 4445 kind=kind, 4446 start=start["value"], 4447 start_side=start["side"], 4448 end=end["value"], 4449 end_side=end["side"], 4450 ) 4451 else: 4452 spec = None 4453 4454 self._match_r_paren() 4455 4456 window = self.expression( 4457 exp.Window, 4458 this=this, 4459 partition_by=partition, 4460 order=order, 4461 spec=spec, 4462 alias=window_alias, 4463 over=over, 4464 first=first, 4465 ) 4466 4467 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
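# e.g. MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hiredate) OVER (PARTITION BY deptno):
# KEEP (...) parses into one exp.Window, and the check below recurses so that the
# trailing OVER (...) wraps it in a second exp.Window.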
4468 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4469 return self._parse_window(window, alias=alias) 4470 4471 return window 4472 4473 def _parse_partition_and_order( 4474 self, 4475 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4476 return self._parse_partition_by(), self._parse_order() 4477 4478 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4479 self._match(TokenType.BETWEEN) 4480 4481 return { 4482 "value": ( 4483 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4484 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4485 or self._parse_bitwise() 4486 ), 4487 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4488 } 4489 4490 def _parse_alias( 4491 self, this: t.Optional[exp.Expression], explicit: bool = False 4492 ) -> t.Optional[exp.Expression]: 4493 any_token = self._match(TokenType.ALIAS) 4494 4495 if explicit and not any_token: 4496 return this 4497 4498 if self._match(TokenType.L_PAREN): 4499 aliases = self.expression( 4500 exp.Aliases, 4501 this=this, 4502 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4503 ) 4504 self._match_r_paren(aliases) 4505 return aliases 4506 4507 alias = self._parse_id_var(any_token) 4508 4509 if alias: 4510 return self.expression(exp.Alias, this=this, alias=alias) 4511 4512 return this 4513 4514 def _parse_id_var( 4515 self, 4516 any_token: bool = True, 4517 tokens: t.Optional[t.Collection[TokenType]] = None, 4518 ) -> t.Optional[exp.Expression]: 4519 identifier = self._parse_identifier() 4520 4521 if identifier: 4522 return identifier 4523 4524 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4525 quoted = self._prev.token_type == TokenType.STRING 4526 return exp.Identifier(this=self._prev.text, quoted=quoted) 4527 4528 return None 4529 4530 def _parse_string(self) -> t.Optional[exp.Expression]: 4531 if self._match(TokenType.STRING): 4532 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4533 return self._parse_placeholder() 4534 4535 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4536 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4537 4538 def _parse_number(self) -> t.Optional[exp.Expression]: 4539 if self._match(TokenType.NUMBER): 4540 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4541 return self._parse_placeholder() 4542 4543 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4544 if self._match(TokenType.IDENTIFIER): 4545 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4546 return self._parse_placeholder() 4547 4548 def _parse_var( 4549 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4550 ) -> t.Optional[exp.Expression]: 4551 if ( 4552 (any_token and self._advance_any()) 4553 or self._match(TokenType.VAR) 4554 or (self._match_set(tokens) if tokens else False) 4555 ): 4556 return self.expression(exp.Var, this=self._prev.text) 4557 return self._parse_placeholder() 4558 4559 def _advance_any(self) -> t.Optional[Token]: 4560 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4561 self._advance() 4562 return self._prev 4563 return None 4564 4565 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4566 return self._parse_var() or self._parse_string() 4567 4568 def _parse_null(self) -> t.Optional[exp.Expression]: 4569 if self._match(TokenType.NULL): 4570 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4571 return self._parse_placeholder() 4572 4573 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4574 if self._match(TokenType.TRUE): 4575 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4576 if self._match(TokenType.FALSE): 4577 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4578 return self._parse_placeholder() 4579 4580 def _parse_star(self) -> t.Optional[exp.Expression]: 4581 if self._match(TokenType.STAR): 4582 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4583 return self._parse_placeholder() 4584 4585 def _parse_parameter(self) -> exp.Parameter: 4586 wrapped = self._match(TokenType.L_BRACE) 4587 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4588 self._match(TokenType.R_BRACE) 4589 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4590 4591 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4592 if self._match_set(self.PLACEHOLDER_PARSERS): 4593 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4594 if placeholder: 4595 return placeholder 4596 self._advance(-1) 4597 return None 4598 4599 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4600 if not self._match(TokenType.EXCEPT): 4601 return None 4602 if self._match(TokenType.L_PAREN, advance=False): 4603 return self._parse_wrapped_csv(self._parse_column) 4604 return self._parse_csv(self._parse_column) 4605 4606 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4607 if not self._match(TokenType.REPLACE): 4608 return None 4609 if self._match(TokenType.L_PAREN, advance=False): 4610 return self._parse_wrapped_csv(self._parse_expression) 4611 return self._parse_expressions() 4612 4613 def _parse_csv( 4614 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4615 ) -> t.List[exp.Expression]: 4616 parse_result = parse_method() 4617 items = [parse_result] if parse_result is not None else [] 4618 4619 while self._match(sep): 4620 self._add_comments(parse_result) 4621 parse_result = parse_method() 4622 if parse_result is not None: 4623 items.append(parse_result) 4624 4625 return items 4626 4627 def _parse_tokens( 4628 self, parse_method: t.Callable, expressions: t.Dict 4629 ) -> t.Optional[exp.Expression]: 4630 this = parse_method() 4631 4632 while self._match_set(expressions): 4633 this = self.expression( 4634 expressions[self._prev.token_type], 4635 this=this, 4636 comments=self._prev_comments, 4637 expression=parse_method(), 4638 ) 4639 4640 return this 4641 4642 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4643 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4644 4645 def _parse_wrapped_csv( 4646 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4647 ) -> t.List[exp.Expression]: 4648 return self._parse_wrapped( 4649 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4650 ) 4651 4652 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4653 wrapped = self._match(TokenType.L_PAREN) 4654 if not wrapped and not optional: 4655 self.raise_error("Expecting (") 4656 parse_result = parse_method() 4657 if wrapped: 4658 self._match_r_paren() 4659 return parse_result 4660 4661 def _parse_expressions(self) -> t.List[exp.Expression]: 4662 return self._parse_csv(self._parse_expression) 4663 4664 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4665 
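# Accepts either a full SELECT or a plain expression; used e.g. for lambda
# bodies in _parse_lambda above.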
return self._parse_select() or self._parse_set_operations( 4666 self._parse_expression() if alias else self._parse_conjunction() 4667 ) 4668 4669 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4670 return self._parse_query_modifiers( 4671 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4672 ) 4673 4674 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4675 this = None 4676 if self._match_texts(self.TRANSACTION_KIND): 4677 this = self._prev.text 4678 4679 self._match_texts({"TRANSACTION", "WORK"}) 4680 4681 modes = [] 4682 while True: 4683 mode = [] 4684 while self._match(TokenType.VAR): 4685 mode.append(self._prev.text) 4686 4687 if mode: 4688 modes.append(" ".join(mode)) 4689 if not self._match(TokenType.COMMA): 4690 break 4691 4692 return self.expression(exp.Transaction, this=this, modes=modes) 4693 4694 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4695 chain = None 4696 savepoint = None 4697 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4698 4699 self._match_texts({"TRANSACTION", "WORK"}) 4700 4701 if self._match_text_seq("TO"): 4702 self._match_text_seq("SAVEPOINT") 4703 savepoint = self._parse_id_var() 4704 4705 if self._match(TokenType.AND): 4706 chain = not self._match_text_seq("NO") 4707 self._match_text_seq("CHAIN") 4708 4709 if is_rollback: 4710 return self.expression(exp.Rollback, savepoint=savepoint) 4711 4712 return self.expression(exp.Commit, chain=chain) 4713 4714 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4715 if not self._match_text_seq("ADD"): 4716 return None 4717 4718 self._match(TokenType.COLUMN) 4719 exists_column = self._parse_exists(not_=True) 4720 expression = self._parse_field_def() 4721 4722 if expression: 4723 expression.set("exists", exists_column) 4724 4725 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4726 if self._match_texts(("FIRST", "AFTER")): 4727 position = self._prev.text 4728 column_position = self.expression( 4729 exp.ColumnPosition, this=self._parse_column(), position=position 4730 ) 4731 expression.set("position", column_position) 4732 4733 return expression 4734 4735 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4736 drop = self._match(TokenType.DROP) and self._parse_drop() 4737 if drop and not isinstance(drop, exp.Command): 4738 drop.set("kind", drop.args.get("kind", "COLUMN")) 4739 return drop 4740 4741 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4742 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4743 return self.expression( 4744 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4745 ) 4746 4747 def _parse_add_constraint(self) -> exp.AddConstraint: 4748 this = None 4749 kind = self._prev.token_type 4750 4751 if kind == TokenType.CONSTRAINT: 4752 this = self._parse_id_var() 4753 4754 if self._match_text_seq("CHECK"): 4755 expression = self._parse_wrapped(self._parse_conjunction) 4756 enforced = self._match_text_seq("ENFORCED") 4757 4758 return self.expression( 4759 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4760 ) 4761 4762 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4763 expression = self._parse_foreign_key() 4764 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4765 expression = self._parse_primary_key() 4766 else: 4767 expression = None 4768 4769 return 
self.expression(exp.AddConstraint, this=this, expression=expression) 4770 4771 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4772 index = self._index - 1 4773 4774 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4775 return self._parse_csv(self._parse_add_constraint) 4776 4777 self._retreat(index) 4778 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4779 return self._parse_csv(self._parse_field_def) 4780 4781 return self._parse_csv(self._parse_add_column) 4782 4783 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4784 self._match(TokenType.COLUMN) 4785 column = self._parse_field(any_token=True) 4786 4787 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4788 return self.expression(exp.AlterColumn, this=column, drop=True) 4789 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4790 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4791 4792 self._match_text_seq("SET", "DATA") 4793 return self.expression( 4794 exp.AlterColumn, 4795 this=column, 4796 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4797 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4798 using=self._match(TokenType.USING) and self._parse_conjunction(), 4799 ) 4800 4801 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4802 index = self._index - 1 4803 4804 partition_exists = self._parse_exists() 4805 if self._match(TokenType.PARTITION, advance=False): 4806 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4807 4808 self._retreat(index) 4809 return self._parse_csv(self._parse_drop_column) 4810 4811 def _parse_alter_table_rename(self) -> exp.RenameTable: 4812 self._match_text_seq("TO") 4813 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4814 4815 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4816 start = self._prev 4817 4818 if not self._match(TokenType.TABLE): 4819 return self._parse_as_command(start) 4820 4821 exists = self._parse_exists() 4822 only = self._match_text_seq("ONLY") 4823 this = self._parse_table(schema=True) 4824 4825 if self._next: 4826 self._advance() 4827 4828 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4829 if parser: 4830 actions = ensure_list(parser(self)) 4831 4832 if not self._curr: 4833 return self.expression( 4834 exp.AlterTable, 4835 this=this, 4836 exists=exists, 4837 actions=actions, 4838 only=only, 4839 ) 4840 4841 return self._parse_as_command(start) 4842 4843 def _parse_merge(self) -> exp.Merge: 4844 self._match(TokenType.INTO) 4845 target = self._parse_table() 4846 4847 if target and self._match(TokenType.ALIAS, advance=False): 4848 target.set("alias", self._parse_table_alias()) 4849 4850 self._match(TokenType.USING) 4851 using = self._parse_table() 4852 4853 self._match(TokenType.ON) 4854 on = self._parse_conjunction() 4855 4856 whens = [] 4857 while self._match(TokenType.WHEN): 4858 matched = not self._match(TokenType.NOT) 4859 self._match_text_seq("MATCHED") 4860 source = ( 4861 False 4862 if self._match_text_seq("BY", "TARGET") 4863 else self._match_text_seq("BY", "SOURCE") 4864 ) 4865 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4866 4867 self._match(TokenType.THEN) 4868 4869 if self._match(TokenType.INSERT): 4870 _this = self._parse_star() 4871 if _this: 4872 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4873 else: 4874 then = self.expression( 4875 exp.Insert, 4876 
this=self._parse_value(), 4877 expression=self._match(TokenType.VALUES) and self._parse_value(), 4878 ) 4879 elif self._match(TokenType.UPDATE): 4880 expressions = self._parse_star() 4881 if expressions: 4882 then = self.expression(exp.Update, expressions=expressions) 4883 else: 4884 then = self.expression( 4885 exp.Update, 4886 expressions=self._match(TokenType.SET) 4887 and self._parse_csv(self._parse_equality), 4888 ) 4889 elif self._match(TokenType.DELETE): 4890 then = self.expression(exp.Var, this=self._prev.text) 4891 else: 4892 then = None 4893 4894 whens.append( 4895 self.expression( 4896 exp.When, 4897 matched=matched, 4898 source=source, 4899 condition=condition, 4900 then=then, 4901 ) 4902 ) 4903 4904 return self.expression( 4905 exp.Merge, 4906 this=target, 4907 using=using, 4908 on=on, 4909 expressions=whens, 4910 ) 4911 4912 def _parse_show(self) -> t.Optional[exp.Expression]: 4913 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4914 if parser: 4915 return parser(self) 4916 return self._parse_as_command(self._prev) 4917 4918 def _parse_set_item_assignment( 4919 self, kind: t.Optional[str] = None 4920 ) -> t.Optional[exp.Expression]: 4921 index = self._index 4922 4923 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4924 return self._parse_set_transaction(global_=kind == "GLOBAL") 4925 4926 left = self._parse_primary() or self._parse_id_var() 4927 4928 if not self._match_texts(("=", "TO")): 4929 self._retreat(index) 4930 return None 4931 4932 right = self._parse_statement() or self._parse_id_var() 4933 this = self.expression(exp.EQ, this=left, expression=right) 4934 4935 return self.expression(exp.SetItem, this=this, kind=kind) 4936 4937 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4938 self._match_text_seq("TRANSACTION") 4939 characteristics = self._parse_csv( 4940 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4941 ) 4942 return self.expression( 4943 exp.SetItem, 4944 expressions=characteristics, 4945 kind="TRANSACTION", 4946 **{"global": global_}, # type: ignore 4947 ) 4948 4949 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4950 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4951 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4952 4953 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4954 index = self._index 4955 set_ = self.expression( 4956 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4957 ) 4958 4959 if self._curr: 4960 self._retreat(index) 4961 return self._parse_as_command(self._prev) 4962 4963 return set_ 4964 4965 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4966 for option in options: 4967 if self._match_text_seq(*option.split(" ")): 4968 return exp.var(option) 4969 return None 4970 4971 def _parse_as_command(self, start: Token) -> exp.Command: 4972 while self._curr: 4973 self._advance() 4974 text = self._find_sql(start, self._prev) 4975 size = len(start.text) 4976 return exp.Command(this=text[:size], expression=text[size:]) 4977 4978 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4979 settings = [] 4980 4981 self._match_l_paren() 4982 kind = self._parse_id_var() 4983 4984 if self._match(TokenType.L_PAREN): 4985 while True: 4986 key = self._parse_id_var() 4987 value = self._parse_primary() 4988 4989 if not key and value is None: 4990 break 4991 
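# Each KEY VALUE pair becomes a DictSubProperty; this drives dictionary
# property blocks such as ClickHouse's LAYOUT(...) and SOURCE(...).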
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4992 self._match(TokenType.R_PAREN) 4993 4994 self._match_r_paren() 4995 4996 return self.expression( 4997 exp.DictProperty, 4998 this=this, 4999 kind=kind.this if kind else None, 5000 settings=settings, 5001 ) 5002 5003 def _parse_dict_range(self, this: str) -> exp.DictRange: 5004 self._match_l_paren() 5005 has_min = self._match_text_seq("MIN") 5006 if has_min: 5007 min = self._parse_var() or self._parse_primary() 5008 self._match_text_seq("MAX") 5009 max = self._parse_var() or self._parse_primary() 5010 else: 5011 max = self._parse_var() or self._parse_primary() 5012 min = exp.Literal.number(0) 5013 self._match_r_paren() 5014 return self.expression(exp.DictRange, this=this, min=min, max=max) 5015 5016 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5017 index = self._index 5018 expression = self._parse_column() 5019 if not self._match(TokenType.IN): 5020 self._retreat(index - 1) 5021 return None 5022 iterator = self._parse_column() 5023 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5024 return self.expression( 5025 exp.Comprehension, 5026 this=this, 5027 expression=expression, 5028 iterator=iterator, 5029 condition=condition, 5030 ) 5031 5032 def _find_parser( 5033 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5034 ) -> t.Optional[t.Callable]: 5035 if not self._curr: 5036 return None 5037 5038 index = self._index 5039 this = [] 5040 while True: 5041 # The current token might be multiple words 5042 curr = self._curr.text.upper() 5043 key = curr.split(" ") 5044 this.append(curr) 5045 5046 self._advance() 5047 result, trie = in_trie(trie, key) 5048 if result == TrieResult.FAILED: 5049 break 5050 5051 if result == TrieResult.EXISTS: 5052 subparser = parsers[" ".join(this)] 5053 return subparser 5054 5055 self._retreat(index) 5056 return None 5057 5058 def _match(self, token_type, advance=True, expression=None): 5059 if not self._curr: 5060 return None 5061 5062 if self._curr.token_type == token_type: 5063 if advance: 5064 self._advance() 5065 self._add_comments(expression) 5066 return True 5067 5068 return None 5069 5070 def _match_set(self, types, advance=True): 5071 if not self._curr: 5072 return None 5073 5074 if self._curr.token_type in types: 5075 if advance: 5076 self._advance() 5077 return True 5078 5079 return None 5080 5081 def _match_pair(self, token_type_a, token_type_b, advance=True): 5082 if not self._curr or not self._next: 5083 return None 5084 5085 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5086 if advance: 5087 self._advance(2) 5088 return True 5089 5090 return None 5091 5092 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5093 if not self._match(TokenType.L_PAREN, expression=expression): 5094 self.raise_error("Expecting (") 5095 5096 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5097 if not self._match(TokenType.R_PAREN, expression=expression): 5098 self.raise_error("Expecting )") 5099 5100 def _match_texts(self, texts, advance=True): 5101 if self._curr and self._curr.text.upper() in texts: 5102 if advance: 5103 self._advance() 5104 return True 5105 return False 5106 5107 def _match_text_seq(self, *texts, advance=True): 5108 index = self._index 5109 for text in texts: 5110 if self._curr and self._curr.text.upper() == text: 5111 self._advance() 5112 else: 5113 self._retreat(index) 5114 return False 5115 5116 if not 
advance: 5117 self._retreat(index) 5118 5119 return True 5120 5121 @t.overload 5122 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5123 ... 5124 5125 @t.overload 5126 def _replace_columns_with_dots( 5127 self, this: t.Optional[exp.Expression] 5128 ) -> t.Optional[exp.Expression]: 5129 ... 5130 5131 def _replace_columns_with_dots(self, this): 5132 if isinstance(this, exp.Dot): 5133 exp.replace_children(this, self._replace_columns_with_dots) 5134 elif isinstance(this, exp.Column): 5135 exp.replace_children(this, self._replace_columns_with_dots) 5136 table = this.args.get("table") 5137 this = ( 5138 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5139 ) 5140 5141 return this 5142 5143 def _replace_lambda( 5144 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5145 ) -> t.Optional[exp.Expression]: 5146 if not node: 5147 return node 5148 5149 for column in node.find_all(exp.Column): 5150 if column.parts[0].name in lambda_variables: 5151 dot_or_id = column.to_dot() if column.table else column.this 5152 parent = column.parent 5153 5154 while isinstance(parent, exp.Dot): 5155 if not isinstance(parent.parent, exp.Dot): 5156 parent.replace(dot_or_id) 5157 break 5158 parent = parent.parent 5159 else: 5160 if column is node: 5161 node = dot_or_id 5162 else: 5163 column.replace(dot_or_id) 5164 return node 5165 5166 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5167 return [ 5168 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5169 for value in values 5170 if value 5171 ]
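The methods above are driven through Parser.parse, which consumes the token stream produced by the Tokenizer; the top-level sqlglot package wraps this in the parse_one convenience function. A minimal usage sketch (the SQL strings are illustrative, and exact tree shapes can vary by dialect):

import sqlglot
from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Drive the parser directly: tokenize, then parse the token stream.
tokens = Tokenizer().tokenize("SELECT a FROM t")
select = Parser().parse(tokens)[0]
assert isinstance(select, exp.Select)

# _parse_decode rewrites the search/result form of DECODE into a CASE expression.
decoded = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
assert decoded.find(exp.Case) is not None

# _parse_string_agg parses STRING_AGG ... WITHIN GROUP into a GroupConcat node
# (rather than a generic WithinGroup wrapper) so it transpiles to MySQL / SQLite.
agg = sqlglot.parse_one("SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t")
assert agg.find(exp.GroupConcat) is not None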
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
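
        Example (an illustrative sketch; expression reprs are abbreviated and
        vary by sqlglot version):
            tokens = Tokenizer().tokenize("x > 1 AND y > 2")
            Parser().parse_into(exp.Condition, tokens, sql="x > 1 AND y > 2")
            # -> [And(this=GT(...), expression=GT(...))]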
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
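
        Example (illustrative):
            # self.expression(exp.Not, this=condition) instantiates exp.Not,
            # attaches any pending comments, and validates mandatory arguments
            # before returning the node.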
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())
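
            # For CREATE FUNCTION/PROCEDURE, AS (TokenType.ALIAS) typically
            # introduces the routine body: either a raw command string, or a
            # statement that may be wrapped in BEGIN or prefixed with RETURN
            # (all handled just below).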
            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
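        # BLOCKCOMPRESSION (a Teradata property) accepts one of ALWAYS, MANUAL,
        # NEVER or DEFAULT, optionally followed by AUTOTEMP(<schema>); each flag
        # below records which alternative, if any, was matched.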
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
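
        # No DATA keyword after LOAD: fall back to parsing the rest of the
        # statement as an opaque exp.Command.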
1925 return self._parse_as_command(self._prev) 1926 1927 def _parse_delete(self) -> exp.Delete: 1928 # This handles MySQL's "Multiple-Table Syntax" 1929 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1930 tables = None 1931 comments = self._prev_comments 1932 if not self._match(TokenType.FROM, advance=False): 1933 tables = self._parse_csv(self._parse_table) or None 1934 1935 returning = self._parse_returning() 1936 1937 return self.expression( 1938 exp.Delete, 1939 comments=comments, 1940 tables=tables, 1941 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1942 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1943 where=self._parse_where(), 1944 returning=returning or self._parse_returning(), 1945 limit=self._parse_limit(), 1946 ) 1947 1948 def _parse_update(self) -> exp.Update: 1949 comments = self._prev_comments 1950 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1951 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1952 returning = self._parse_returning() 1953 return self.expression( 1954 exp.Update, 1955 comments=comments, 1956 **{ # type: ignore 1957 "this": this, 1958 "expressions": expressions, 1959 "from": self._parse_from(joins=True), 1960 "where": self._parse_where(), 1961 "returning": returning or self._parse_returning(), 1962 "order": self._parse_order(), 1963 "limit": self._parse_limit(), 1964 }, 1965 ) 1966 1967 def _parse_uncache(self) -> exp.Uncache: 1968 if not self._match(TokenType.TABLE): 1969 self.raise_error("Expecting TABLE after UNCACHE") 1970 1971 return self.expression( 1972 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1973 ) 1974 1975 def _parse_cache(self) -> exp.Cache: 1976 lazy = self._match_text_seq("LAZY") 1977 self._match(TokenType.TABLE) 1978 table = self._parse_table(schema=True) 1979 1980 options = [] 1981 if self._match_text_seq("OPTIONS"): 1982 self._match_l_paren() 1983 k = self._parse_string() 1984 self._match(TokenType.EQ) 1985 v = self._parse_string() 1986 options = [k, v] 1987 self._match_r_paren() 1988 1989 self._match(TokenType.ALIAS) 1990 return self.expression( 1991 exp.Cache, 1992 this=table, 1993 lazy=lazy, 1994 options=options, 1995 expression=self._parse_select(nested=True), 1996 ) 1997 1998 def _parse_partition(self) -> t.Optional[exp.Partition]: 1999 if not self._match(TokenType.PARTITION): 2000 return None 2001 2002 return self.expression( 2003 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2004 ) 2005 2006 def _parse_value(self) -> exp.Tuple: 2007 if self._match(TokenType.L_PAREN): 2008 expressions = self._parse_csv(self._parse_conjunction) 2009 self._match_r_paren() 2010 return self.expression(exp.Tuple, expressions=expressions) 2011 2012 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
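# Each bare scalar therefore becomes its own single-element Tuple row.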
2013 # https://prestodb.io/docs/current/sql/values.html 2014 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2015 2016 def _parse_projections(self) -> t.List[exp.Expression]: 2017 return self._parse_expressions() 2018 2019 def _parse_select( 2020 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2021 ) -> t.Optional[exp.Expression]: 2022 cte = self._parse_with() 2023 2024 if cte: 2025 this = self._parse_statement() 2026 2027 if not this: 2028 self.raise_error("Failed to parse any statement following CTE") 2029 return cte 2030 2031 if "with" in this.arg_types: 2032 this.set("with", cte) 2033 else: 2034 self.raise_error(f"{this.key} does not support CTE") 2035 this = cte 2036 2037 return this 2038 2039 # duckdb supports leading with FROM x 2040 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2041 2042 if self._match(TokenType.SELECT): 2043 comments = self._prev_comments 2044 2045 hint = self._parse_hint() 2046 all_ = self._match(TokenType.ALL) 2047 distinct = self._match_set(self.DISTINCT_TOKENS) 2048 2049 kind = ( 2050 self._match(TokenType.ALIAS) 2051 and self._match_texts(("STRUCT", "VALUE")) 2052 and self._prev.text 2053 ) 2054 2055 if distinct: 2056 distinct = self.expression( 2057 exp.Distinct, 2058 on=self._parse_value() if self._match(TokenType.ON) else None, 2059 ) 2060 2061 if all_ and distinct: 2062 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2063 2064 limit = self._parse_limit(top=True) 2065 projections = self._parse_projections() 2066 2067 this = self.expression( 2068 exp.Select, 2069 kind=kind, 2070 hint=hint, 2071 distinct=distinct, 2072 expressions=projections, 2073 limit=limit, 2074 ) 2075 this.comments = comments 2076 2077 into = self._parse_into() 2078 if into: 2079 this.set("into", into) 2080 2081 if not from_: 2082 from_ = self._parse_from() 2083 2084 if from_: 2085 this.set("from", from_) 2086 2087 this = self._parse_query_modifiers(this) 2088 elif (table or nested) and self._match(TokenType.L_PAREN): 2089 if self._match(TokenType.PIVOT): 2090 this = self._parse_simplified_pivot() 2091 elif self._match(TokenType.FROM): 2092 this = exp.select("*").from_( 2093 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2094 ) 2095 else: 2096 this = self._parse_table() if table else self._parse_select(nested=True) 2097 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2098 2099 self._match_r_paren() 2100 2101 # We return early here so that the UNION isn't attached to the subquery by the 2102 # following call to _parse_set_operations, but instead becomes the parent node 2103 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2104 elif self._match(TokenType.VALUES): 2105 this = self.expression( 2106 exp.Values, 2107 expressions=self._parse_csv(self._parse_value), 2108 alias=self._parse_table_alias(), 2109 ) 2110 elif from_: 2111 this = exp.select("*").from_(from_.this, copy=False) 2112 else: 2113 this = None 2114 2115 return self._parse_set_operations(this) 2116 2117 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2118 if not skip_with_token and not self._match(TokenType.WITH): 2119 return None 2120 2121 comments = self._prev_comments 2122 recursive = self._match(TokenType.RECURSIVE) 2123 2124 expressions = [] 2125 while True: 2126 expressions.append(self._parse_cte()) 2127 2128 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2129 break 2130 else: 2131 
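# After a comma, also tolerate an optional repeated WITH before the next CTE.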
self._match(TokenType.WITH) 2132 2133 return self.expression( 2134 exp.With, comments=comments, expressions=expressions, recursive=recursive 2135 ) 2136 2137 def _parse_cte(self) -> exp.CTE: 2138 alias = self._parse_table_alias() 2139 if not alias or not alias.this: 2140 self.raise_error("Expected CTE to have alias") 2141 2142 self._match(TokenType.ALIAS) 2143 return self.expression( 2144 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2145 ) 2146 2147 def _parse_table_alias( 2148 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2149 ) -> t.Optional[exp.TableAlias]: 2150 any_token = self._match(TokenType.ALIAS) 2151 alias = ( 2152 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2153 or self._parse_string_as_identifier() 2154 ) 2155 2156 index = self._index 2157 if self._match(TokenType.L_PAREN): 2158 columns = self._parse_csv(self._parse_function_parameter) 2159 self._match_r_paren() if columns else self._retreat(index) 2160 else: 2161 columns = None 2162 2163 if not alias and not columns: 2164 return None 2165 2166 return self.expression(exp.TableAlias, this=alias, columns=columns) 2167 2168 def _parse_subquery( 2169 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2170 ) -> t.Optional[exp.Subquery]: 2171 if not this: 2172 return None 2173 2174 return self.expression( 2175 exp.Subquery, 2176 this=this, 2177 pivots=self._parse_pivots(), 2178 alias=self._parse_table_alias() if parse_alias else None, 2179 ) 2180 2181 def _parse_query_modifiers( 2182 self, this: t.Optional[exp.Expression] 2183 ) -> t.Optional[exp.Expression]: 2184 if isinstance(this, self.MODIFIABLES): 2185 for join in iter(self._parse_join, None): 2186 this.append("joins", join) 2187 for lateral in iter(self._parse_lateral, None): 2188 this.append("laterals", lateral) 2189 2190 while True: 2191 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2192 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2193 key, expression = parser(self) 2194 2195 if expression: 2196 this.set(key, expression) 2197 if key == "limit": 2198 offset = expression.args.pop("offset", None) 2199 if offset: 2200 this.set("offset", exp.Offset(expression=offset)) 2201 continue 2202 break 2203 return this 2204 2205 def _parse_hint(self) -> t.Optional[exp.Hint]: 2206 if self._match(TokenType.HINT): 2207 hints = [] 2208 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2209 hints.extend(hint) 2210 2211 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2212 self.raise_error("Expected */ after HINT") 2213 2214 return self.expression(exp.Hint, expressions=hints) 2215 2216 return None 2217 2218 def _parse_into(self) -> t.Optional[exp.Into]: 2219 if not self._match(TokenType.INTO): 2220 return None 2221 2222 temp = self._match(TokenType.TEMPORARY) 2223 unlogged = self._match_text_seq("UNLOGGED") 2224 self._match(TokenType.TABLE) 2225 2226 return self.expression( 2227 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2228 ) 2229 2230 def _parse_from( 2231 self, joins: bool = False, skip_from_token: bool = False 2232 ) -> t.Optional[exp.From]: 2233 if not skip_from_token and not self._match(TokenType.FROM): 2234 return None 2235 2236 return self.expression( 2237 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2238 ) 2239 2240 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2241 if not self._match(TokenType.MATCH_RECOGNIZE): 2242 return None 2243 2244 
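# A sketch of the shape handled below (Snowflake-style, not exhaustive):
#   MATCH_RECOGNIZE(
#     PARTITION BY a ORDER BY b
#     MEASURES FIRST(c) AS fc
#     ONE ROW PER MATCH
#     AFTER MATCH SKIP PAST LAST ROW
#     PATTERN (x y+)
#     DEFINE x AS c > 0, y AS c < 0
#   ) AS mr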
self._match_l_paren() 2245 2246 partition = self._parse_partition_by() 2247 order = self._parse_order() 2248 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2249 2250 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2251 rows = exp.var("ONE ROW PER MATCH") 2252 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2253 text = "ALL ROWS PER MATCH" 2254 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2255 text += " SHOW EMPTY MATCHES" 2256 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2257 text += " OMIT EMPTY MATCHES" 2258 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2259 text += " WITH UNMATCHED ROWS" 2260 rows = exp.var(text) 2261 else: 2262 rows = None 2263 2264 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2265 text = "AFTER MATCH SKIP" 2266 if self._match_text_seq("PAST", "LAST", "ROW"): 2267 text += " PAST LAST ROW" 2268 elif self._match_text_seq("TO", "NEXT", "ROW"): 2269 text += " TO NEXT ROW" 2270 elif self._match_text_seq("TO", "FIRST"): 2271 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2272 elif self._match_text_seq("TO", "LAST"): 2273 text += f" TO LAST {self._advance_any().text}" # type: ignore 2274 after = exp.var(text) 2275 else: 2276 after = None 2277 2278 if self._match_text_seq("PATTERN"): 2279 self._match_l_paren() 2280 2281 if not self._curr: 2282 self.raise_error("Expecting )", self._curr) 2283 2284 paren = 1 2285 start = self._curr 2286 2287 while self._curr and paren > 0: 2288 if self._curr.token_type == TokenType.L_PAREN: 2289 paren += 1 2290 if self._curr.token_type == TokenType.R_PAREN: 2291 paren -= 1 2292 2293 end = self._prev 2294 self._advance() 2295 2296 if paren > 0: 2297 self.raise_error("Expecting )", self._curr) 2298 2299 pattern = exp.var(self._find_sql(start, end)) 2300 else: 2301 pattern = None 2302 2303 define = ( 2304 self._parse_csv( 2305 lambda: self.expression( 2306 exp.Alias, 2307 alias=self._parse_id_var(any_token=True), 2308 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2309 ) 2310 ) 2311 if self._match_text_seq("DEFINE") 2312 else None 2313 ) 2314 2315 self._match_r_paren() 2316 2317 return self.expression( 2318 exp.MatchRecognize, 2319 partition_by=partition, 2320 order=order, 2321 measures=measures, 2322 rows=rows, 2323 after=after, 2324 pattern=pattern, 2325 define=define, 2326 alias=self._parse_table_alias(), 2327 ) 2328 2329 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2330 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2331 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2332 2333 if outer_apply or cross_apply: 2334 this = self._parse_select(table=True) 2335 view = None 2336 outer = not cross_apply 2337 elif self._match(TokenType.LATERAL): 2338 this = self._parse_select(table=True) 2339 view = self._match(TokenType.VIEW) 2340 outer = self._match(TokenType.OUTER) 2341 else: 2342 return None 2343 2344 if not this: 2345 this = ( 2346 self._parse_unnest() 2347 or self._parse_function() 2348 or self._parse_id_var(any_token=False) 2349 ) 2350 2351 while self._match(TokenType.DOT): 2352 this = exp.Dot( 2353 this=this, 2354 expression=self._parse_function() or self._parse_id_var(any_token=False), 2355 ) 2356 2357 if view: 2358 table = self._parse_id_var(any_token=False) 2359 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2360 table_alias: t.Optional[exp.TableAlias] = self.expression( 2361 exp.TableAlias, this=table, columns=columns 2362 ) 2363 elif
isinstance(this, exp.Subquery) and this.alias: 2364 # Ensures parity between the Subquery's and the Lateral's "alias" args 2365 table_alias = this.args["alias"].copy() 2366 else: 2367 table_alias = self._parse_table_alias() 2368 2369 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2370 2371 def _parse_join_parts( 2372 self, 2373 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2374 return ( 2375 self._match_set(self.JOIN_METHODS) and self._prev, 2376 self._match_set(self.JOIN_SIDES) and self._prev, 2377 self._match_set(self.JOIN_KINDS) and self._prev, 2378 ) 2379 2380 def _parse_join( 2381 self, skip_join_token: bool = False, parse_bracket: bool = False 2382 ) -> t.Optional[exp.Join]: 2383 if self._match(TokenType.COMMA): 2384 return self.expression(exp.Join, this=self._parse_table()) 2385 2386 index = self._index 2387 method, side, kind = self._parse_join_parts() 2388 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2389 join = self._match(TokenType.JOIN) 2390 2391 if not skip_join_token and not join: 2392 self._retreat(index) 2393 kind = None 2394 method = None 2395 side = None 2396 2397 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2398 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2399 2400 if not skip_join_token and not join and not outer_apply and not cross_apply: 2401 return None 2402 2403 if outer_apply: 2404 side = Token(TokenType.LEFT, "LEFT") 2405 2406 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2407 2408 if method: 2409 kwargs["method"] = method.text 2410 if side: 2411 kwargs["side"] = side.text 2412 if kind: 2413 kwargs["kind"] = kind.text 2414 if hint: 2415 kwargs["hint"] = hint 2416 2417 if self._match(TokenType.ON): 2418 kwargs["on"] = self._parse_conjunction() 2419 elif self._match(TokenType.USING): 2420 kwargs["using"] = self._parse_wrapped_id_vars() 2421 elif not (kind and kind.token_type == TokenType.CROSS): 2422 index = self._index 2423 joins = self._parse_joins() 2424 2425 if joins and self._match(TokenType.ON): 2426 kwargs["on"] = self._parse_conjunction() 2427 elif joins and self._match(TokenType.USING): 2428 kwargs["using"] = self._parse_wrapped_id_vars() 2429 else: 2430 joins = None 2431 self._retreat(index) 2432 2433 kwargs["this"].set("joins", joins) 2434 2435 comments = [c for token in (method, side, kind) if token for c in token.comments] 2436 return self.expression(exp.Join, comments=comments, **kwargs) 2437 2438 def _parse_index( 2439 self, 2440 index: t.Optional[exp.Expression] = None, 2441 ) -> t.Optional[exp.Index]: 2442 if index: 2443 unique = None 2444 primary = None 2445 amp = None 2446 2447 self._match(TokenType.ON) 2448 self._match(TokenType.TABLE) # hive 2449 table = self._parse_table_parts(schema=True) 2450 else: 2451 unique = self._match(TokenType.UNIQUE) 2452 primary = self._match_text_seq("PRIMARY") 2453 amp = self._match_text_seq("AMP") 2454 2455 if not self._match(TokenType.INDEX): 2456 return None 2457 2458 index = self._parse_id_var() 2459 table = None 2460 2461 using = self._parse_field() if self._match(TokenType.USING) else None 2462 2463 if self._match(TokenType.L_PAREN, advance=False): 2464 columns = self._parse_wrapped_csv(self._parse_ordered) 2465 else: 2466 columns = None 2467 2468 return self.expression( 2469 exp.Index, 2470 this=index, 2471 table=table, 2472 using=using, 2473 columns=columns, 2474 unique=unique, 2475 primary=primary, 2476 amp=amp, 2477 
partition_by=self._parse_partition_by(), 2478 ) 2479 2480 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2481 hints: t.List[exp.Expression] = [] 2482 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2483 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2484 hints.append( 2485 self.expression( 2486 exp.WithTableHint, 2487 expressions=self._parse_csv( 2488 lambda: self._parse_function() or self._parse_var(any_token=True) 2489 ), 2490 ) 2491 ) 2492 self._match_r_paren() 2493 else: 2494 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2495 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2496 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2497 2498 self._match_texts({"INDEX", "KEY"}) 2499 if self._match(TokenType.FOR): 2500 hint.set("target", self._advance_any() and self._prev.text.upper()) 2501 2502 hint.set("expressions", self._parse_wrapped_id_vars()) 2503 hints.append(hint) 2504 2505 return hints or None 2506 2507 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2508 return ( 2509 (not schema and self._parse_function(optional_parens=False)) 2510 or self._parse_id_var(any_token=False) 2511 or self._parse_string_as_identifier() 2512 or self._parse_placeholder() 2513 ) 2514 2515 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2516 catalog = None 2517 db = None 2518 table = self._parse_table_part(schema=schema) 2519 2520 while self._match(TokenType.DOT): 2521 if catalog: 2522 # This allows nesting the table in arbitrarily many dot expressions if needed 2523 table = self.expression( 2524 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2525 ) 2526 else: 2527 catalog = db 2528 db = table 2529 table = self._parse_table_part(schema=schema) 2530 2531 if not table: 2532 self.raise_error(f"Expected table name but got {self._curr}") 2533 2534 return self.expression( 2535 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2536 ) 2537 2538 def _parse_table( 2539 self, 2540 schema: bool = False, 2541 joins: bool = False, 2542 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2543 parse_bracket: bool = False, 2544 ) -> t.Optional[exp.Expression]: 2545 lateral = self._parse_lateral() 2546 if lateral: 2547 return lateral 2548 2549 unnest = self._parse_unnest() 2550 if unnest: 2551 return unnest 2552 2553 values = self._parse_derived_table_values() 2554 if values: 2555 return values 2556 2557 subquery = self._parse_select(table=True) 2558 if subquery: 2559 if not subquery.args.get("pivots"): 2560 subquery.set("pivots", self._parse_pivots()) 2561 return subquery 2562 2563 bracket = parse_bracket and self._parse_bracket(None) 2564 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2565 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2566 2567 if schema: 2568 return self._parse_schema(this=this) 2569 2570 version = self._parse_version() 2571 2572 if version: 2573 this.set("version", version) 2574 2575 if self.ALIAS_POST_TABLESAMPLE: 2576 table_sample = self._parse_table_sample() 2577 2578 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2579 if alias: 2580 this.set("alias", alias) 2581 2582 this.set("hints", self._parse_table_hints()) 2583 2584 if not this.args.get("pivots"): 2585 this.set("pivots", self._parse_pivots()) 2586 2587 if not self.ALIAS_POST_TABLESAMPLE: 2588 table_sample = self._parse_table_sample() 2589 2590 if 
table_sample: 2591 table_sample.set("this", this) 2592 this = table_sample 2593 2594 if joins: 2595 for join in iter(self._parse_join, None): 2596 this.append("joins", join) 2597 2598 return this 2599 2600 def _parse_version(self) -> t.Optional[exp.Version]: 2601 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2602 this = "TIMESTAMP" 2603 elif self._match(TokenType.VERSION_SNAPSHOT): 2604 this = "VERSION" 2605 else: 2606 return None 2607 2608 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2609 kind = self._prev.text.upper() 2610 start = self._parse_bitwise() 2611 self._match_texts(("TO", "AND")) 2612 end = self._parse_bitwise() 2613 expression: t.Optional[exp.Expression] = self.expression( 2614 exp.Tuple, expressions=[start, end] 2615 ) 2616 elif self._match_text_seq("CONTAINED", "IN"): 2617 kind = "CONTAINED IN" 2618 expression = self.expression( 2619 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2620 ) 2621 elif self._match(TokenType.ALL): 2622 kind = "ALL" 2623 expression = None 2624 else: 2625 self._match_text_seq("AS", "OF") 2626 kind = "AS OF" 2627 expression = self._parse_type() 2628 2629 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2630 2631 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2632 if not self._match(TokenType.UNNEST): 2633 return None 2634 2635 expressions = self._parse_wrapped_csv(self._parse_type) 2636 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2637 2638 alias = self._parse_table_alias() if with_alias else None 2639 2640 if alias and self.UNNEST_COLUMN_ONLY: 2641 if alias.args.get("columns"): 2642 self.raise_error("Unexpected extra column alias in unnest.") 2643 2644 alias.set("columns", [alias.this]) 2645 alias.set("this", None) 2646 2647 offset = None 2648 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2649 self._match(TokenType.ALIAS) 2650 offset = self._parse_id_var() or exp.to_identifier("offset") 2651 2652 return self.expression( 2653 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2654 ) 2655 2656 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2657 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2658 if not is_derived and not self._match(TokenType.VALUES): 2659 return None 2660 2661 expressions = self._parse_csv(self._parse_value) 2662 alias = self._parse_table_alias() 2663 2664 if is_derived: 2665 self._match_r_paren() 2666 2667 return self.expression( 2668 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2669 ) 2670 2671 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2672 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2673 as_modifier and self._match_text_seq("USING", "SAMPLE") 2674 ): 2675 return None 2676 2677 bucket_numerator = None 2678 bucket_denominator = None 2679 bucket_field = None 2680 percent = None 2681 rows = None 2682 size = None 2683 seed = None 2684 2685 kind = ( 2686 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2687 ) 2688 method = self._parse_var(tokens=(TokenType.ROW,)) 2689 2690 self._match(TokenType.L_PAREN) 2691 2692 if self.TABLESAMPLE_CSV: 2693 num = None 2694 expressions = self._parse_csv(self._parse_primary) 2695 else: 2696 expressions = None 2697 num = self._parse_number() 2698 2699 if self._match_text_seq("BUCKET"): 2700 bucket_numerator = self._parse_number() 2701 self._match_text_seq("OUT", "OF") 2702 
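# e.g. Hive's TABLESAMPLE (BUCKET 3 OUT OF 16 ON id): parse the denominator and optional ON field.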
bucket_denominator = self._parse_number() 2703 self._match(TokenType.ON) 2704 bucket_field = self._parse_field() 2705 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2706 percent = num 2707 elif self._match(TokenType.ROWS): 2708 rows = num 2709 elif num: 2710 size = num 2711 2712 self._match(TokenType.R_PAREN) 2713 2714 if self._match(TokenType.L_PAREN): 2715 method = self._parse_var() 2716 seed = self._match(TokenType.COMMA) and self._parse_number() 2717 self._match_r_paren() 2718 elif self._match_texts(("SEED", "REPEATABLE")): 2719 seed = self._parse_wrapped(self._parse_number) 2720 2721 return self.expression( 2722 exp.TableSample, 2723 expressions=expressions, 2724 method=method, 2725 bucket_numerator=bucket_numerator, 2726 bucket_denominator=bucket_denominator, 2727 bucket_field=bucket_field, 2728 percent=percent, 2729 rows=rows, 2730 size=size, 2731 seed=seed, 2732 kind=kind, 2733 ) 2734 2735 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2736 return list(iter(self._parse_pivot, None)) or None 2737 2738 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2739 return list(iter(self._parse_join, None)) or None 2740 2741 # https://duckdb.org/docs/sql/statements/pivot 2742 def _parse_simplified_pivot(self) -> exp.Pivot: 2743 def _parse_on() -> t.Optional[exp.Expression]: 2744 this = self._parse_bitwise() 2745 return self._parse_in(this) if self._match(TokenType.IN) else this 2746 2747 this = self._parse_table() 2748 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2749 using = self._match(TokenType.USING) and self._parse_csv( 2750 lambda: self._parse_alias(self._parse_function()) 2751 ) 2752 group = self._parse_group() 2753 return self.expression( 2754 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2755 ) 2756 2757 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2758 index = self._index 2759 include_nulls = None 2760 2761 if self._match(TokenType.PIVOT): 2762 unpivot = False 2763 elif self._match(TokenType.UNPIVOT): 2764 unpivot = True 2765 2766 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2767 if self._match_text_seq("INCLUDE", "NULLS"): 2768 include_nulls = True 2769 elif self._match_text_seq("EXCLUDE", "NULLS"): 2770 include_nulls = False 2771 else: 2772 return None 2773 2774 expressions = [] 2775 field = None 2776 2777 if not self._match(TokenType.L_PAREN): 2778 self._retreat(index) 2779 return None 2780 2781 if unpivot: 2782 expressions = self._parse_csv(self._parse_column) 2783 else: 2784 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2785 2786 if not expressions: 2787 self.raise_error("Failed to parse PIVOT's aggregation list") 2788 2789 if not self._match(TokenType.FOR): 2790 self.raise_error("Expecting FOR") 2791 2792 value = self._parse_column() 2793 2794 if not self._match(TokenType.IN): 2795 self.raise_error("Expecting IN") 2796 2797 field = self._parse_in(value, alias=True) 2798 2799 self._match_r_paren() 2800 2801 pivot = self.expression( 2802 exp.Pivot, 2803 expressions=expressions, 2804 field=field, 2805 unpivot=unpivot, 2806 include_nulls=include_nulls, 2807 ) 2808 2809 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2810 pivot.set("alias", self._parse_table_alias()) 2811 2812 if not unpivot: 2813 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2814 2815 columns: t.List[exp.Expression] = [] 2816 for fld in
pivot.args["field"].expressions: 2817 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2818 for name in names: 2819 if self.PREFIXED_PIVOT_COLUMNS: 2820 name = f"{name}_{field_name}" if name else field_name 2821 else: 2822 name = f"{field_name}_{name}" if name else field_name 2823 2824 columns.append(exp.to_identifier(name)) 2825 2826 pivot.set("columns", columns) 2827 2828 return pivot 2829 2830 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2831 return [agg.alias for agg in aggregations] 2832 2833 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2834 if not skip_where_token and not self._match(TokenType.WHERE): 2835 return None 2836 2837 return self.expression( 2838 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2839 ) 2840 2841 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2842 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2843 return None 2844 2845 elements = defaultdict(list) 2846 2847 if self._match(TokenType.ALL): 2848 return self.expression(exp.Group, all=True) 2849 2850 while True: 2851 expressions = self._parse_csv(self._parse_conjunction) 2852 if expressions: 2853 elements["expressions"].extend(expressions) 2854 2855 grouping_sets = self._parse_grouping_sets() 2856 if grouping_sets: 2857 elements["grouping_sets"].extend(grouping_sets) 2858 2859 rollup = None 2860 cube = None 2861 totals = None 2862 2863 with_ = self._match(TokenType.WITH) 2864 if self._match(TokenType.ROLLUP): 2865 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2866 elements["rollup"].extend(ensure_list(rollup)) 2867 2868 if self._match(TokenType.CUBE): 2869 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2870 elements["cube"].extend(ensure_list(cube)) 2871 2872 if self._match_text_seq("TOTALS"): 2873 totals = True 2874 elements["totals"] = True # type: ignore 2875 2876 if not (grouping_sets or rollup or cube or totals): 2877 break 2878 2879 return self.expression(exp.Group, **elements) # type: ignore 2880 2881 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2882 if not self._match(TokenType.GROUPING_SETS): 2883 return None 2884 2885 return self._parse_wrapped_csv(self._parse_grouping_set) 2886 2887 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2888 if self._match(TokenType.L_PAREN): 2889 grouping_set = self._parse_csv(self._parse_column) 2890 self._match_r_paren() 2891 return self.expression(exp.Tuple, expressions=grouping_set) 2892 2893 return self._parse_column() 2894 2895 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2896 if not skip_having_token and not self._match(TokenType.HAVING): 2897 return None 2898 return self.expression(exp.Having, this=self._parse_conjunction()) 2899 2900 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2901 if not self._match(TokenType.QUALIFY): 2902 return None 2903 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2904 2905 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2906 if skip_start_token: 2907 start = None 2908 elif self._match(TokenType.START_WITH): 2909 start = self._parse_conjunction() 2910 else: 2911 return None 2912 2913 self._match(TokenType.CONNECT_BY) 2914 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2915 exp.Prior, this=self._parse_bitwise() 2916 ) 2917 connect = self._parse_conjunction() 2918 
self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2919 2920 if not start and self._match(TokenType.START_WITH): 2921 start = self._parse_conjunction() 2922 2923 return self.expression(exp.Connect, start=start, connect=connect) 2924 2925 def _parse_order( 2926 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2927 ) -> t.Optional[exp.Expression]: 2928 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2929 return this 2930 2931 return self.expression( 2932 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2933 ) 2934 2935 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2936 if not self._match(token): 2937 return None 2938 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2939 2940 def _parse_ordered(self) -> exp.Ordered: 2941 this = self._parse_conjunction() 2942 self._match(TokenType.ASC) 2943 2944 is_desc = self._match(TokenType.DESC) 2945 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2946 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2947 desc = is_desc or False 2948 asc = not desc 2949 nulls_first = is_nulls_first or False 2950 explicitly_null_ordered = is_nulls_first or is_nulls_last 2951 2952 if ( 2953 not explicitly_null_ordered 2954 and ( 2955 (asc and self.NULL_ORDERING == "nulls_are_small") 2956 or (desc and self.NULL_ORDERING != "nulls_are_small") 2957 ) 2958 and self.NULL_ORDERING != "nulls_are_last" 2959 ): 2960 nulls_first = True 2961 2962 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2963 2964 def _parse_limit( 2965 self, this: t.Optional[exp.Expression] = None, top: bool = False 2966 ) -> t.Optional[exp.Expression]: 2967 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2968 comments = self._prev_comments 2969 if top: 2970 limit_paren = self._match(TokenType.L_PAREN) 2971 expression = self._parse_number() 2972 2973 if limit_paren: 2974 self._match_r_paren() 2975 else: 2976 expression = self._parse_term() 2977 2978 if self._match(TokenType.COMMA): 2979 offset = expression 2980 expression = self._parse_term() 2981 else: 2982 offset = None 2983 2984 limit_exp = self.expression( 2985 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2986 ) 2987 2988 return limit_exp 2989 2990 if self._match(TokenType.FETCH): 2991 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2992 direction = self._prev.text if direction else "FIRST" 2993 2994 count = self._parse_field(tokens=self.FETCH_TOKENS) 2995 percent = self._match(TokenType.PERCENT) 2996 2997 self._match_set((TokenType.ROW, TokenType.ROWS)) 2998 2999 only = self._match_text_seq("ONLY") 3000 with_ties = self._match_text_seq("WITH", "TIES") 3001 3002 if only and with_ties: 3003 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3004 3005 return self.expression( 3006 exp.Fetch, 3007 direction=direction, 3008 count=count, 3009 percent=percent, 3010 with_ties=with_ties, 3011 ) 3012 3013 return this 3014 3015 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3016 if not self._match(TokenType.OFFSET): 3017 return this 3018 3019 count = self._parse_term() 3020 self._match_set((TokenType.ROW, TokenType.ROWS)) 3021 return self.expression(exp.Offset, this=this, expression=count) 3022 3023 def _parse_locks(self) -> t.List[exp.Lock]: 3024 locks = [] 3025 while True: 3026 if self._match_text_seq("FOR", "UPDATE"): 3027 update = True 3028 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3029 "LOCK", "IN", "SHARE", "MODE" 3030 ): 3031 update = False 3032 else: 3033 break 3034 3035 expressions = None 3036 if self._match_text_seq("OF"): 3037 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3038 3039 wait: t.Optional[bool | exp.Expression] = None 3040 if self._match_text_seq("NOWAIT"): 3041 wait = True 3042 elif self._match_text_seq("WAIT"): 3043 wait = self._parse_primary() 3044 elif self._match_text_seq("SKIP", "LOCKED"): 3045 wait = False 3046 3047 locks.append( 3048 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3049 ) 3050 3051 return locks 3052 3053 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3054 if not self._match_set(self.SET_OPERATIONS): 3055 return this 3056 3057 token_type = self._prev.token_type 3058 3059 if token_type == TokenType.UNION: 3060 expression = exp.Union 3061 elif token_type == TokenType.EXCEPT: 3062 expression = exp.Except 3063 else: 3064 expression = exp.Intersect 3065 3066 return self.expression( 3067 expression, 3068 this=this, 3069 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3070 by_name=self._match_text_seq("BY", "NAME"), 3071 expression=self._parse_set_operations(self._parse_select(nested=True)), 3072 ) 3073 3074 def _parse_expression(self) -> t.Optional[exp.Expression]: 3075 return self._parse_alias(self._parse_conjunction()) 3076 3077 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3078 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3079 3080 def _parse_equality(self) -> t.Optional[exp.Expression]: 3081 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3082 3083 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3084 return self._parse_tokens(self._parse_range, self.COMPARISON) 3085 3086 def _parse_range(self) -> t.Optional[exp.Expression]: 3087 this = self._parse_bitwise() 3088 negate = self._match(TokenType.NOT) 3089 3090 if self._match_set(self.RANGE_PARSERS): 3091 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3092 if not expression: 3093 return this 3094 3095 this = expression 3096 elif self._match(TokenType.ISNULL): 3097 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3098 3099 # Postgres supports ISNULL and NOTNULL for conditions. 
3100 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3101 if self._match(TokenType.NOTNULL): 3102 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3103 this = self.expression(exp.Not, this=this) 3104 3105 if negate: 3106 this = self.expression(exp.Not, this=this) 3107 3108 if self._match(TokenType.IS): 3109 this = self._parse_is(this) 3110 3111 return this 3112 3113 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3114 index = self._index - 1 3115 negate = self._match(TokenType.NOT) 3116 3117 if self._match_text_seq("DISTINCT", "FROM"): 3118 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3119 return self.expression(klass, this=this, expression=self._parse_expression()) 3120 3121 expression = self._parse_null() or self._parse_boolean() 3122 if not expression: 3123 self._retreat(index) 3124 return None 3125 3126 this = self.expression(exp.Is, this=this, expression=expression) 3127 return self.expression(exp.Not, this=this) if negate else this 3128 3129 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3130 unnest = self._parse_unnest(with_alias=False) 3131 if unnest: 3132 this = self.expression(exp.In, this=this, unnest=unnest) 3133 elif self._match(TokenType.L_PAREN): 3134 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3135 3136 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3137 this = self.expression(exp.In, this=this, query=expressions[0]) 3138 else: 3139 this = self.expression(exp.In, this=this, expressions=expressions) 3140 3141 self._match_r_paren(this) 3142 else: 3143 this = self.expression(exp.In, this=this, field=self._parse_field()) 3144 3145 return this 3146 3147 def _parse_between(self, this: exp.Expression) -> exp.Between: 3148 low = self._parse_bitwise() 3149 self._match(TokenType.AND) 3150 high = self._parse_bitwise() 3151 return self.expression(exp.Between, this=this, low=low, high=high) 3152 3153 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3154 if not self._match(TokenType.ESCAPE): 3155 return this 3156 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3157 3158 def _parse_interval(self) -> t.Optional[exp.Interval]: 3159 index = self._index 3160 3161 if not self._match(TokenType.INTERVAL): 3162 return None 3163 3164 if self._match(TokenType.STRING, advance=False): 3165 this = self._parse_primary() 3166 else: 3167 this = self._parse_term() 3168 3169 if not this: 3170 self._retreat(index) 3171 return None 3172 3173 unit = self._parse_function() or self._parse_var(any_token=True) 3174 3175 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3176 # each INTERVAL expression into this canonical form so it's easy to transpile 3177 if this and this.is_number: 3178 this = exp.Literal.string(this.name) 3179 elif this and this.is_string: 3180 parts = this.name.split() 3181 3182 if len(parts) == 2: 3183 if unit: 3184 # This is not actually a unit, it's something else (e.g. 
a "window side") 3185 unit = None 3186 self._retreat(self._index - 1) 3187 3188 this = exp.Literal.string(parts[0]) 3189 unit = self.expression(exp.Var, this=parts[1]) 3190 3191 return self.expression(exp.Interval, this=this, unit=unit) 3192 3193 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3194 this = self._parse_term() 3195 3196 while True: 3197 if self._match_set(self.BITWISE): 3198 this = self.expression( 3199 self.BITWISE[self._prev.token_type], 3200 this=this, 3201 expression=self._parse_term(), 3202 ) 3203 elif self._match(TokenType.DQMARK): 3204 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3205 elif self._match_pair(TokenType.LT, TokenType.LT): 3206 this = self.expression( 3207 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3208 ) 3209 elif self._match_pair(TokenType.GT, TokenType.GT): 3210 this = self.expression( 3211 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3212 ) 3213 else: 3214 break 3215 3216 return this 3217 3218 def _parse_term(self) -> t.Optional[exp.Expression]: 3219 return self._parse_tokens(self._parse_factor, self.TERM) 3220 3221 def _parse_factor(self) -> t.Optional[exp.Expression]: 3222 return self._parse_tokens(self._parse_unary, self.FACTOR) 3223 3224 def _parse_unary(self) -> t.Optional[exp.Expression]: 3225 if self._match_set(self.UNARY_PARSERS): 3226 return self.UNARY_PARSERS[self._prev.token_type](self) 3227 return self._parse_at_time_zone(self._parse_type()) 3228 3229 def _parse_type(self) -> t.Optional[exp.Expression]: 3230 interval = self._parse_interval() 3231 if interval: 3232 return interval 3233 3234 index = self._index 3235 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3236 this = self._parse_column() 3237 3238 if data_type: 3239 if isinstance(this, exp.Literal): 3240 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3241 if parser: 3242 return parser(self, this, data_type) 3243 return self.expression(exp.Cast, this=this, to=data_type) 3244 if not data_type.expressions: 3245 self._retreat(index) 3246 return self._parse_column() 3247 return self._parse_column_ops(data_type) 3248 3249 return this 3250 3251 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3252 this = self._parse_type() 3253 if not this: 3254 return None 3255 3256 return self.expression( 3257 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3258 ) 3259 3260 def _parse_types( 3261 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3262 ) -> t.Optional[exp.Expression]: 3263 index = self._index 3264 3265 prefix = self._match_text_seq("SYSUDTLIB", ".") 3266 3267 if not self._match_set(self.TYPE_TOKENS): 3268 identifier = allow_identifiers and self._parse_id_var( 3269 any_token=False, tokens=(TokenType.VAR,) 3270 ) 3271 3272 if identifier: 3273 tokens = self._tokenizer.tokenize(identifier.name) 3274 3275 if len(tokens) != 1: 3276 self.raise_error("Unexpected identifier", self._prev) 3277 3278 if tokens[0].token_type in self.TYPE_TOKENS: 3279 self._prev = tokens[0] 3280 elif self.SUPPORTS_USER_DEFINED_TYPES: 3281 return exp.DataType.build(identifier.name, udt=True) 3282 else: 3283 return None 3284 else: 3285 return None 3286 3287 type_token = self._prev.token_type 3288 3289 if type_token == TokenType.PSEUDO_TYPE: 3290 return self.expression(exp.PseudoType, this=self._prev.text) 3291 3292 if type_token == TokenType.OBJECT_IDENTIFIER: 3293 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3294 3295 
nested = type_token in self.NESTED_TYPE_TOKENS 3296 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3297 expressions = None 3298 maybe_func = False 3299 3300 if self._match(TokenType.L_PAREN): 3301 if is_struct: 3302 expressions = self._parse_csv(self._parse_struct_types) 3303 elif nested: 3304 expressions = self._parse_csv( 3305 lambda: self._parse_types( 3306 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3307 ) 3308 ) 3309 elif type_token in self.ENUM_TYPE_TOKENS: 3310 expressions = self._parse_csv(self._parse_equality) 3311 else: 3312 expressions = self._parse_csv(self._parse_type_size) 3313 3314 if not expressions or not self._match(TokenType.R_PAREN): 3315 self._retreat(index) 3316 return None 3317 3318 maybe_func = True 3319 3320 this: t.Optional[exp.Expression] = None 3321 values: t.Optional[t.List[exp.Expression]] = None 3322 3323 if nested and self._match(TokenType.LT): 3324 if is_struct: 3325 expressions = self._parse_csv(self._parse_struct_types) 3326 else: 3327 expressions = self._parse_csv( 3328 lambda: self._parse_types( 3329 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3330 ) 3331 ) 3332 3333 if not self._match(TokenType.GT): 3334 self.raise_error("Expecting >") 3335 3336 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3337 values = self._parse_csv(self._parse_conjunction) 3338 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3339 3340 if type_token in self.TIMESTAMPS: 3341 if self._match_text_seq("WITH", "TIME", "ZONE"): 3342 maybe_func = False 3343 tz_type = ( 3344 exp.DataType.Type.TIMETZ 3345 if type_token in self.TIMES 3346 else exp.DataType.Type.TIMESTAMPTZ 3347 ) 3348 this = exp.DataType(this=tz_type, expressions=expressions) 3349 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3350 maybe_func = False 3351 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3352 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3353 maybe_func = False 3354 elif type_token == TokenType.INTERVAL: 3355 unit = self._parse_var() 3356 3357 if self._match_text_seq("TO"): 3358 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3359 else: 3360 span = None 3361 3362 if span or not unit: 3363 this = self.expression( 3364 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3365 ) 3366 else: 3367 this = self.expression(exp.Interval, unit=unit) 3368 3369 if maybe_func and check_func: 3370 index2 = self._index 3371 peek = self._parse_string() 3372 3373 if not peek: 3374 self._retreat(index) 3375 return None 3376 3377 self._retreat(index2) 3378 3379 if not this: 3380 if self._match_text_seq("UNSIGNED"): 3381 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3382 if not unsigned_type_token: 3383 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3384 3385 type_token = unsigned_type_token or type_token 3386 3387 this = exp.DataType( 3388 this=exp.DataType.Type[type_token.value], 3389 expressions=expressions, 3390 nested=nested, 3391 values=values, 3392 prefix=prefix, 3393 ) 3394 3395 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3396 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3397 3398 return this 3399 3400 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3401 this = self._parse_type() or self._parse_id_var() 3402 self._match(TokenType.COLON) 3403 return self._parse_column_def(this) 3404 3405 def _parse_at_time_zone(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3406 if not self._match_text_seq("AT", "TIME", "ZONE"): 3407 return this 3408 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3409 3410 def _parse_column(self) -> t.Optional[exp.Expression]: 3411 this = self._parse_field() 3412 if isinstance(this, exp.Identifier): 3413 this = self.expression(exp.Column, this=this) 3414 elif not this: 3415 return self._parse_bracket(this) 3416 return self._parse_column_ops(this) 3417 3418 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3419 this = self._parse_bracket(this) 3420 3421 while self._match_set(self.COLUMN_OPERATORS): 3422 op_token = self._prev.token_type 3423 op = self.COLUMN_OPERATORS.get(op_token) 3424 3425 if op_token == TokenType.DCOLON: 3426 field = self._parse_types() 3427 if not field: 3428 self.raise_error("Expected type") 3429 elif op and self._curr: 3430 self._advance() 3431 value = self._prev.text 3432 field = ( 3433 exp.Literal.number(value) 3434 if self._prev.token_type == TokenType.NUMBER 3435 else exp.Literal.string(value) 3436 ) 3437 else: 3438 field = self._parse_field(anonymous_func=True, any_token=True) 3439 3440 if isinstance(field, exp.Func): 3441 # bigquery allows function calls like x.y.count(...) 3442 # SAFE.SUBSTR(...) 3443 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3444 this = self._replace_columns_with_dots(this) 3445 3446 if op: 3447 this = op(self, this, field) 3448 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3449 this = self.expression( 3450 exp.Column, 3451 this=field, 3452 table=this.this, 3453 db=this.args.get("table"), 3454 catalog=this.args.get("db"), 3455 ) 3456 else: 3457 this = self.expression(exp.Dot, this=this, expression=field) 3458 this = self._parse_bracket(this) 3459 return this 3460 3461 def _parse_primary(self) -> t.Optional[exp.Expression]: 3462 if self._match_set(self.PRIMARY_PARSERS): 3463 token_type = self._prev.token_type 3464 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3465 3466 if token_type == TokenType.STRING: 3467 expressions = [primary] 3468 while self._match(TokenType.STRING): 3469 expressions.append(exp.Literal.string(self._prev.text)) 3470 3471 if len(expressions) > 1: 3472 return self.expression(exp.Concat, expressions=expressions) 3473 3474 return primary 3475 3476 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3477 return exp.Literal.number(f"0.{self._prev.text}") 3478 3479 if self._match(TokenType.L_PAREN): 3480 comments = self._prev_comments 3481 query = self._parse_select() 3482 3483 if query: 3484 expressions = [query] 3485 else: 3486 expressions = self._parse_expressions() 3487 3488 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3489 3490 if isinstance(this, exp.Subqueryable): 3491 this = self._parse_set_operations( 3492 self._parse_subquery(this=this, parse_alias=False) 3493 ) 3494 elif len(expressions) > 1: 3495 this = self.expression(exp.Tuple, expressions=expressions) 3496 else: 3497 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3498 3499 if this: 3500 this.add_comments(comments) 3501 3502 self._match_r_paren(expression=this) 3503 return this 3504 3505 return None 3506 3507 def _parse_field( 3508 self, 3509 any_token: bool = False, 3510 tokens: t.Optional[t.Collection[TokenType]] = None, 3511 anonymous_func: bool = False, 3512 ) -> t.Optional[exp.Expression]: 3513 return ( 3514 self._parse_primary() 
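# fall back to a (possibly anonymous) function call, then to a plain identifier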
3515 or self._parse_function(anonymous=anonymous_func) 3516 or self._parse_id_var(any_token=any_token, tokens=tokens) 3517 ) 3518 3519 def _parse_function( 3520 self, 3521 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3522 anonymous: bool = False, 3523 optional_parens: bool = True, 3524 ) -> t.Optional[exp.Expression]: 3525 if not self._curr: 3526 return None 3527 3528 token_type = self._curr.token_type 3529 this = self._curr.text 3530 upper = this.upper() 3531 3532 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3533 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3534 self._advance() 3535 return parser(self) 3536 3537 if not self._next or self._next.token_type != TokenType.L_PAREN: 3538 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3539 self._advance() 3540 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3541 3542 return None 3543 3544 if token_type not in self.FUNC_TOKENS: 3545 return None 3546 3547 self._advance(2) 3548 3549 parser = self.FUNCTION_PARSERS.get(upper) 3550 if parser and not anonymous: 3551 this = parser(self) 3552 else: 3553 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3554 3555 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3556 this = self.expression(subquery_predicate, this=self._parse_select()) 3557 self._match_r_paren() 3558 return this 3559 3560 if functions is None: 3561 functions = self.FUNCTIONS 3562 3563 function = functions.get(upper) 3564 3565 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3566 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3567 3568 if function and not anonymous: 3569 func = self.validate_expression(function(args), args) 3570 if not self.NORMALIZE_FUNCTIONS: 3571 func.meta["name"] = this 3572 this = func 3573 else: 3574 this = self.expression(exp.Anonymous, this=this, expressions=args) 3575 3576 self._match_r_paren(this) 3577 return self._parse_window(this) 3578 3579 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3580 return self._parse_column_def(self._parse_id_var()) 3581 3582 def _parse_user_defined_function( 3583 self, kind: t.Optional[TokenType] = None 3584 ) -> t.Optional[exp.Expression]: 3585 this = self._parse_id_var() 3586 3587 while self._match(TokenType.DOT): 3588 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3589 3590 if not self._match(TokenType.L_PAREN): 3591 return this 3592 3593 expressions = self._parse_csv(self._parse_function_parameter) 3594 self._match_r_paren() 3595 return self.expression( 3596 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3597 ) 3598 3599 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3600 literal = self._parse_primary() 3601 if literal: 3602 return self.expression(exp.Introducer, this=token.text, expression=literal) 3603 3604 return self.expression(exp.Identifier, this=token.text) 3605 3606 def _parse_session_parameter(self) -> exp.SessionParameter: 3607 kind = None 3608 this = self._parse_id_var() or self._parse_primary() 3609 3610 if this and self._match(TokenType.DOT): 3611 kind = this.name 3612 this = self._parse_var() or self._parse_primary() 3613 3614 return self.expression(exp.SessionParameter, this=this, kind=kind) 3615 3616 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3617 index = self._index 3618 3619 if self._match(TokenType.L_PAREN): 3620 expressions = t.cast( 3621 t.List[t.Optional[exp.Expression]], 
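# the parenthesized parameter list of a potential lambda, e.g. "(x, y)" in "(x, y) -> x + y"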
self._parse_csv(self._parse_id_var) 3622 ) 3623 3624 if not self._match(TokenType.R_PAREN): 3625 self._retreat(index) 3626 else: 3627 expressions = [self._parse_id_var()] 3628 3629 if self._match_set(self.LAMBDAS): 3630 return self.LAMBDAS[self._prev.token_type](self, expressions) 3631 3632 self._retreat(index) 3633 3634 this: t.Optional[exp.Expression] 3635 3636 if self._match(TokenType.DISTINCT): 3637 this = self.expression( 3638 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3639 ) 3640 else: 3641 this = self._parse_select_or_expression(alias=alias) 3642 3643 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3644 3645 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3646 index = self._index 3647 3648 if not self.errors: 3649 try: 3650 if self._parse_select(nested=True): 3651 return this 3652 except ParseError: 3653 pass 3654 finally: 3655 self.errors.clear() 3656 self._retreat(index) 3657 3658 if not self._match(TokenType.L_PAREN): 3659 return this 3660 3661 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3662 3663 self._match_r_paren() 3664 return self.expression(exp.Schema, this=this, expressions=args) 3665 3666 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3667 return self._parse_column_def(self._parse_field(any_token=True)) 3668 3669 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3670 # column defs are not really columns, they're identifiers 3671 if isinstance(this, exp.Column): 3672 this = this.this 3673 3674 kind = self._parse_types(schema=True) 3675 3676 if self._match_text_seq("FOR", "ORDINALITY"): 3677 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3678 3679 constraints: t.List[exp.Expression] = [] 3680 3681 if not kind and self._match(TokenType.ALIAS): 3682 constraints.append( 3683 self.expression( 3684 exp.ComputedColumnConstraint, 3685 this=self._parse_conjunction(), 3686 persisted=self._match_text_seq("PERSISTED"), 3687 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3688 ) 3689 ) 3690 3691 while True: 3692 constraint = self._parse_column_constraint() 3693 if not constraint: 3694 break 3695 constraints.append(constraint) 3696 3697 if not kind and not constraints: 3698 return this 3699 3700 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3701 3702 def _parse_auto_increment( 3703 self, 3704 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3705 start = None 3706 increment = None 3707 3708 if self._match(TokenType.L_PAREN, advance=False): 3709 args = self._parse_wrapped_csv(self._parse_bitwise) 3710 start = seq_get(args, 0) 3711 increment = seq_get(args, 1) 3712 elif self._match_text_seq("START"): 3713 start = self._parse_bitwise() 3714 self._match_text_seq("INCREMENT") 3715 increment = self._parse_bitwise() 3716 3717 if start and increment: 3718 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3719 3720 return exp.AutoIncrementColumnConstraint() 3721 3722 def _parse_compress(self) -> exp.CompressColumnConstraint: 3723 if self._match(TokenType.L_PAREN, advance=False): 3724 return self.expression( 3725 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3726 ) 3727 3728 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3729 3730 def _parse_generated_as_identity(self) -> 
exp.GeneratedAsIdentityColumnConstraint: 3731 if self._match_text_seq("BY", "DEFAULT"): 3732 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3733 this = self.expression( 3734 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3735 ) 3736 else: 3737 self._match_text_seq("ALWAYS") 3738 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3739 3740 self._match(TokenType.ALIAS) 3741 identity = self._match_text_seq("IDENTITY") 3742 3743 if self._match(TokenType.L_PAREN): 3744 if self._match(TokenType.START_WITH): 3745 this.set("start", self._parse_bitwise()) 3746 if self._match_text_seq("INCREMENT", "BY"): 3747 this.set("increment", self._parse_bitwise()) 3748 if self._match_text_seq("MINVALUE"): 3749 this.set("minvalue", self._parse_bitwise()) 3750 if self._match_text_seq("MAXVALUE"): 3751 this.set("maxvalue", self._parse_bitwise()) 3752 3753 if self._match_text_seq("CYCLE"): 3754 this.set("cycle", True) 3755 elif self._match_text_seq("NO", "CYCLE"): 3756 this.set("cycle", False) 3757 3758 if not identity: 3759 this.set("expression", self._parse_bitwise()) 3760 3761 self._match_r_paren() 3762 3763 return this 3764 3765 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3766 self._match_text_seq("LENGTH") 3767 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3768 3769 def _parse_not_constraint( 3770 self, 3771 ) -> t.Optional[exp.Expression]: 3772 if self._match_text_seq("NULL"): 3773 return self.expression(exp.NotNullColumnConstraint) 3774 if self._match_text_seq("CASESPECIFIC"): 3775 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3776 if self._match_text_seq("FOR", "REPLICATION"): 3777 return self.expression(exp.NotForReplicationColumnConstraint) 3778 return None 3779 3780 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3781 if self._match(TokenType.CONSTRAINT): 3782 this = self._parse_id_var() 3783 else: 3784 this = None 3785 3786 if self._match_texts(self.CONSTRAINT_PARSERS): 3787 return self.expression( 3788 exp.ColumnConstraint, 3789 this=this, 3790 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3791 ) 3792 3793 return this 3794 3795 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3796 if not self._match(TokenType.CONSTRAINT): 3797 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3798 3799 this = self._parse_id_var() 3800 expressions = [] 3801 3802 while True: 3803 constraint = self._parse_unnamed_constraint() or self._parse_function() 3804 if not constraint: 3805 break 3806 expressions.append(constraint) 3807 3808 return self.expression(exp.Constraint, this=this, expressions=expressions) 3809 3810 def _parse_unnamed_constraint( 3811 self, constraints: t.Optional[t.Collection[str]] = None 3812 ) -> t.Optional[exp.Expression]: 3813 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3814 return None 3815 3816 constraint = self._prev.text.upper() 3817 if constraint not in self.CONSTRAINT_PARSERS: 3818 self.raise_error(f"No parser found for schema constraint {constraint}.") 3819 3820 return self.CONSTRAINT_PARSERS[constraint](self) 3821 3822 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3823 self._match_text_seq("KEY") 3824 return self.expression( 3825 exp.UniqueColumnConstraint, 3826 this=self._parse_schema(self._parse_id_var(any_token=False)), 3827 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3828 ) 3829 3830 def 
_parse_key_constraint_options(self) -> t.List[str]: 3831 options = [] 3832 while True: 3833 if not self._curr: 3834 break 3835 3836 if self._match(TokenType.ON): 3837 action = None 3838 on = self._advance_any() and self._prev.text 3839 3840 if self._match_text_seq("NO", "ACTION"): 3841 action = "NO ACTION" 3842 elif self._match_text_seq("CASCADE"): 3843 action = "CASCADE" 3844 elif self._match_pair(TokenType.SET, TokenType.NULL): 3845 action = "SET NULL" 3846 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3847 action = "SET DEFAULT" 3848 else: 3849 self.raise_error("Invalid key constraint") 3850 3851 options.append(f"ON {on} {action}") 3852 elif self._match_text_seq("NOT", "ENFORCED"): 3853 options.append("NOT ENFORCED") 3854 elif self._match_text_seq("DEFERRABLE"): 3855 options.append("DEFERRABLE") 3856 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3857 options.append("INITIALLY DEFERRED") 3858 elif self._match_text_seq("NORELY"): 3859 options.append("NORELY") 3860 elif self._match_text_seq("MATCH", "FULL"): 3861 options.append("MATCH FULL") 3862 else: 3863 break 3864 3865 return options 3866 3867 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3868 if match and not self._match(TokenType.REFERENCES): 3869 return None 3870 3871 expressions = None 3872 this = self._parse_table(schema=True) 3873 options = self._parse_key_constraint_options() 3874 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3875 3876 def _parse_foreign_key(self) -> exp.ForeignKey: 3877 expressions = self._parse_wrapped_id_vars() 3878 reference = self._parse_references() 3879 options = {} 3880 3881 while self._match(TokenType.ON): 3882 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3883 self.raise_error("Expected DELETE or UPDATE") 3884 3885 kind = self._prev.text.lower() 3886 3887 if self._match_text_seq("NO", "ACTION"): 3888 action = "NO ACTION" 3889 elif self._match(TokenType.SET): 3890 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3891 action = "SET " + self._prev.text.upper() 3892 else: 3893 self._advance() 3894 action = self._prev.text.upper() 3895 3896 options[kind] = action 3897 3898 return self.expression( 3899 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3900 ) 3901 3902 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3903 return self._parse_field() 3904 3905 def _parse_primary_key( 3906 self, wrapped_optional: bool = False, in_props: bool = False 3907 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3908 desc = ( 3909 self._match_set((TokenType.ASC, TokenType.DESC)) 3910 and self._prev.token_type == TokenType.DESC 3911 ) 3912 3913 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3914 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3915 3916 expressions = self._parse_wrapped_csv( 3917 self._parse_primary_key_part, optional=wrapped_optional 3918 ) 3919 options = self._parse_key_constraint_options() 3920 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3921 3922 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3923 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3924 return this 3925 3926 bracket_kind = self._prev.token_type 3927 3928 if self._match(TokenType.COLON): 3929 expressions: t.List[exp.Expression] = [ 3930 self.expression(exp.Slice, expression=self._parse_conjunction()) 3931 ] 3932 else: 3933 expressions 
= self._parse_csv( 3934 lambda: self._parse_slice( 3935 self._parse_alias(self._parse_conjunction(), explicit=True) 3936 ) 3937 ) 3938 3939 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3940 if bracket_kind == TokenType.L_BRACE: 3941 this = self.expression(exp.Struct, expressions=expressions) 3942 elif not this or this.name.upper() == "ARRAY": 3943 this = self.expression(exp.Array, expressions=expressions) 3944 else: 3945 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3946 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3947 3948 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3949 self.raise_error("Expected ]") 3950 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3951 self.raise_error("Expected }") 3952 3953 self._add_comments(this) 3954 return self._parse_bracket(this) 3955 3956 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3957 if self._match(TokenType.COLON): 3958 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3959 return this 3960 3961 def _parse_case(self) -> t.Optional[exp.Expression]: 3962 ifs = [] 3963 default = None 3964 3965 comments = self._prev_comments 3966 expression = self._parse_conjunction() 3967 3968 while self._match(TokenType.WHEN): 3969 this = self._parse_conjunction() 3970 self._match(TokenType.THEN) 3971 then = self._parse_conjunction() 3972 ifs.append(self.expression(exp.If, this=this, true=then)) 3973 3974 if self._match(TokenType.ELSE): 3975 default = self._parse_conjunction() 3976 3977 if not self._match(TokenType.END): 3978 self.raise_error("Expected END after CASE", self._prev) 3979 3980 return self._parse_window( 3981 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3982 ) 3983 3984 def _parse_if(self) -> t.Optional[exp.Expression]: 3985 if self._match(TokenType.L_PAREN): 3986 args = self._parse_csv(self._parse_conjunction) 3987 this = self.validate_expression(exp.If.from_arg_list(args), args) 3988 self._match_r_paren() 3989 else: 3990 index = self._index - 1 3991 condition = self._parse_conjunction() 3992 3993 if not condition: 3994 self._retreat(index) 3995 return None 3996 3997 self._match(TokenType.THEN) 3998 true = self._parse_conjunction() 3999 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4000 self._match(TokenType.END) 4001 this = self.expression(exp.If, this=condition, true=true, false=false) 4002 4003 return self._parse_window(this) 4004 4005 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4006 if not self._match_text_seq("VALUE", "FOR"): 4007 self._retreat(self._index - 1) 4008 return None 4009 4010 return self.expression( 4011 exp.NextValueFor, 4012 this=self._parse_column(), 4013 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4014 ) 4015 4016 def _parse_extract(self) -> exp.Extract: 4017 this = self._parse_function() or self._parse_var() or self._parse_type() 4018 4019 if self._match(TokenType.FROM): 4020 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4021 4022 if not self._match(TokenType.COMMA): 4023 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4024 4025 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4026 4027 def _parse_any_value(self) -> exp.AnyValue: 4028 this = self._parse_lambda() 4029 is_max = None 4030 having = None 4031 
4032 if self._match(TokenType.HAVING): 4033 self._match_texts(("MAX", "MIN")) 4034 is_max = self._prev.text == "MAX" 4035 having = self._parse_column() 4036 4037 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4038 4039 def _parse_cast(self, strict: bool) -> exp.Expression: 4040 this = self._parse_conjunction() 4041 4042 if not self._match(TokenType.ALIAS): 4043 if self._match(TokenType.COMMA): 4044 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4045 4046 self.raise_error("Expected AS after CAST") 4047 4048 fmt = None 4049 to = self._parse_types() 4050 4051 if not to: 4052 self.raise_error("Expected TYPE after CAST") 4053 elif isinstance(to, exp.Identifier): 4054 to = exp.DataType.build(to.name, udt=True) 4055 elif to.this == exp.DataType.Type.CHAR: 4056 if self._match(TokenType.CHARACTER_SET): 4057 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4058 elif self._match(TokenType.FORMAT): 4059 fmt_string = self._parse_string() 4060 fmt = self._parse_at_time_zone(fmt_string) 4061 4062 if to.this in exp.DataType.TEMPORAL_TYPES: 4063 this = self.expression( 4064 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4065 this=this, 4066 format=exp.Literal.string( 4067 format_time( 4068 fmt_string.this if fmt_string else "", 4069 self.FORMAT_MAPPING or self.TIME_MAPPING, 4070 self.FORMAT_TRIE or self.TIME_TRIE, 4071 ) 4072 ), 4073 ) 4074 4075 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4076 this.set("zone", fmt.args["zone"]) 4077 4078 return this 4079 4080 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4081 4082 def _parse_concat(self) -> t.Optional[exp.Expression]: 4083 args = self._parse_csv(self._parse_conjunction) 4084 if self.CONCAT_NULL_OUTPUTS_STRING: 4085 args = self._ensure_string_if_null(args) 4086 4087 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4088 # we find such a call we replace it with its argument. 4089 if len(args) == 1: 4090 return args[0] 4091 4092 return self.expression( 4093 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4094 ) 4095 4096 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4097 args = self._parse_csv(self._parse_conjunction) 4098 if len(args) < 2: 4099 return self.expression(exp.ConcatWs, expressions=args) 4100 delim, *values = args 4101 if self.CONCAT_NULL_OUTPUTS_STRING: 4102 values = self._ensure_string_if_null(values) 4103 4104 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4105 4106 def _parse_string_agg(self) -> exp.Expression: 4107 if self._match(TokenType.DISTINCT): 4108 args: t.List[t.Optional[exp.Expression]] = [ 4109 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4110 ] 4111 if self._match(TokenType.COMMA): 4112 args.extend(self._parse_csv(self._parse_conjunction)) 4113 else: 4114 args = self._parse_csv(self._parse_conjunction) # type: ignore 4115 4116 index = self._index 4117 if not self._match(TokenType.R_PAREN) and args: 4118 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4119 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4120 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4121 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4122 4123 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4124 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4125 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4126 if not self._match_text_seq("WITHIN", "GROUP"): 4127 self._retreat(index) 4128 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4129 4130 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4131 order = self._parse_order(this=seq_get(args, 0)) 4132 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4133 4134 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4135 this = self._parse_bitwise() 4136 4137 if self._match(TokenType.USING): 4138 to: t.Optional[exp.Expression] = self.expression( 4139 exp.CharacterSet, this=self._parse_var() 4140 ) 4141 elif self._match(TokenType.COMMA): 4142 to = self._parse_types() 4143 else: 4144 to = None 4145 4146 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4147 4148 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4149 """ 4150 There are generally two variants of the DECODE function: 4151 4152 - DECODE(bin, charset) 4153 - DECODE(expression, search, result [, search, result] ... [, default]) 4154 4155 The second variant will always be parsed into a CASE expression. Note that NULL 4156 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4157 instead of relying on pattern matching. 
4158 """ 4159 args = self._parse_csv(self._parse_conjunction) 4160 4161 if len(args) < 3: 4162 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4163 4164 expression, *expressions = args 4165 if not expression: 4166 return None 4167 4168 ifs = [] 4169 for search, result in zip(expressions[::2], expressions[1::2]): 4170 if not search or not result: 4171 return None 4172 4173 if isinstance(search, exp.Literal): 4174 ifs.append( 4175 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4176 ) 4177 elif isinstance(search, exp.Null): 4178 ifs.append( 4179 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4180 ) 4181 else: 4182 cond = exp.or_( 4183 exp.EQ(this=expression.copy(), expression=search), 4184 exp.and_( 4185 exp.Is(this=expression.copy(), expression=exp.Null()), 4186 exp.Is(this=search.copy(), expression=exp.Null()), 4187 copy=False, 4188 ), 4189 copy=False, 4190 ) 4191 ifs.append(exp.If(this=cond, true=result)) 4192 4193 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4194 4195 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4196 self._match_text_seq("KEY") 4197 key = self._parse_column() 4198 self._match_set((TokenType.COLON, TokenType.COMMA)) 4199 self._match_text_seq("VALUE") 4200 value = self._parse_bitwise() 4201 4202 if not key and not value: 4203 return None 4204 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4205 4206 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4207 if not this or not self._match_text_seq("FORMAT", "JSON"): 4208 return this 4209 4210 return self.expression(exp.FormatJson, this=this) 4211 4212 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4213 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4214 for value in values: 4215 if self._match_text_seq(value, "ON", on): 4216 return f"{value} ON {on}" 4217 4218 return None 4219 4220 def _parse_json_object(self) -> exp.JSONObject: 4221 star = self._parse_star() 4222 expressions = ( 4223 [star] 4224 if star 4225 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4226 ) 4227 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4228 4229 unique_keys = None 4230 if self._match_text_seq("WITH", "UNIQUE"): 4231 unique_keys = True 4232 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4233 unique_keys = False 4234 4235 self._match_text_seq("KEYS") 4236 4237 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4238 self._parse_type() 4239 ) 4240 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4241 4242 return self.expression( 4243 exp.JSONObject, 4244 expressions=expressions, 4245 null_handling=null_handling, 4246 unique_keys=unique_keys, 4247 return_type=return_type, 4248 encoding=encoding, 4249 ) 4250 4251 def _parse_logarithm(self) -> exp.Func: 4252 # Default argument order is base, expression 4253 args = self._parse_csv(self._parse_range) 4254 4255 if len(args) > 1: 4256 if not self.LOG_BASE_FIRST: 4257 args.reverse() 4258 return exp.Log.from_arg_list(args) 4259 4260 return self.expression( 4261 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4262 ) 4263 4264 def _parse_match_against(self) -> exp.MatchAgainst: 4265 expressions = self._parse_csv(self._parse_column) 4266 4267 self._match_text_seq(")", "AGAINST", "(") 4268 4269 this = self._parse_string() 4270 4271 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4272 modifier = "IN NATURAL LANGUAGE MODE" 4273 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4274 modifier = f"{modifier} WITH QUERY EXPANSION" 4275 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4276 modifier = "IN BOOLEAN MODE" 4277 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4278 modifier = "WITH QUERY EXPANSION" 4279 else: 4280 modifier = None 4281 4282 return self.expression( 4283 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4284 ) 4285 4286 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4287 def _parse_open_json(self) -> exp.OpenJSON: 4288 this = self._parse_bitwise() 4289 path = self._match(TokenType.COMMA) and self._parse_string() 4290 4291 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4292 this = self._parse_field(any_token=True) 4293 kind = self._parse_types() 4294 path = self._parse_string() 4295 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4296 4297 return self.expression( 4298 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4299 ) 4300 4301 expressions = None 4302 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4303 self._match_l_paren() 4304 expressions = self._parse_csv(_parse_open_json_column_def) 4305 4306 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4307 4308 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4309 args = self._parse_csv(self._parse_bitwise) 4310 4311 if self._match(TokenType.IN): 4312 return self.expression( 4313 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4314 ) 4315 4316 if haystack_first: 4317 haystack = seq_get(args, 0) 4318 needle = seq_get(args, 1) 4319 else: 4320 needle = seq_get(args, 0) 
4321 haystack = seq_get(args, 1) 4322 4323 return self.expression( 4324 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4325 ) 4326 4327 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4328 args = self._parse_csv(self._parse_table) 4329 return exp.JoinHint(this=func_name.upper(), expressions=args) 4330 4331 def _parse_substring(self) -> exp.Substring: 4332 # Postgres supports the form: substring(string [from int] [for int]) 4333 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4334 4335 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4336 4337 if self._match(TokenType.FROM): 4338 args.append(self._parse_bitwise()) 4339 if self._match(TokenType.FOR): 4340 args.append(self._parse_bitwise()) 4341 4342 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4343 4344 def _parse_trim(self) -> exp.Trim: 4345 # https://www.w3resource.com/sql/character-functions/trim.php 4346 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4347 4348 position = None 4349 collation = None 4350 4351 if self._match_texts(self.TRIM_TYPES): 4352 position = self._prev.text.upper() 4353 4354 expression = self._parse_bitwise() 4355 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4356 this = self._parse_bitwise() 4357 else: 4358 this = expression 4359 expression = None 4360 4361 if self._match(TokenType.COLLATE): 4362 collation = self._parse_bitwise() 4363 4364 return self.expression( 4365 exp.Trim, this=this, position=position, expression=expression, collation=collation 4366 ) 4367 4368 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4369 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4370 4371 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4372 return self._parse_window(self._parse_id_var(), alias=True) 4373 4374 def _parse_respect_or_ignore_nulls( 4375 self, this: t.Optional[exp.Expression] 4376 ) -> t.Optional[exp.Expression]: 4377 if self._match_text_seq("IGNORE", "NULLS"): 4378 return self.expression(exp.IgnoreNulls, this=this) 4379 if self._match_text_seq("RESPECT", "NULLS"): 4380 return self.expression(exp.RespectNulls, this=this) 4381 return this 4382 4383 def _parse_window( 4384 self, this: t.Optional[exp.Expression], alias: bool = False 4385 ) -> t.Optional[exp.Expression]: 4386 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4387 self._match(TokenType.WHERE) 4388 this = self.expression( 4389 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4390 ) 4391 self._match_r_paren() 4392 4393 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4394 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4395 if self._match_text_seq("WITHIN", "GROUP"): 4396 order = self._parse_wrapped(self._parse_order) 4397 this = self.expression(exp.WithinGroup, this=this, expression=order) 4398 4399 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4400 # Some dialects choose to implement and some do not. 4401 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4402 4403 # There is some code above in _parse_lambda that handles 4404 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4405 4406 # The below changes handle 4407 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
4408 4409 # Oracle allows both formats 4410 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4411 # and Snowflake chose to do the same for familiarity 4412 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4413 this = self._parse_respect_or_ignore_nulls(this) 4414 4415 # bigquery select from window x AS (partition by ...) 4416 if alias: 4417 over = None 4418 self._match(TokenType.ALIAS) 4419 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4420 return this 4421 else: 4422 over = self._prev.text.upper() 4423 4424 if not self._match(TokenType.L_PAREN): 4425 return self.expression( 4426 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4427 ) 4428 4429 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4430 4431 first = self._match(TokenType.FIRST) 4432 if self._match_text_seq("LAST"): 4433 first = False 4434 4435 partition, order = self._parse_partition_and_order() 4436 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4437 4438 if kind: 4439 self._match(TokenType.BETWEEN) 4440 start = self._parse_window_spec() 4441 self._match(TokenType.AND) 4442 end = self._parse_window_spec() 4443 4444 spec = self.expression( 4445 exp.WindowSpec, 4446 kind=kind, 4447 start=start["value"], 4448 start_side=start["side"], 4449 end=end["value"], 4450 end_side=end["side"], 4451 ) 4452 else: 4453 spec = None 4454 4455 self._match_r_paren() 4456 4457 window = self.expression( 4458 exp.Window, 4459 this=this, 4460 partition_by=partition, 4461 order=order, 4462 spec=spec, 4463 alias=window_alias, 4464 over=over, 4465 first=first, 4466 ) 4467 4468 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4469 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4470 return self._parse_window(window, alias=alias) 4471 4472 return window 4473 4474 def _parse_partition_and_order( 4475 self, 4476 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4477 return self._parse_partition_by(), self._parse_order() 4478 4479 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4480 self._match(TokenType.BETWEEN) 4481 4482 return { 4483 "value": ( 4484 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4485 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4486 or self._parse_bitwise() 4487 ), 4488 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4489 } 4490 4491 def _parse_alias( 4492 self, this: t.Optional[exp.Expression], explicit: bool = False 4493 ) -> t.Optional[exp.Expression]: 4494 any_token = self._match(TokenType.ALIAS) 4495 4496 if explicit and not any_token: 4497 return this 4498 4499 if self._match(TokenType.L_PAREN): 4500 aliases = self.expression( 4501 exp.Aliases, 4502 this=this, 4503 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4504 ) 4505 self._match_r_paren(aliases) 4506 return aliases 4507 4508 alias = self._parse_id_var(any_token) 4509 4510 if alias: 4511 return self.expression(exp.Alias, this=this, alias=alias) 4512 4513 return this 4514 4515 def _parse_id_var( 4516 self, 4517 any_token: bool = True, 4518 tokens: t.Optional[t.Collection[TokenType]] = None, 4519 ) -> t.Optional[exp.Expression]: 4520 identifier = self._parse_identifier() 4521 4522 if identifier: 4523 return identifier 4524 4525 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4526 quoted = self._prev.token_type == TokenType.STRING 4527 return exp.Identifier(this=self._prev.text, quoted=quoted) 4528 4529 return None 4530 4531 def _parse_string(self) -> t.Optional[exp.Expression]: 4532 if self._match(TokenType.STRING): 4533 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4534 return self._parse_placeholder() 4535 4536 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4537 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4538 4539 def _parse_number(self) -> t.Optional[exp.Expression]: 4540 if self._match(TokenType.NUMBER): 4541 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4542 return self._parse_placeholder() 4543 4544 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4545 if self._match(TokenType.IDENTIFIER): 4546 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4547 return self._parse_placeholder() 4548 4549 def _parse_var( 4550 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4551 ) -> t.Optional[exp.Expression]: 4552 if ( 4553 (any_token and self._advance_any()) 4554 or self._match(TokenType.VAR) 4555 or (self._match_set(tokens) if tokens else False) 4556 ): 4557 return self.expression(exp.Var, this=self._prev.text) 4558 return self._parse_placeholder() 4559 4560 def _advance_any(self) -> t.Optional[Token]: 4561 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4562 self._advance() 4563 return self._prev 4564 return None 4565 4566 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4567 return self._parse_var() or self._parse_string() 4568 4569 def _parse_null(self) -> t.Optional[exp.Expression]: 4570 if self._match(TokenType.NULL): 4571 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4572 return self._parse_placeholder() 4573 4574 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4575 if self._match(TokenType.TRUE): 4576 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4577 if self._match(TokenType.FALSE): 4578 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4579 return self._parse_placeholder() 4580 4581 def _parse_star(self) -> t.Optional[exp.Expression]: 4582 if self._match(TokenType.STAR): 4583 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4584 return self._parse_placeholder() 4585 4586 def _parse_parameter(self) -> exp.Parameter: 4587 wrapped = self._match(TokenType.L_BRACE) 4588 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4589 self._match(TokenType.R_BRACE) 4590 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4591 4592 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4593 if self._match_set(self.PLACEHOLDER_PARSERS): 4594 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4595 if placeholder: 4596 return placeholder 4597 self._advance(-1) 4598 return None 4599 4600 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4601 if not self._match(TokenType.EXCEPT): 4602 return None 4603 if self._match(TokenType.L_PAREN, advance=False): 4604 return self._parse_wrapped_csv(self._parse_column) 4605 return self._parse_csv(self._parse_column) 4606 4607 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4608 if not self._match(TokenType.REPLACE): 4609 return None 4610 if self._match(TokenType.L_PAREN, advance=False): 4611 return self._parse_wrapped_csv(self._parse_expression) 4612 return self._parse_expressions() 4613 4614 def _parse_csv( 4615 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4616 ) -> t.List[exp.Expression]: 4617 parse_result = parse_method() 4618 items = [parse_result] if parse_result is not None else [] 4619 4620 while self._match(sep): 4621 self._add_comments(parse_result) 4622 parse_result = parse_method() 4623 if parse_result is not None: 4624 items.append(parse_result) 4625 4626 return items 4627 4628 def _parse_tokens( 4629 self, parse_method: t.Callable, expressions: t.Dict 4630 ) -> t.Optional[exp.Expression]: 4631 this = parse_method() 4632 4633 while self._match_set(expressions): 4634 this = self.expression( 4635 expressions[self._prev.token_type], 4636 this=this, 4637 comments=self._prev_comments, 4638 expression=parse_method(), 4639 ) 4640 4641 return this 4642 4643 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4644 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4645 4646 def _parse_wrapped_csv( 4647 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4648 ) -> t.List[exp.Expression]: 4649 return self._parse_wrapped( 4650 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4651 ) 4652 4653 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4654 wrapped = self._match(TokenType.L_PAREN) 4655 if not wrapped and not optional: 4656 self.raise_error("Expecting (") 4657 parse_result = parse_method() 4658 if wrapped: 4659 self._match_r_paren() 4660 return parse_result 4661 4662 def _parse_expressions(self) -> t.List[exp.Expression]: 4663 return self._parse_csv(self._parse_expression) 4664 4665 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4666 
return self._parse_select() or self._parse_set_operations( 4667 self._parse_expression() if alias else self._parse_conjunction() 4668 ) 4669 4670 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4671 return self._parse_query_modifiers( 4672 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4673 ) 4674 4675 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4676 this = None 4677 if self._match_texts(self.TRANSACTION_KIND): 4678 this = self._prev.text 4679 4680 self._match_texts({"TRANSACTION", "WORK"}) 4681 4682 modes = [] 4683 while True: 4684 mode = [] 4685 while self._match(TokenType.VAR): 4686 mode.append(self._prev.text) 4687 4688 if mode: 4689 modes.append(" ".join(mode)) 4690 if not self._match(TokenType.COMMA): 4691 break 4692 4693 return self.expression(exp.Transaction, this=this, modes=modes) 4694 4695 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4696 chain = None 4697 savepoint = None 4698 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4699 4700 self._match_texts({"TRANSACTION", "WORK"}) 4701 4702 if self._match_text_seq("TO"): 4703 self._match_text_seq("SAVEPOINT") 4704 savepoint = self._parse_id_var() 4705 4706 if self._match(TokenType.AND): 4707 chain = not self._match_text_seq("NO") 4708 self._match_text_seq("CHAIN") 4709 4710 if is_rollback: 4711 return self.expression(exp.Rollback, savepoint=savepoint) 4712 4713 return self.expression(exp.Commit, chain=chain) 4714 4715 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4716 if not self._match_text_seq("ADD"): 4717 return None 4718 4719 self._match(TokenType.COLUMN) 4720 exists_column = self._parse_exists(not_=True) 4721 expression = self._parse_field_def() 4722 4723 if expression: 4724 expression.set("exists", exists_column) 4725 4726 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4727 if self._match_texts(("FIRST", "AFTER")): 4728 position = self._prev.text 4729 column_position = self.expression( 4730 exp.ColumnPosition, this=self._parse_column(), position=position 4731 ) 4732 expression.set("position", column_position) 4733 4734 return expression 4735 4736 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4737 drop = self._match(TokenType.DROP) and self._parse_drop() 4738 if drop and not isinstance(drop, exp.Command): 4739 drop.set("kind", drop.args.get("kind", "COLUMN")) 4740 return drop 4741 4742 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4743 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4744 return self.expression( 4745 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4746 ) 4747 4748 def _parse_add_constraint(self) -> exp.AddConstraint: 4749 this = None 4750 kind = self._prev.token_type 4751 4752 if kind == TokenType.CONSTRAINT: 4753 this = self._parse_id_var() 4754 4755 if self._match_text_seq("CHECK"): 4756 expression = self._parse_wrapped(self._parse_conjunction) 4757 enforced = self._match_text_seq("ENFORCED") 4758 4759 return self.expression( 4760 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4761 ) 4762 4763 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4764 expression = self._parse_foreign_key() 4765 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4766 expression = self._parse_primary_key() 4767 else: 4768 expression = None 4769 4770 return 
self.expression(exp.AddConstraint, this=this, expression=expression) 4771 4772 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4773 index = self._index - 1 4774 4775 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4776 return self._parse_csv(self._parse_add_constraint) 4777 4778 self._retreat(index) 4779 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4780 return self._parse_csv(self._parse_field_def) 4781 4782 return self._parse_csv(self._parse_add_column) 4783 4784 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4785 self._match(TokenType.COLUMN) 4786 column = self._parse_field(any_token=True) 4787 4788 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4789 return self.expression(exp.AlterColumn, this=column, drop=True) 4790 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4791 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4792 4793 self._match_text_seq("SET", "DATA") 4794 return self.expression( 4795 exp.AlterColumn, 4796 this=column, 4797 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4798 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4799 using=self._match(TokenType.USING) and self._parse_conjunction(), 4800 ) 4801 4802 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4803 index = self._index - 1 4804 4805 partition_exists = self._parse_exists() 4806 if self._match(TokenType.PARTITION, advance=False): 4807 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4808 4809 self._retreat(index) 4810 return self._parse_csv(self._parse_drop_column) 4811 4812 def _parse_alter_table_rename(self) -> exp.RenameTable: 4813 self._match_text_seq("TO") 4814 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4815 4816 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4817 start = self._prev 4818 4819 if not self._match(TokenType.TABLE): 4820 return self._parse_as_command(start) 4821 4822 exists = self._parse_exists() 4823 only = self._match_text_seq("ONLY") 4824 this = self._parse_table(schema=True) 4825 4826 if self._next: 4827 self._advance() 4828 4829 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4830 if parser: 4831 actions = ensure_list(parser(self)) 4832 4833 if not self._curr: 4834 return self.expression( 4835 exp.AlterTable, 4836 this=this, 4837 exists=exists, 4838 actions=actions, 4839 only=only, 4840 ) 4841 4842 return self._parse_as_command(start) 4843 4844 def _parse_merge(self) -> exp.Merge: 4845 self._match(TokenType.INTO) 4846 target = self._parse_table() 4847 4848 if target and self._match(TokenType.ALIAS, advance=False): 4849 target.set("alias", self._parse_table_alias()) 4850 4851 self._match(TokenType.USING) 4852 using = self._parse_table() 4853 4854 self._match(TokenType.ON) 4855 on = self._parse_conjunction() 4856 4857 whens = [] 4858 while self._match(TokenType.WHEN): 4859 matched = not self._match(TokenType.NOT) 4860 self._match_text_seq("MATCHED") 4861 source = ( 4862 False 4863 if self._match_text_seq("BY", "TARGET") 4864 else self._match_text_seq("BY", "SOURCE") 4865 ) 4866 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4867 4868 self._match(TokenType.THEN) 4869 4870 if self._match(TokenType.INSERT): 4871 _this = self._parse_star() 4872 if _this: 4873 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4874 else: 4875 then = self.expression( 4876 exp.Insert, 4877 
this=self._parse_value(), 4878 expression=self._match(TokenType.VALUES) and self._parse_value(), 4879 ) 4880 elif self._match(TokenType.UPDATE): 4881 expressions = self._parse_star() 4882 if expressions: 4883 then = self.expression(exp.Update, expressions=expressions) 4884 else: 4885 then = self.expression( 4886 exp.Update, 4887 expressions=self._match(TokenType.SET) 4888 and self._parse_csv(self._parse_equality), 4889 ) 4890 elif self._match(TokenType.DELETE): 4891 then = self.expression(exp.Var, this=self._prev.text) 4892 else: 4893 then = None 4894 4895 whens.append( 4896 self.expression( 4897 exp.When, 4898 matched=matched, 4899 source=source, 4900 condition=condition, 4901 then=then, 4902 ) 4903 ) 4904 4905 return self.expression( 4906 exp.Merge, 4907 this=target, 4908 using=using, 4909 on=on, 4910 expressions=whens, 4911 ) 4912 4913 def _parse_show(self) -> t.Optional[exp.Expression]: 4914 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4915 if parser: 4916 return parser(self) 4917 return self._parse_as_command(self._prev) 4918 4919 def _parse_set_item_assignment( 4920 self, kind: t.Optional[str] = None 4921 ) -> t.Optional[exp.Expression]: 4922 index = self._index 4923 4924 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4925 return self._parse_set_transaction(global_=kind == "GLOBAL") 4926 4927 left = self._parse_primary() or self._parse_id_var() 4928 4929 if not self._match_texts(("=", "TO")): 4930 self._retreat(index) 4931 return None 4932 4933 right = self._parse_statement() or self._parse_id_var() 4934 this = self.expression(exp.EQ, this=left, expression=right) 4935 4936 return self.expression(exp.SetItem, this=this, kind=kind) 4937 4938 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4939 self._match_text_seq("TRANSACTION") 4940 characteristics = self._parse_csv( 4941 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4942 ) 4943 return self.expression( 4944 exp.SetItem, 4945 expressions=characteristics, 4946 kind="TRANSACTION", 4947 **{"global": global_}, # type: ignore 4948 ) 4949 4950 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4951 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4952 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4953 4954 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4955 index = self._index 4956 set_ = self.expression( 4957 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4958 ) 4959 4960 if self._curr: 4961 self._retreat(index) 4962 return self._parse_as_command(self._prev) 4963 4964 return set_ 4965 4966 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4967 for option in options: 4968 if self._match_text_seq(*option.split(" ")): 4969 return exp.var(option) 4970 return None 4971 4972 def _parse_as_command(self, start: Token) -> exp.Command: 4973 while self._curr: 4974 self._advance() 4975 text = self._find_sql(start, self._prev) 4976 size = len(start.text) 4977 return exp.Command(this=text[:size], expression=text[size:]) 4978 4979 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4980 settings = [] 4981 4982 self._match_l_paren() 4983 kind = self._parse_id_var() 4984 4985 if self._match(TokenType.L_PAREN): 4986 while True: 4987 key = self._parse_id_var() 4988 value = self._parse_primary() 4989 4990 if not key and value is None: 4991 break 4992 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4993 self._match(TokenType.R_PAREN) 4994 4995 self._match_r_paren() 4996 4997 return self.expression( 4998 exp.DictProperty, 4999 this=this, 5000 kind=kind.this if kind else None, 5001 settings=settings, 5002 ) 5003 5004 def _parse_dict_range(self, this: str) -> exp.DictRange: 5005 self._match_l_paren() 5006 has_min = self._match_text_seq("MIN") 5007 if has_min: 5008 min = self._parse_var() or self._parse_primary() 5009 self._match_text_seq("MAX") 5010 max = self._parse_var() or self._parse_primary() 5011 else: 5012 max = self._parse_var() or self._parse_primary() 5013 min = exp.Literal.number(0) 5014 self._match_r_paren() 5015 return self.expression(exp.DictRange, this=this, min=min, max=max) 5016 5017 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5018 index = self._index 5019 expression = self._parse_column() 5020 if not self._match(TokenType.IN): 5021 self._retreat(index - 1) 5022 return None 5023 iterator = self._parse_column() 5024 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5025 return self.expression( 5026 exp.Comprehension, 5027 this=this, 5028 expression=expression, 5029 iterator=iterator, 5030 condition=condition, 5031 ) 5032 5033 def _find_parser( 5034 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5035 ) -> t.Optional[t.Callable]: 5036 if not self._curr: 5037 return None 5038 5039 index = self._index 5040 this = [] 5041 while True: 5042 # The current token might be multiple words 5043 curr = self._curr.text.upper() 5044 key = curr.split(" ") 5045 this.append(curr) 5046 5047 self._advance() 5048 result, trie = in_trie(trie, key) 5049 if result == TrieResult.FAILED: 5050 break 5051 5052 if result == TrieResult.EXISTS: 5053 subparser = parsers[" ".join(this)] 5054 return subparser 5055 5056 self._retreat(index) 5057 return None 5058 5059 def _match(self, token_type, advance=True, expression=None): 5060 if not self._curr: 5061 return None 5062 5063 if self._curr.token_type == token_type: 5064 if advance: 5065 self._advance() 5066 self._add_comments(expression) 5067 return True 5068 5069 return None 5070 5071 def _match_set(self, types, advance=True): 5072 if not self._curr: 5073 return None 5074 5075 if self._curr.token_type in types: 5076 if advance: 5077 self._advance() 5078 return True 5079 5080 return None 5081 5082 def _match_pair(self, token_type_a, token_type_b, advance=True): 5083 if not self._curr or not self._next: 5084 return None 5085 5086 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5087 if advance: 5088 self._advance(2) 5089 return True 5090 5091 return None 5092 5093 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5094 if not self._match(TokenType.L_PAREN, expression=expression): 5095 self.raise_error("Expecting (") 5096 5097 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5098 if not self._match(TokenType.R_PAREN, expression=expression): 5099 self.raise_error("Expecting )") 5100 5101 def _match_texts(self, texts, advance=True): 5102 if self._curr and self._curr.text.upper() in texts: 5103 if advance: 5104 self._advance() 5105 return True 5106 return False 5107 5108 def _match_text_seq(self, *texts, advance=True): 5109 index = self._index 5110 for text in texts: 5111 if self._curr and self._curr.text.upper() == text: 5112 self._advance() 5113 else: 5114 self._retreat(index) 5115 return False 5116 5117 if not 
advance: 5118 self._retreat(index) 5119 5120 return True 5121 5122 @t.overload 5123 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5124 ... 5125 5126 @t.overload 5127 def _replace_columns_with_dots( 5128 self, this: t.Optional[exp.Expression] 5129 ) -> t.Optional[exp.Expression]: 5130 ... 5131 5132 def _replace_columns_with_dots(self, this): 5133 if isinstance(this, exp.Dot): 5134 exp.replace_children(this, self._replace_columns_with_dots) 5135 elif isinstance(this, exp.Column): 5136 exp.replace_children(this, self._replace_columns_with_dots) 5137 table = this.args.get("table") 5138 this = ( 5139 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5140 ) 5141 5142 return this 5143 5144 def _replace_lambda( 5145 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5146 ) -> t.Optional[exp.Expression]: 5147 if not node: 5148 return node 5149 5150 for column in node.find_all(exp.Column): 5151 if column.parts[0].name in lambda_variables: 5152 dot_or_id = column.to_dot() if column.table else column.this 5153 parent = column.parent 5154 5155 while isinstance(parent, exp.Dot): 5156 if not isinstance(parent.parent, exp.Dot): 5157 parent.replace(dot_or_id) 5158 break 5159 parent = parent.parent 5160 else: 5161 if column is node: 5162 node = dot_or_id 5163 else: 5164 column.replace(dot_or_id) 5165 return node 5166 5167 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5168 return [ 5169 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5170 for value in values 5171 if value 5172 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
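A minimal construction sketch (not part of the module source), assuming the default tokenizer; IGNORE, WARN, RAISE and IMMEDIATE are the levels defined in sqlglot.errors:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# IMMEDIATE (the default) raises on the first error; RAISE collects errors
# and raises once at the end, including up to max_errors messages; WARN logs
# them; IGNORE skips validation entirely.
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,  # characters of SQL shown around an error
    max_errors=3,
)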
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self._tokenizer = self.TOKENIZER_CLASS()
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
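A short end-to-end sketch (not part of the module source): parse expects the token stream produced by the Tokenizer and returns one tree per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# Passing the original SQL lets error messages point at the offending slice.
trees = Parser().parse(tokens, sql)
print(len(trees))      # 2: one syntax tree per statement
print(trees[0].sql())  # SELECT a FROM t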
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
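A sketch of parse_into (not part of the module source), assuming exp.Condition is registered in EXPRESSION_PARSERS, as it is in current versions:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 5 AND y IS NOT NULL"
tokens = Tokenizer().tokenize(sql)

# Parse the tokens as a standalone boolean condition rather than a statement.
# A TypeError is raised if no parser is registered for the requested type, and
# a ParseError (annotated with into_expression) if every candidate type fails.
condition = Parser().parse_into(exp.Condition, tokens, sql)[0]
print(type(condition).__name__)  # And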
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
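A sketch of the WARN behavior (not part of the module source): the invalid CAST below records errors instead of raising, and check_errors (also invoked internally at the end of a parse, shown explicitly here) routes them to the module logger:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT CAST(x AS)"  # missing target type after AS
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)

parser.check_errors()      # logs each error via logging.getLogger("sqlglot")
print(len(parser.errors))  # at least one recorded error for this input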
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
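With ErrorLevel.RAISE, the errors recorded by raise_error surface as a single ParseError whose errors attribute keeps the structured fields built here. A minimal sketch (not part of the module source):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT CAST(x AS)"
parser = Parser(error_level=ErrorLevel.RAISE)

try:
    parser.parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    first = e.errors[0]  # structured fields populated by raise_error
    print(first["line"], first["col"], first["highlight"])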
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
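A sketch of how parser methods use this helper internally (not part of the module source): it instantiates the node, attaches any pending comments, and validates mandatory arguments in one step:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Equivalent to exp.Not(this=...) plus comment handling and validation.
node = parser.expression(exp.Not, this=exp.column("x"))
print(node.sql())  # NOT x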
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
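A sketch of the validation path (not part of the module source): exp.Cast declares "to" as a mandatory argument, so leaving it unset fails validation, and under the default IMMEDIATE level the error is raised on the spot:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
invalid = exp.Cast(this=exp.column("x"))  # mandatory "to" argument missing

try:
    parser.validate_expression(invalid)
except ParseError as e:
    print(e.errors[0]["description"])  # e.g. "Required keyword: 'to' missing for ..."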