sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
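
    # Illustrative sketch (comments only): a Parser is fed tokens produced by a
    # Tokenizer, and the two are normally wired together by a Dialect via
    # sqlglot.parse / sqlglot.parse_one, e.g.
    #
    #   tokenizer = Tokenizer()
    #   parser = Parser(error_level=ErrorLevel.RAISE)
    #   expressions = parser.parse(tokenizer.tokenize("SELECT 1"), sql="SELECT 1")
    #
    # which yields a list containing a single exp.Select tree.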

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
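
    # Sketch of what COLUMN_OPERATORS produces with the default STRICT_CAST = True:
    #
    #   x::INT        ->  exp.Cast(this=column("x"), to=DataType(INT))    # DCOLON
    #   col -> '$.a'  ->  exp.JSONExtract(this=column("col"), ...)        # ARROW
    #   col ->> '$.a' ->  exp.JSONExtractScalar(this=column("col"), ...)  # DARROW
    #
    # With STRICT_CAST = False, the DCOLON entry builds exp.TryCast instead.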

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
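
    # Dispatch sketch: _parse_statement (defined below) consults this table and
    # invokes the matching lambda, roughly:
    #
    #   if self._match_set(self.STATEMENT_PARSERS):
    #       return self.STATEMENT_PARSERS[self._prev.token_type](self)
    #
    # Tokens without an entry fall through to command or expression parsing.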

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
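
    # Range-parser sketch (see binary_range_parser above): parsing
    # "a LIKE 'x%' ESCAPE '\'" first builds exp.Like(this=a, expression='x%'),
    # then _parse_escape wraps the result as exp.Escape(this=<like>, expression='\').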

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
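
    # These parser tables are plain class attributes, so dialects can extend them
    # in a subclass. A minimal hypothetical sketch (MYPROP is not a real property):
    #
    #   class MyParser(Parser):
    #       PROPERTY_PARSERS = {
    #           **Parser.PROPERTY_PARSERS,
    #           "MYPROP": lambda self: self._parse_property_assignment(exp.Property),
    #       }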

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
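
    # Functions listed in FUNCTION_PARSERS need bespoke parsing because their
    # argument syntax is irregular; everything else goes through FUNCTIONS, whose
    # entries are built from each expression type's from_arg_list. For example:
    #
    #   CAST(x AS INT)      -> _parse_cast(self.STRICT_CAST) -> exp.Cast(...)
    #   TRY_CAST(x AS INT)  -> _parse_cast(False)            -> exp.TryCast(...)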

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False
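
    # Behavior flags like STRICT_CAST are toggled per dialect: with the default
    # (True), the DCOLON column operator above yields exp.Cast, while a dialect
    # that sets it to False gets exp.TryCast. Likewise, a dialect whose CONCAT
    # treats NULL arguments as empty strings sets CONCAT_NULL_OUTPUTS_STRING = True.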

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
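
    # Sketch: parse() returns one tree per statement; the token stream is split
    # on semicolons inside _parse (defined below), e.g.
    #
    #   tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #   trees = Parser().parse(tokens)  # [exp.Select(...), exp.Select(...)]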
992 """ 993 errors = [] 994 for expression_type in ensure_list(expression_types): 995 parser = self.EXPRESSION_PARSERS.get(expression_type) 996 if not parser: 997 raise TypeError(f"No parser registered for {expression_type}") 998 999 try: 1000 return self._parse(parser, raw_tokens, sql) 1001 except ParseError as e: 1002 e.errors[0]["into_expression"] = expression_type 1003 errors.append(e) 1004 1005 raise ParseError( 1006 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1007 errors=merge_errors(errors), 1008 ) from errors[-1] 1009 1010 def _parse( 1011 self, 1012 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1013 raw_tokens: t.List[Token], 1014 sql: t.Optional[str] = None, 1015 ) -> t.List[t.Optional[exp.Expression]]: 1016 self.reset() 1017 self.sql = sql or "" 1018 1019 total = len(raw_tokens) 1020 chunks: t.List[t.List[Token]] = [[]] 1021 1022 for i, token in enumerate(raw_tokens): 1023 if token.token_type == TokenType.SEMICOLON: 1024 if i < total - 1: 1025 chunks.append([]) 1026 else: 1027 chunks[-1].append(token) 1028 1029 expressions = [] 1030 1031 for tokens in chunks: 1032 self._index = -1 1033 self._tokens = tokens 1034 self._advance() 1035 1036 expressions.append(parse_method(self)) 1037 1038 if self._index < len(self._tokens): 1039 self.raise_error("Invalid expression / Unexpected token") 1040 1041 self.check_errors() 1042 1043 return expressions 1044 1045 def check_errors(self) -> None: 1046 """Logs or raises any found errors, depending on the chosen error level setting.""" 1047 if self.error_level == ErrorLevel.WARN: 1048 for error in self.errors: 1049 logger.error(str(error)) 1050 elif self.error_level == ErrorLevel.RAISE and self.errors: 1051 raise ParseError( 1052 concat_messages(self.errors, self.max_errors), 1053 errors=merge_errors(self.errors), 1054 ) 1055 1056 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1057 """ 1058 Appends an error in the list of recorded errors or raises it, depending on the chosen 1059 error level setting. 1060 """ 1061 token = token or self._curr or self._prev or Token.string("") 1062 start = token.start 1063 end = token.end + 1 1064 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1065 highlight = self.sql[start:end] 1066 end_context = self.sql[end : end + self.error_message_context] 1067 1068 error = ParseError.new( 1069 f"{message}. Line {token.line}, Col: {token.col}.\n" 1070 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1071 description=message, 1072 line=token.line, 1073 col=token.col, 1074 start_context=start_context, 1075 highlight=highlight, 1076 end_context=end_context, 1077 ) 1078 1079 if self.error_level == ErrorLevel.IMMEDIATE: 1080 raise error 1081 1082 self.errors.append(error) 1083 1084 def expression( 1085 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1086 ) -> E: 1087 """ 1088 Creates a new, validated Expression. 1089 1090 Args: 1091 exp_class: The expression class to instantiate. 1092 comments: An optional list of comments to attach to the expression. 1093 kwargs: The arguments to set for the expression along with their respective values. 1094 1095 Returns: 1096 The target expression. 
1097 """ 1098 instance = exp_class(**kwargs) 1099 instance.add_comments(comments) if comments else self._add_comments(instance) 1100 return self.validate_expression(instance) 1101 1102 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1103 if expression and self._prev_comments: 1104 expression.add_comments(self._prev_comments) 1105 self._prev_comments = None 1106 1107 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1108 """ 1109 Validates an Expression, making sure that all its mandatory arguments are set. 1110 1111 Args: 1112 expression: The expression to validate. 1113 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1114 1115 Returns: 1116 The validated expression. 1117 """ 1118 if self.error_level != ErrorLevel.IGNORE: 1119 for error_message in expression.error_messages(args): 1120 self.raise_error(error_message) 1121 1122 return expression 1123 1124 def _find_sql(self, start: Token, end: Token) -> str: 1125 return self.sql[start.start : end.end + 1] 1126 1127 def _advance(self, times: int = 1) -> None: 1128 self._index += times 1129 self._curr = seq_get(self._tokens, self._index) 1130 self._next = seq_get(self._tokens, self._index + 1) 1131 1132 if self._index > 0: 1133 self._prev = self._tokens[self._index - 1] 1134 self._prev_comments = self._prev.comments 1135 else: 1136 self._prev = None 1137 self._prev_comments = None 1138 1139 def _retreat(self, index: int) -> None: 1140 if index != self._index: 1141 self._advance(index - self._index) 1142 1143 def _parse_command(self) -> exp.Command: 1144 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1145 1146 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1147 start = self._prev 1148 exists = self._parse_exists() if allow_exists else None 1149 1150 self._match(TokenType.ON) 1151 1152 kind = self._match_set(self.CREATABLES) and self._prev 1153 if not kind: 1154 return self._parse_as_command(start) 1155 1156 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1157 this = self._parse_user_defined_function(kind=kind.token_type) 1158 elif kind.token_type == TokenType.TABLE: 1159 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1160 elif kind.token_type == TokenType.COLUMN: 1161 this = self._parse_column() 1162 else: 1163 this = self._parse_id_var() 1164 1165 self._match(TokenType.IS) 1166 1167 return self.expression( 1168 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1169 ) 1170 1171 def _parse_to_table( 1172 self, 1173 ) -> exp.ToTableProperty: 1174 table = self._parse_table_parts(schema=True) 1175 return self.expression(exp.ToTableProperty, this=table) 1176 1177 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1178 def _parse_ttl(self) -> exp.Expression: 1179 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1180 this = self._parse_bitwise() 1181 1182 if self._match_text_seq("DELETE"): 1183 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1184 if self._match_text_seq("RECOMPRESS"): 1185 return self.expression( 1186 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1187 ) 1188 if self._match_text_seq("TO", "DISK"): 1189 return self.expression( 1190 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1191 ) 1192 if self._match_text_seq("TO", "VOLUME"): 1193 return self.expression( 1194 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
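
    # Backtracking sketch: speculative parses save the cursor with _index and roll
    # back with _retreat on failure, as in _parse_property below:
    #
    #   index = self._index
    #   key = self._parse_column()
    #   if not self._match(TokenType.EQ):
    #       self._retreat(index)  # undo the lookahead
    #       return None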

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
"NOT LOCAL"), 1412 "after": self._match_text_seq("AFTER"), 1413 "minimum": self._match_texts(("MIN", "MINIMUM")), 1414 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1415 } 1416 1417 if self._match_texts(self.PROPERTY_PARSERS): 1418 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1419 try: 1420 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1421 except TypeError: 1422 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1423 1424 return None 1425 1426 def _parse_property(self) -> t.Optional[exp.Expression]: 1427 if self._match_texts(self.PROPERTY_PARSERS): 1428 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1429 1430 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1431 return self._parse_character_set(default=True) 1432 1433 if self._match_text_seq("COMPOUND", "SORTKEY"): 1434 return self._parse_sortkey(compound=True) 1435 1436 if self._match_text_seq("SQL", "SECURITY"): 1437 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1438 1439 index = self._index 1440 key = self._parse_column() 1441 1442 if not self._match(TokenType.EQ): 1443 self._retreat(index) 1444 return None 1445 1446 return self.expression( 1447 exp.Property, 1448 this=key.to_dot() if isinstance(key, exp.Column) else key, 1449 value=self._parse_column() or self._parse_var(any_token=True), 1450 ) 1451 1452 def _parse_stored(self) -> exp.FileFormatProperty: 1453 self._match(TokenType.ALIAS) 1454 1455 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1456 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1457 1458 return self.expression( 1459 exp.FileFormatProperty, 1460 this=self.expression( 1461 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1462 ) 1463 if input_format or output_format 1464 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1465 ) 1466 1467 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1468 self._match(TokenType.EQ) 1469 self._match(TokenType.ALIAS) 1470 return self.expression(exp_class, this=self._parse_field()) 1471 1472 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1473 properties = [] 1474 while True: 1475 if before: 1476 prop = self._parse_property_before() 1477 else: 1478 prop = self._parse_property() 1479 1480 if not prop: 1481 break 1482 for p in ensure_list(prop): 1483 properties.append(p) 1484 1485 if properties: 1486 return self.expression(exp.Properties, expressions=properties) 1487 1488 return None 1489 1490 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1491 return self.expression( 1492 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1493 ) 1494 1495 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1496 if self._index >= 2: 1497 pre_volatile_token = self._tokens[self._index - 2] 1498 else: 1499 pre_volatile_token = None 1500 1501 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1502 return exp.VolatileProperty() 1503 1504 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1505 1506 def _parse_with_property( 1507 self, 1508 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1509 if self._match(TokenType.L_PAREN, advance=False): 1510 return self._parse_wrapped_csv(self._parse_property) 1511 1512 if self._match_text_seq("JOURNAL"): 1513 return 

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
exp.NoPrimaryIndexProperty() 1746 return None 1747 1748 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1749 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1750 return exp.OnCommitProperty() 1751 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1752 return exp.OnCommitProperty(delete=True) 1753 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1754 1755 def _parse_distkey(self) -> exp.DistKeyProperty: 1756 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1757 1758 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1759 table = self._parse_table(schema=True) 1760 1761 options = [] 1762 while self._match_texts(("INCLUDING", "EXCLUDING")): 1763 this = self._prev.text.upper() 1764 1765 id_var = self._parse_id_var() 1766 if not id_var: 1767 return None 1768 1769 options.append( 1770 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1771 ) 1772 1773 return self.expression(exp.LikeProperty, this=table, expressions=options) 1774 1775 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1776 return self.expression( 1777 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1778 ) 1779 1780 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1781 self._match(TokenType.EQ) 1782 return self.expression( 1783 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1784 ) 1785 1786 def _parse_returns(self) -> exp.ReturnsProperty: 1787 value: t.Optional[exp.Expression] 1788 is_table = self._match(TokenType.TABLE) 1789 1790 if is_table: 1791 if self._match(TokenType.LT): 1792 value = self.expression( 1793 exp.Schema, 1794 this="TABLE", 1795 expressions=self._parse_csv(self._parse_struct_types), 1796 ) 1797 if not self._match(TokenType.GT): 1798 self.raise_error("Expecting >") 1799 else: 1800 value = self._parse_schema(exp.var("TABLE")) 1801 else: 1802 value = self._parse_types() 1803 1804 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1805 1806 def _parse_describe(self) -> exp.Describe: 1807 kind = self._match_set(self.CREATABLES) and self._prev.text 1808 this = self._parse_table(schema=True) 1809 properties = self._parse_properties() 1810 expressions = properties.expressions if properties else None 1811 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1812 1813 def _parse_insert(self) -> exp.Insert: 1814 comments = ensure_list(self._prev_comments) 1815 overwrite = self._match(TokenType.OVERWRITE) 1816 ignore = self._match(TokenType.IGNORE) 1817 local = self._match_text_seq("LOCAL") 1818 alternative = None 1819 1820 if self._match_text_seq("DIRECTORY"): 1821 this: t.Optional[exp.Expression] = self.expression( 1822 exp.Directory, 1823 this=self._parse_var_or_string(), 1824 local=local, 1825 row_format=self._parse_row_format(match_row=True), 1826 ) 1827 else: 1828 if self._match(TokenType.OR): 1829 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1830 1831 self._match(TokenType.INTO) 1832 comments += ensure_list(self._prev_comments) 1833 self._match(TokenType.TABLE) 1834 this = self._parse_table(schema=True) 1835 1836 returning = self._parse_returning() 1837 1838 return self.expression( 1839 exp.Insert, 1840 comments=comments, 1841 this=this, 1842 by_name=self._match_text_seq("BY", "NAME"), 1843 exists=self._parse_exists(), 1844 partition=self._parse_partition(), 1845 
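# Illustrative sketch (an assumption, not upstream documentation): the Redshift
# dialect routes DISTKEY and SORTKEY into _parse_distkey and _parse_sortkey
# above. Table and column names are placeholders.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> ddl = "CREATE TABLE t (a INT) DISTKEY(a) SORTKEY(a)"
#   >>> e = sqlglot.parse_one(ddl, read="redshift")
#   >>> e.find(exp.DistKeyProperty) is not None, e.find(exp.SortKeyProperty) is not None
#   (True, True)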
where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1846 and self._parse_conjunction(), 1847 expression=self._parse_ddl_select(), 1848 conflict=self._parse_on_conflict(), 1849 returning=returning or self._parse_returning(), 1850 overwrite=overwrite, 1851 alternative=alternative, 1852 ignore=ignore, 1853 ) 1854 1855 def _parse_kill(self) -> exp.Kill: 1856 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1857 1858 return self.expression( 1859 exp.Kill, 1860 this=self._parse_primary(), 1861 kind=kind, 1862 ) 1863 1864 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1865 conflict = self._match_text_seq("ON", "CONFLICT") 1866 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1867 1868 if not conflict and not duplicate: 1869 return None 1870 1871 nothing = None 1872 expressions = None 1873 key = None 1874 constraint = None 1875 1876 if conflict: 1877 if self._match_text_seq("ON", "CONSTRAINT"): 1878 constraint = self._parse_id_var() 1879 else: 1880 key = self._parse_csv(self._parse_value) 1881 1882 self._match_text_seq("DO") 1883 if self._match_text_seq("NOTHING"): 1884 nothing = True 1885 else: 1886 self._match(TokenType.UPDATE) 1887 self._match(TokenType.SET) 1888 expressions = self._parse_csv(self._parse_equality) 1889 1890 return self.expression( 1891 exp.OnConflict, 1892 duplicate=duplicate, 1893 expressions=expressions, 1894 nothing=nothing, 1895 key=key, 1896 constraint=constraint, 1897 ) 1898 1899 def _parse_returning(self) -> t.Optional[exp.Returning]: 1900 if not self._match(TokenType.RETURNING): 1901 return None 1902 return self.expression( 1903 exp.Returning, 1904 expressions=self._parse_csv(self._parse_expression), 1905 into=self._match(TokenType.INTO) and self._parse_table_part(), 1906 ) 1907 1908 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1909 if not self._match(TokenType.FORMAT): 1910 return None 1911 return self._parse_row_format() 1912 1913 def _parse_row_format( 1914 self, match_row: bool = False 1915 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1916 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1917 return None 1918 1919 if self._match_text_seq("SERDE"): 1920 this = self._parse_string() 1921 1922 serde_properties = None 1923 if self._match(TokenType.SERDE_PROPERTIES): 1924 serde_properties = self.expression( 1925 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1926 ) 1927 1928 return self.expression( 1929 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1930 ) 1931 1932 self._match_text_seq("DELIMITED") 1933 1934 kwargs = {} 1935 1936 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1937 kwargs["fields"] = self._parse_string() 1938 if self._match_text_seq("ESCAPED", "BY"): 1939 kwargs["escaped"] = self._parse_string() 1940 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1941 kwargs["collection_items"] = self._parse_string() 1942 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1943 kwargs["map_keys"] = self._parse_string() 1944 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1945 kwargs["lines"] = self._parse_string() 1946 if self._match_text_seq("NULL", "DEFINED", "AS"): 1947 kwargs["null"] = self._parse_string() 1948 1949 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1950 1951 def _parse_load(self) -> exp.LoadData | exp.Command: 1952 if self._match_text_seq("DATA"): 
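# Illustrative sketch of _parse_on_conflict above (names are placeholders;
# shown with the default dialect, which accepts the Postgres-style clause):
#
#   >>> import sqlglot
#   >>> e = sqlglot.parse_one("INSERT INTO t (a) VALUES (1) ON CONFLICT DO NOTHING")
#   >>> e.args["conflict"].args.get("nothing")
#   True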
1953 local = self._match_text_seq("LOCAL") 1954 self._match_text_seq("INPATH") 1955 inpath = self._parse_string() 1956 overwrite = self._match(TokenType.OVERWRITE) 1957 self._match_pair(TokenType.INTO, TokenType.TABLE) 1958 1959 return self.expression( 1960 exp.LoadData, 1961 this=self._parse_table(schema=True), 1962 local=local, 1963 overwrite=overwrite, 1964 inpath=inpath, 1965 partition=self._parse_partition(), 1966 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1967 serde=self._match_text_seq("SERDE") and self._parse_string(), 1968 ) 1969 return self._parse_as_command(self._prev) 1970 1971 def _parse_delete(self) -> exp.Delete: 1972 # This handles MySQL's "Multiple-Table Syntax" 1973 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1974 tables = None 1975 comments = self._prev_comments 1976 if not self._match(TokenType.FROM, advance=False): 1977 tables = self._parse_csv(self._parse_table) or None 1978 1979 returning = self._parse_returning() 1980 1981 return self.expression( 1982 exp.Delete, 1983 comments=comments, 1984 tables=tables, 1985 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1986 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1987 where=self._parse_where(), 1988 returning=returning or self._parse_returning(), 1989 limit=self._parse_limit(), 1990 ) 1991 1992 def _parse_update(self) -> exp.Update: 1993 comments = self._prev_comments 1994 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1995 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1996 returning = self._parse_returning() 1997 return self.expression( 1998 exp.Update, 1999 comments=comments, 2000 **{ # type: ignore 2001 "this": this, 2002 "expressions": expressions, 2003 "from": self._parse_from(joins=True), 2004 "where": self._parse_where(), 2005 "returning": returning or self._parse_returning(), 2006 "order": self._parse_order(), 2007 "limit": self._parse_limit(), 2008 }, 2009 ) 2010 2011 def _parse_uncache(self) -> exp.Uncache: 2012 if not self._match(TokenType.TABLE): 2013 self.raise_error("Expecting TABLE after UNCACHE") 2014 2015 return self.expression( 2016 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2017 ) 2018 2019 def _parse_cache(self) -> exp.Cache: 2020 lazy = self._match_text_seq("LAZY") 2021 self._match(TokenType.TABLE) 2022 table = self._parse_table(schema=True) 2023 2024 options = [] 2025 if self._match_text_seq("OPTIONS"): 2026 self._match_l_paren() 2027 k = self._parse_string() 2028 self._match(TokenType.EQ) 2029 v = self._parse_string() 2030 options = [k, v] 2031 self._match_r_paren() 2032 2033 self._match(TokenType.ALIAS) 2034 return self.expression( 2035 exp.Cache, 2036 this=table, 2037 lazy=lazy, 2038 options=options, 2039 expression=self._parse_select(nested=True), 2040 ) 2041 2042 def _parse_partition(self) -> t.Optional[exp.Partition]: 2043 if not self._match(TokenType.PARTITION): 2044 return None 2045 2046 return self.expression( 2047 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2048 ) 2049 2050 def _parse_value(self) -> exp.Tuple: 2051 if self._match(TokenType.L_PAREN): 2052 expressions = self._parse_csv(self._parse_conjunction) 2053 self._match_r_paren() 2054 return self.expression(exp.Tuple, expressions=expressions) 2055 2056 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
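# Illustrative sketch of _parse_delete's MySQL multiple-table form above
# (table names are placeholders): the tables listed before FROM land in the
# "tables" arg of the Delete expression.
#
#   >>> import sqlglot
#   >>> sql = "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x = 1"
#   >>> e = sqlglot.parse_one(sql, read="mysql")
#   >>> [t.name for t in e.args["tables"]]
#   ['t1']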
2057 # https://prestodb.io/docs/current/sql/values.html 2058 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2059 2060 def _parse_projections(self) -> t.List[exp.Expression]: 2061 return self._parse_expressions() 2062 2063 def _parse_select( 2064 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2065 ) -> t.Optional[exp.Expression]: 2066 cte = self._parse_with() 2067 2068 if cte: 2069 this = self._parse_statement() 2070 2071 if not this: 2072 self.raise_error("Failed to parse any statement following CTE") 2073 return cte 2074 2075 if "with" in this.arg_types: 2076 this.set("with", cte) 2077 else: 2078 self.raise_error(f"{this.key} does not support CTE") 2079 this = cte 2080 2081 return this 2082 2083 # duckdb supports leading with FROM x 2084 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2085 2086 if self._match(TokenType.SELECT): 2087 comments = self._prev_comments 2088 2089 hint = self._parse_hint() 2090 all_ = self._match(TokenType.ALL) 2091 distinct = self._match_set(self.DISTINCT_TOKENS) 2092 2093 kind = ( 2094 self._match(TokenType.ALIAS) 2095 and self._match_texts(("STRUCT", "VALUE")) 2096 and self._prev.text 2097 ) 2098 2099 if distinct: 2100 distinct = self.expression( 2101 exp.Distinct, 2102 on=self._parse_value() if self._match(TokenType.ON) else None, 2103 ) 2104 2105 if all_ and distinct: 2106 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2107 2108 limit = self._parse_limit(top=True) 2109 projections = self._parse_projections() 2110 2111 this = self.expression( 2112 exp.Select, 2113 kind=kind, 2114 hint=hint, 2115 distinct=distinct, 2116 expressions=projections, 2117 limit=limit, 2118 ) 2119 this.comments = comments 2120 2121 into = self._parse_into() 2122 if into: 2123 this.set("into", into) 2124 2125 if not from_: 2126 from_ = self._parse_from() 2127 2128 if from_: 2129 this.set("from", from_) 2130 2131 this = self._parse_query_modifiers(this) 2132 elif (table or nested) and self._match(TokenType.L_PAREN): 2133 if self._match(TokenType.PIVOT): 2134 this = self._parse_simplified_pivot() 2135 elif self._match(TokenType.FROM): 2136 this = exp.select("*").from_( 2137 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2138 ) 2139 else: 2140 this = self._parse_table() if table else self._parse_select(nested=True) 2141 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2142 2143 self._match_r_paren() 2144 2145 # We return early here so that the UNION isn't attached to the subquery by the 2146 # following call to _parse_set_operations, but instead becomes the parent node 2147 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2148 elif self._match(TokenType.VALUES): 2149 this = self.expression( 2150 exp.Values, 2151 expressions=self._parse_csv(self._parse_value), 2152 alias=self._parse_table_alias(), 2153 ) 2154 elif from_: 2155 this = exp.select("*").from_(from_.this, copy=False) 2156 else: 2157 this = None 2158 2159 return self._parse_set_operations(this) 2160 2161 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2162 if not skip_with_token and not self._match(TokenType.WITH): 2163 return None 2164 2165 comments = self._prev_comments 2166 recursive = self._match(TokenType.RECURSIVE) 2167 2168 expressions = [] 2169 while True: 2170 expressions.append(self._parse_cte()) 2171 2172 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2173 break 2174 else: 2175 
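# Illustrative sketch of the leading-FROM branch in _parse_select above
# (DuckDB syntax; the table name is a placeholder). A bare FROM is expanded
# into SELECT * FROM.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("FROM t", read="duckdb").sql()
#   'SELECT * FROM t'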
self._match(TokenType.WITH) 2176 2177 return self.expression( 2178 exp.With, comments=comments, expressions=expressions, recursive=recursive 2179 ) 2180 2181 def _parse_cte(self) -> exp.CTE: 2182 alias = self._parse_table_alias() 2183 if not alias or not alias.this: 2184 self.raise_error("Expected CTE to have alias") 2185 2186 self._match(TokenType.ALIAS) 2187 return self.expression( 2188 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2189 ) 2190 2191 def _parse_table_alias( 2192 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2193 ) -> t.Optional[exp.TableAlias]: 2194 any_token = self._match(TokenType.ALIAS) 2195 alias = ( 2196 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2197 or self._parse_string_as_identifier() 2198 ) 2199 2200 index = self._index 2201 if self._match(TokenType.L_PAREN): 2202 columns = self._parse_csv(self._parse_function_parameter) 2203 self._match_r_paren() if columns else self._retreat(index) 2204 else: 2205 columns = None 2206 2207 if not alias and not columns: 2208 return None 2209 2210 return self.expression(exp.TableAlias, this=alias, columns=columns) 2211 2212 def _parse_subquery( 2213 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2214 ) -> t.Optional[exp.Subquery]: 2215 if not this: 2216 return None 2217 2218 return self.expression( 2219 exp.Subquery, 2220 this=this, 2221 pivots=self._parse_pivots(), 2222 alias=self._parse_table_alias() if parse_alias else None, 2223 ) 2224 2225 def _parse_query_modifiers( 2226 self, this: t.Optional[exp.Expression] 2227 ) -> t.Optional[exp.Expression]: 2228 if isinstance(this, self.MODIFIABLES): 2229 for join in iter(self._parse_join, None): 2230 this.append("joins", join) 2231 for lateral in iter(self._parse_lateral, None): 2232 this.append("laterals", lateral) 2233 2234 while True: 2235 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2236 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2237 key, expression = parser(self) 2238 2239 if expression: 2240 this.set(key, expression) 2241 if key == "limit": 2242 offset = expression.args.pop("offset", None) 2243 if offset: 2244 this.set("offset", exp.Offset(expression=offset)) 2245 continue 2246 break 2247 return this 2248 2249 def _parse_hint(self) -> t.Optional[exp.Hint]: 2250 if self._match(TokenType.HINT): 2251 hints = [] 2252 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2253 hints.extend(hint) 2254 2255 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2256 self.raise_error("Expected */ after HINT") 2257 2258 return self.expression(exp.Hint, expressions=hints) 2259 2260 return None 2261 2262 def _parse_into(self) -> t.Optional[exp.Into]: 2263 if not self._match(TokenType.INTO): 2264 return None 2265 2266 temp = self._match(TokenType.TEMPORARY) 2267 unlogged = self._match_text_seq("UNLOGGED") 2268 self._match(TokenType.TABLE) 2269 2270 return self.expression( 2271 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2272 ) 2273 2274 def _parse_from( 2275 self, joins: bool = False, skip_from_token: bool = False 2276 ) -> t.Optional[exp.From]: 2277 if not skip_from_token and not self._match(TokenType.FROM): 2278 return None 2279 2280 return self.expression( 2281 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2282 ) 2283 2284 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2285 if not self._match(TokenType.MATCH_RECOGNIZE): 2286 return None 2287 2288 
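# Illustrative sketch of _parse_with/_parse_cte above (the CTE name and
# projection are placeholders):
#
#   >>> import sqlglot
#   >>> e = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
#   >>> e.args["with"].expressions[0].alias
#   'x'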
self._match_l_paren() 2289 2290 partition = self._parse_partition_by() 2291 order = self._parse_order() 2292 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2293 2294 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2295 rows = exp.var("ONE ROW PER MATCH") 2296 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2297 text = "ALL ROWS PER MATCH" 2298 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2299 text += f" SHOW EMPTY MATCHES" 2300 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2301 text += f" OMIT EMPTY MATCHES" 2302 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2303 text += f" WITH UNMATCHED ROWS" 2304 rows = exp.var(text) 2305 else: 2306 rows = None 2307 2308 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2309 text = "AFTER MATCH SKIP" 2310 if self._match_text_seq("PAST", "LAST", "ROW"): 2311 text += f" PAST LAST ROW" 2312 elif self._match_text_seq("TO", "NEXT", "ROW"): 2313 text += f" TO NEXT ROW" 2314 elif self._match_text_seq("TO", "FIRST"): 2315 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2316 elif self._match_text_seq("TO", "LAST"): 2317 text += f" TO LAST {self._advance_any().text}" # type: ignore 2318 after = exp.var(text) 2319 else: 2320 after = None 2321 2322 if self._match_text_seq("PATTERN"): 2323 self._match_l_paren() 2324 2325 if not self._curr: 2326 self.raise_error("Expecting )", self._curr) 2327 2328 paren = 1 2329 start = self._curr 2330 2331 while self._curr and paren > 0: 2332 if self._curr.token_type == TokenType.L_PAREN: 2333 paren += 1 2334 if self._curr.token_type == TokenType.R_PAREN: 2335 paren -= 1 2336 2337 end = self._prev 2338 self._advance() 2339 2340 if paren > 0: 2341 self.raise_error("Expecting )", self._curr) 2342 2343 pattern = exp.var(self._find_sql(start, end)) 2344 else: 2345 pattern = None 2346 2347 define = ( 2348 self._parse_csv( 2349 lambda: self.expression( 2350 exp.Alias, 2351 alias=self._parse_id_var(any_token=True), 2352 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2353 ) 2354 ) 2355 if self._match_text_seq("DEFINE") 2356 else None 2357 ) 2358 2359 self._match_r_paren() 2360 2361 return self.expression( 2362 exp.MatchRecognize, 2363 partition_by=partition, 2364 order=order, 2365 measures=measures, 2366 rows=rows, 2367 after=after, 2368 pattern=pattern, 2369 define=define, 2370 alias=self._parse_table_alias(), 2371 ) 2372 2373 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2374 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2375 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2376 2377 if outer_apply or cross_apply: 2378 this = self._parse_select(table=True) 2379 view = None 2380 outer = not cross_apply 2381 elif self._match(TokenType.LATERAL): 2382 this = self._parse_select(table=True) 2383 view = self._match(TokenType.VIEW) 2384 outer = self._match(TokenType.OUTER) 2385 else: 2386 return None 2387 2388 if not this: 2389 this = ( 2390 self._parse_unnest() 2391 or self._parse_function() 2392 or self._parse_id_var(any_token=False) 2393 ) 2394 2395 while self._match(TokenType.DOT): 2396 this = exp.Dot( 2397 this=this, 2398 expression=self._parse_function() or self._parse_id_var(any_token=False), 2399 ) 2400 2401 if view: 2402 table = self._parse_id_var(any_token=False) 2403 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2404 table_alias: t.Optional[exp.TableAlias] = self.expression( 2405 exp.TableAlias, this=table, columns=columns 2406 ) 2407 elif 
isinstance(this, exp.Subquery) and this.alias: 2408 # Ensures parity between the Subquery's and the Lateral's "alias" args 2409 table_alias = this.args["alias"].copy() 2410 else: 2411 table_alias = self._parse_table_alias() 2412 2413 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2414 2415 def _parse_join_parts( 2416 self, 2417 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2418 return ( 2419 self._match_set(self.JOIN_METHODS) and self._prev, 2420 self._match_set(self.JOIN_SIDES) and self._prev, 2421 self._match_set(self.JOIN_KINDS) and self._prev, 2422 ) 2423 2424 def _parse_join( 2425 self, skip_join_token: bool = False, parse_bracket: bool = False 2426 ) -> t.Optional[exp.Join]: 2427 if self._match(TokenType.COMMA): 2428 return self.expression(exp.Join, this=self._parse_table()) 2429 2430 index = self._index 2431 method, side, kind = self._parse_join_parts() 2432 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2433 join = self._match(TokenType.JOIN) 2434 2435 if not skip_join_token and not join: 2436 self._retreat(index) 2437 kind = None 2438 method = None 2439 side = None 2440 2441 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2442 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2443 2444 if not skip_join_token and not join and not outer_apply and not cross_apply: 2445 return None 2446 2447 if outer_apply: 2448 side = Token(TokenType.LEFT, "LEFT") 2449 2450 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2451 2452 if method: 2453 kwargs["method"] = method.text 2454 if side: 2455 kwargs["side"] = side.text 2456 if kind: 2457 kwargs["kind"] = kind.text 2458 if hint: 2459 kwargs["hint"] = hint 2460 2461 if self._match(TokenType.ON): 2462 kwargs["on"] = self._parse_conjunction() 2463 elif self._match(TokenType.USING): 2464 kwargs["using"] = self._parse_wrapped_id_vars() 2465 elif not (kind and kind.token_type == TokenType.CROSS): 2466 index = self._index 2467 join = self._parse_join() 2468 2469 if join and self._match(TokenType.ON): 2470 kwargs["on"] = self._parse_conjunction() 2471 elif join and self._match(TokenType.USING): 2472 kwargs["using"] = self._parse_wrapped_id_vars() 2473 else: 2474 join = None 2475 self._retreat(index) 2476 2477 kwargs["this"].set("joins", [join] if join else None) 2478 2479 comments = [c for token in (method, side, kind) if token for c in token.comments] 2480 return self.expression(exp.Join, comments=comments, **kwargs) 2481 2482 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2483 this = self._parse_conjunction() 2484 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2485 return this 2486 2487 opclass = self._parse_var(any_token=True) 2488 if opclass: 2489 return self.expression(exp.Opclass, this=this, expression=opclass) 2490 2491 return this 2492 2493 def _parse_index( 2494 self, 2495 index: t.Optional[exp.Expression] = None, 2496 ) -> t.Optional[exp.Index]: 2497 if index: 2498 unique = None 2499 primary = None 2500 amp = None 2501 2502 self._match(TokenType.ON) 2503 self._match(TokenType.TABLE) # hive 2504 table = self._parse_table_parts(schema=True) 2505 else: 2506 unique = self._match(TokenType.UNIQUE) 2507 primary = self._match_text_seq("PRIMARY") 2508 amp = self._match_text_seq("AMP") 2509 2510 if not self._match(TokenType.INDEX): 2511 return None 2512 2513 index = self._parse_id_var() 2514 table = None 2515 2516 using = self._parse_var(any_token=True) 
if self._match(TokenType.USING) else None 2517 2518 if self._match(TokenType.L_PAREN, advance=False): 2519 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2520 else: 2521 columns = None 2522 2523 return self.expression( 2524 exp.Index, 2525 this=index, 2526 table=table, 2527 using=using, 2528 columns=columns, 2529 unique=unique, 2530 primary=primary, 2531 amp=amp, 2532 partition_by=self._parse_partition_by(), 2533 where=self._parse_where(), 2534 ) 2535 2536 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2537 hints: t.List[exp.Expression] = [] 2538 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2539 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2540 hints.append( 2541 self.expression( 2542 exp.WithTableHint, 2543 expressions=self._parse_csv( 2544 lambda: self._parse_function() or self._parse_var(any_token=True) 2545 ), 2546 ) 2547 ) 2548 self._match_r_paren() 2549 else: 2550 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2551 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2552 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2553 2554 self._match_texts({"INDEX", "KEY"}) 2555 if self._match(TokenType.FOR): 2556 hint.set("target", self._advance_any() and self._prev.text.upper()) 2557 2558 hint.set("expressions", self._parse_wrapped_id_vars()) 2559 hints.append(hint) 2560 2561 return hints or None 2562 2563 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2564 return ( 2565 (not schema and self._parse_function(optional_parens=False)) 2566 or self._parse_id_var(any_token=False) 2567 or self._parse_string_as_identifier() 2568 or self._parse_placeholder() 2569 ) 2570 2571 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2572 catalog = None 2573 db = None 2574 table = self._parse_table_part(schema=schema) 2575 2576 while self._match(TokenType.DOT): 2577 if catalog: 2578 # This allows nesting the table in arbitrarily many dot expressions if needed 2579 table = self.expression( 2580 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2581 ) 2582 else: 2583 catalog = db 2584 db = table 2585 table = self._parse_table_part(schema=schema) 2586 2587 if not table: 2588 self.raise_error(f"Expected table name but got {self._curr}") 2589 2590 return self.expression( 2591 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2592 ) 2593 2594 def _parse_table( 2595 self, 2596 schema: bool = False, 2597 joins: bool = False, 2598 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2599 parse_bracket: bool = False, 2600 ) -> t.Optional[exp.Expression]: 2601 lateral = self._parse_lateral() 2602 if lateral: 2603 return lateral 2604 2605 unnest = self._parse_unnest() 2606 if unnest: 2607 return unnest 2608 2609 values = self._parse_derived_table_values() 2610 if values: 2611 return values 2612 2613 subquery = self._parse_select(table=True) 2614 if subquery: 2615 if not subquery.args.get("pivots"): 2616 subquery.set("pivots", self._parse_pivots()) 2617 return subquery 2618 2619 bracket = parse_bracket and self._parse_bracket(None) 2620 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2621 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2622 2623 if schema: 2624 return self._parse_schema(this=this) 2625 2626 version = self._parse_version() 2627 2628 if version: 2629 this.set("version", version) 2630 2631 if self.ALIAS_POST_TABLESAMPLE: 
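# Illustrative sketch of _parse_table_parts above resolving a dotted
# catalog.db.table reference (identifiers are placeholders):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> tbl = sqlglot.parse_one("SELECT * FROM c.db.t").find(exp.Table)
#   >>> (tbl.catalog, tbl.db, tbl.name)
#   ('c', 'db', 't')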
2632 table_sample = self._parse_table_sample() 2633 2634 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2635 if alias: 2636 this.set("alias", alias) 2637 2638 this.set("hints", self._parse_table_hints()) 2639 2640 if not this.args.get("pivots"): 2641 this.set("pivots", self._parse_pivots()) 2642 2643 if not self.ALIAS_POST_TABLESAMPLE: 2644 table_sample = self._parse_table_sample() 2645 2646 if table_sample: 2647 table_sample.set("this", this) 2648 this = table_sample 2649 2650 if joins: 2651 for join in iter(self._parse_join, None): 2652 this.append("joins", join) 2653 2654 return this 2655 2656 def _parse_version(self) -> t.Optional[exp.Version]: 2657 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2658 this = "TIMESTAMP" 2659 elif self._match(TokenType.VERSION_SNAPSHOT): 2660 this = "VERSION" 2661 else: 2662 return None 2663 2664 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2665 kind = self._prev.text.upper() 2666 start = self._parse_bitwise() 2667 self._match_texts(("TO", "AND")) 2668 end = self._parse_bitwise() 2669 expression: t.Optional[exp.Expression] = self.expression( 2670 exp.Tuple, expressions=[start, end] 2671 ) 2672 elif self._match_text_seq("CONTAINED", "IN"): 2673 kind = "CONTAINED IN" 2674 expression = self.expression( 2675 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2676 ) 2677 elif self._match(TokenType.ALL): 2678 kind = "ALL" 2679 expression = None 2680 else: 2681 self._match_text_seq("AS", "OF") 2682 kind = "AS OF" 2683 expression = self._parse_type() 2684 2685 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2686 2687 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2688 if not self._match(TokenType.UNNEST): 2689 return None 2690 2691 expressions = self._parse_wrapped_csv(self._parse_type) 2692 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2693 2694 alias = self._parse_table_alias() if with_alias else None 2695 2696 if alias: 2697 if self.UNNEST_COLUMN_ONLY: 2698 if alias.args.get("columns"): 2699 self.raise_error("Unexpected extra column alias in unnest.") 2700 2701 alias.set("columns", [alias.this]) 2702 alias.set("this", None) 2703 2704 columns = alias.args.get("columns") or [] 2705 if offset and len(expressions) < len(columns): 2706 offset = columns.pop() 2707 2708 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2709 self._match(TokenType.ALIAS) 2710 offset = self._parse_id_var( 2711 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2712 ) or exp.to_identifier("offset") 2713 2714 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2715 2716 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2717 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2718 if not is_derived and not self._match(TokenType.VALUES): 2719 return None 2720 2721 expressions = self._parse_csv(self._parse_value) 2722 alias = self._parse_table_alias() 2723 2724 if is_derived: 2725 self._match_r_paren() 2726 2727 return self.expression( 2728 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2729 ) 2730 2731 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2732 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2733 as_modifier and self._match_text_seq("USING", "SAMPLE") 2734 ): 2735 return None 2736 2737 bucket_numerator = None 2738 bucket_denominator = None 2739 bucket_field = None 2740 
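# Illustrative sketch of _parse_unnest above (BigQuery syntax; the array
# literal and alias are placeholders):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> e = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x", read="bigquery")
#   >>> e.find(exp.Unnest) is not None
#   True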
percent = None 2741 rows = None 2742 size = None 2743 seed = None 2744 2745 kind = ( 2746 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2747 ) 2748 method = self._parse_var(tokens=(TokenType.ROW,)) 2749 2750 matched_l_paren = self._match(TokenType.L_PAREN) 2751 2752 if self.TABLESAMPLE_CSV: 2753 num = None 2754 expressions = self._parse_csv(self._parse_primary) 2755 else: 2756 expressions = None 2757 num = ( 2758 self._parse_factor() 2759 if self._match(TokenType.NUMBER, advance=False) 2760 else self._parse_primary() 2761 ) 2762 2763 if self._match_text_seq("BUCKET"): 2764 bucket_numerator = self._parse_number() 2765 self._match_text_seq("OUT", "OF") 2766 bucket_denominator = self._parse_number() 2767 self._match(TokenType.ON) 2768 bucket_field = self._parse_field() 2769 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2770 percent = num 2771 elif self._match(TokenType.ROWS): 2772 rows = num 2773 elif num: 2774 size = num 2775 2776 if matched_l_paren: 2777 self._match_r_paren() 2778 2779 if self._match(TokenType.L_PAREN): 2780 method = self._parse_var() 2781 seed = self._match(TokenType.COMMA) and self._parse_number() 2782 self._match_r_paren() 2783 elif self._match_texts(("SEED", "REPEATABLE")): 2784 seed = self._parse_wrapped(self._parse_number) 2785 2786 return self.expression( 2787 exp.TableSample, 2788 expressions=expressions, 2789 method=method, 2790 bucket_numerator=bucket_numerator, 2791 bucket_denominator=bucket_denominator, 2792 bucket_field=bucket_field, 2793 percent=percent, 2794 rows=rows, 2795 size=size, 2796 seed=seed, 2797 kind=kind, 2798 ) 2799 2800 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2801 return list(iter(self._parse_pivot, None)) or None 2802 2803 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2804 return list(iter(self._parse_join, None)) or None 2805 2806 # https://duckdb.org/docs/sql/statements/pivot 2807 def _parse_simplified_pivot(self) -> exp.Pivot: 2808 def _parse_on() -> t.Optional[exp.Expression]: 2809 this = self._parse_bitwise() 2810 return self._parse_in(this) if self._match(TokenType.IN) else this 2811 2812 this = self._parse_table() 2813 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2814 using = self._match(TokenType.USING) and self._parse_csv( 2815 lambda: self._parse_alias(self._parse_function()) 2816 ) 2817 group = self._parse_group() 2818 return self.expression( 2819 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2820 ) 2821 2822 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2823 index = self._index 2824 include_nulls = None 2825 2826 if self._match(TokenType.PIVOT): 2827 unpivot = False 2828 elif self._match(TokenType.UNPIVOT): 2829 unpivot = True 2830 2831 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2832 if self._match_text_seq("INCLUDE", "NULLS"): 2833 include_nulls = True 2834 elif self._match_text_seq("EXCLUDE", "NULLS"): 2835 include_nulls = False 2836 else: 2837 return None 2838 2839 expressions = [] 2840 field = None 2841 2842 if not self._match(TokenType.L_PAREN): 2843 self._retreat(index) 2844 return None 2845 2846 if unpivot: 2847 expressions = self._parse_csv(self._parse_column) 2848 else: 2849 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2850 2851 if not expressions: 2852 self.raise_error("Failed to parse PIVOT's aggregation list") 2853 2854 if not self._match(TokenType.FOR): 2855 
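# Illustrative sketch of _parse_pivot (Snowflake syntax; names are
# placeholders and dialect coverage may vary):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
#   >>> e = sqlglot.parse_one(sql, read="snowflake")
#   >>> e.find(exp.Pivot) is not None
#   True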
self.raise_error("Expecting FOR") 2856 2857 value = self._parse_column() 2858 2859 if not self._match(TokenType.IN): 2860 self.raise_error("Expecting IN") 2861 2862 field = self._parse_in(value, alias=True) 2863 2864 self._match_r_paren() 2865 2866 pivot = self.expression( 2867 exp.Pivot, 2868 expressions=expressions, 2869 field=field, 2870 unpivot=unpivot, 2871 include_nulls=include_nulls, 2872 ) 2873 2874 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2875 pivot.set("alias", self._parse_table_alias()) 2876 2877 if not unpivot: 2878 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2879 2880 columns: t.List[exp.Expression] = [] 2881 for fld in pivot.args["field"].expressions: 2882 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2883 for name in names: 2884 if self.PREFIXED_PIVOT_COLUMNS: 2885 name = f"{name}_{field_name}" if name else field_name 2886 else: 2887 name = f"{field_name}_{name}" if name else field_name 2888 2889 columns.append(exp.to_identifier(name)) 2890 2891 pivot.set("columns", columns) 2892 2893 return pivot 2894 2895 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2896 return [agg.alias for agg in aggregations] 2897 2898 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2899 if not skip_where_token and not self._match(TokenType.WHERE): 2900 return None 2901 2902 return self.expression( 2903 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2904 ) 2905 2906 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2907 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2908 return None 2909 2910 elements = defaultdict(list) 2911 2912 if self._match(TokenType.ALL): 2913 return self.expression(exp.Group, all=True) 2914 2915 while True: 2916 expressions = self._parse_csv(self._parse_conjunction) 2917 if expressions: 2918 elements["expressions"].extend(expressions) 2919 2920 grouping_sets = self._parse_grouping_sets() 2921 if grouping_sets: 2922 elements["grouping_sets"].extend(grouping_sets) 2923 2924 rollup = None 2925 cube = None 2926 totals = None 2927 2928 with_ = self._match(TokenType.WITH) 2929 if self._match(TokenType.ROLLUP): 2930 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2931 elements["rollup"].extend(ensure_list(rollup)) 2932 2933 if self._match(TokenType.CUBE): 2934 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2935 elements["cube"].extend(ensure_list(cube)) 2936 2937 if self._match_text_seq("TOTALS"): 2938 totals = True 2939 elements["totals"] = True # type: ignore 2940 2941 if not (grouping_sets or rollup or cube or totals): 2942 break 2943 2944 return self.expression(exp.Group, **elements) # type: ignore 2945 2946 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2947 if not self._match(TokenType.GROUPING_SETS): 2948 return None 2949 2950 return self._parse_wrapped_csv(self._parse_grouping_set) 2951 2952 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2953 if self._match(TokenType.L_PAREN): 2954 grouping_set = self._parse_csv(self._parse_column) 2955 self._match_r_paren() 2956 return self.expression(exp.Tuple, expressions=grouping_set) 2957 2958 return self._parse_column() 2959 2960 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2961 if not skip_having_token and not self._match(TokenType.HAVING): 2962 return None 2963 return 
self.expression(exp.Having, this=self._parse_conjunction()) 2964 2965 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2966 if not self._match(TokenType.QUALIFY): 2967 return None 2968 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2969 2970 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2971 if skip_start_token: 2972 start = None 2973 elif self._match(TokenType.START_WITH): 2974 start = self._parse_conjunction() 2975 else: 2976 return None 2977 2978 self._match(TokenType.CONNECT_BY) 2979 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2980 exp.Prior, this=self._parse_bitwise() 2981 ) 2982 connect = self._parse_conjunction() 2983 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2984 2985 if not start and self._match(TokenType.START_WITH): 2986 start = self._parse_conjunction() 2987 2988 return self.expression(exp.Connect, start=start, connect=connect) 2989 2990 def _parse_order( 2991 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2992 ) -> t.Optional[exp.Expression]: 2993 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2994 return this 2995 2996 return self.expression( 2997 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2998 ) 2999 3000 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3001 if not self._match(token): 3002 return None 3003 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3004 3005 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3006 this = parse_method() if parse_method else self._parse_conjunction() 3007 3008 asc = self._match(TokenType.ASC) 3009 desc = self._match(TokenType.DESC) or (asc and False) 3010 3011 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3012 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3013 3014 nulls_first = is_nulls_first or False 3015 explicitly_null_ordered = is_nulls_first or is_nulls_last 3016 3017 if ( 3018 not explicitly_null_ordered 3019 and ( 3020 (not desc and self.NULL_ORDERING == "nulls_are_small") 3021 or (desc and self.NULL_ORDERING != "nulls_are_small") 3022 ) 3023 and self.NULL_ORDERING != "nulls_are_last" 3024 ): 3025 nulls_first = True 3026 3027 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3028 3029 def _parse_limit( 3030 self, this: t.Optional[exp.Expression] = None, top: bool = False 3031 ) -> t.Optional[exp.Expression]: 3032 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3033 comments = self._prev_comments 3034 if top: 3035 limit_paren = self._match(TokenType.L_PAREN) 3036 expression = self._parse_number() 3037 3038 if limit_paren: 3039 self._match_r_paren() 3040 else: 3041 expression = self._parse_term() 3042 3043 if self._match(TokenType.COMMA): 3044 offset = expression 3045 expression = self._parse_term() 3046 else: 3047 offset = None 3048 3049 limit_exp = self.expression( 3050 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3051 ) 3052 3053 return limit_exp 3054 3055 if self._match(TokenType.FETCH): 3056 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3057 direction = self._prev.text if direction else "FIRST" 3058 3059 count = self._parse_field(tokens=self.FETCH_TOKENS) 3060 percent = self._match(TokenType.PERCENT) 3061 3062 self._match_set((TokenType.ROW, TokenType.ROWS)) 3063 3064 only = self._match_text_seq("ONLY") 3065 with_ties = self._match_text_seq("WITH", 
"TIES") 3066 3067 if only and with_ties: 3068 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3069 3070 return self.expression( 3071 exp.Fetch, 3072 direction=direction, 3073 count=count, 3074 percent=percent, 3075 with_ties=with_ties, 3076 ) 3077 3078 return this 3079 3080 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3081 if not self._match(TokenType.OFFSET): 3082 return this 3083 3084 count = self._parse_term() 3085 self._match_set((TokenType.ROW, TokenType.ROWS)) 3086 return self.expression(exp.Offset, this=this, expression=count) 3087 3088 def _parse_locks(self) -> t.List[exp.Lock]: 3089 locks = [] 3090 while True: 3091 if self._match_text_seq("FOR", "UPDATE"): 3092 update = True 3093 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3094 "LOCK", "IN", "SHARE", "MODE" 3095 ): 3096 update = False 3097 else: 3098 break 3099 3100 expressions = None 3101 if self._match_text_seq("OF"): 3102 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3103 3104 wait: t.Optional[bool | exp.Expression] = None 3105 if self._match_text_seq("NOWAIT"): 3106 wait = True 3107 elif self._match_text_seq("WAIT"): 3108 wait = self._parse_primary() 3109 elif self._match_text_seq("SKIP", "LOCKED"): 3110 wait = False 3111 3112 locks.append( 3113 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3114 ) 3115 3116 return locks 3117 3118 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3119 if not self._match_set(self.SET_OPERATIONS): 3120 return this 3121 3122 token_type = self._prev.token_type 3123 3124 if token_type == TokenType.UNION: 3125 expression = exp.Union 3126 elif token_type == TokenType.EXCEPT: 3127 expression = exp.Except 3128 else: 3129 expression = exp.Intersect 3130 3131 return self.expression( 3132 expression, 3133 this=this, 3134 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3135 by_name=self._match_text_seq("BY", "NAME"), 3136 expression=self._parse_set_operations(self._parse_select(nested=True)), 3137 ) 3138 3139 def _parse_expression(self) -> t.Optional[exp.Expression]: 3140 return self._parse_alias(self._parse_conjunction()) 3141 3142 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3143 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3144 3145 def _parse_equality(self) -> t.Optional[exp.Expression]: 3146 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3147 3148 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3149 return self._parse_tokens(self._parse_range, self.COMPARISON) 3150 3151 def _parse_range(self) -> t.Optional[exp.Expression]: 3152 this = self._parse_bitwise() 3153 negate = self._match(TokenType.NOT) 3154 3155 if self._match_set(self.RANGE_PARSERS): 3156 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3157 if not expression: 3158 return this 3159 3160 this = expression 3161 elif self._match(TokenType.ISNULL): 3162 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3163 3164 # Postgres supports ISNULL and NOTNULL for conditions. 
3165 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3166 if self._match(TokenType.NOTNULL): 3167 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3168 this = self.expression(exp.Not, this=this) 3169 3170 if negate: 3171 this = self.expression(exp.Not, this=this) 3172 3173 if self._match(TokenType.IS): 3174 this = self._parse_is(this) 3175 3176 return this 3177 3178 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3179 index = self._index - 1 3180 negate = self._match(TokenType.NOT) 3181 3182 if self._match_text_seq("DISTINCT", "FROM"): 3183 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3184 return self.expression(klass, this=this, expression=self._parse_expression()) 3185 3186 expression = self._parse_null() or self._parse_boolean() 3187 if not expression: 3188 self._retreat(index) 3189 return None 3190 3191 this = self.expression(exp.Is, this=this, expression=expression) 3192 return self.expression(exp.Not, this=this) if negate else this 3193 3194 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3195 unnest = self._parse_unnest(with_alias=False) 3196 if unnest: 3197 this = self.expression(exp.In, this=this, unnest=unnest) 3198 elif self._match(TokenType.L_PAREN): 3199 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3200 3201 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3202 this = self.expression(exp.In, this=this, query=expressions[0]) 3203 else: 3204 this = self.expression(exp.In, this=this, expressions=expressions) 3205 3206 self._match_r_paren(this) 3207 else: 3208 this = self.expression(exp.In, this=this, field=self._parse_field()) 3209 3210 return this 3211 3212 def _parse_between(self, this: exp.Expression) -> exp.Between: 3213 low = self._parse_bitwise() 3214 self._match(TokenType.AND) 3215 high = self._parse_bitwise() 3216 return self.expression(exp.Between, this=this, low=low, high=high) 3217 3218 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3219 if not self._match(TokenType.ESCAPE): 3220 return this 3221 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3222 3223 def _parse_interval(self) -> t.Optional[exp.Interval]: 3224 index = self._index 3225 3226 if not self._match(TokenType.INTERVAL): 3227 return None 3228 3229 if self._match(TokenType.STRING, advance=False): 3230 this = self._parse_primary() 3231 else: 3232 this = self._parse_term() 3233 3234 if not this: 3235 self._retreat(index) 3236 return None 3237 3238 unit = self._parse_function() or self._parse_var(any_token=True) 3239 3240 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3241 # each INTERVAL expression into this canonical form so it's easy to transpile 3242 if this and this.is_number: 3243 this = exp.Literal.string(this.name) 3244 elif this and this.is_string: 3245 parts = this.name.split() 3246 3247 if len(parts) == 2: 3248 if unit: 3249 # This is not actually a unit, it's something else (e.g. 
a "window side") 3250 unit = None 3251 self._retreat(self._index - 1) 3252 3253 this = exp.Literal.string(parts[0]) 3254 unit = self.expression(exp.Var, this=parts[1]) 3255 3256 return self.expression(exp.Interval, this=this, unit=unit) 3257 3258 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3259 this = self._parse_term() 3260 3261 while True: 3262 if self._match_set(self.BITWISE): 3263 this = self.expression( 3264 self.BITWISE[self._prev.token_type], 3265 this=this, 3266 expression=self._parse_term(), 3267 ) 3268 elif self._match(TokenType.DQMARK): 3269 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3270 elif self._match_pair(TokenType.LT, TokenType.LT): 3271 this = self.expression( 3272 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3273 ) 3274 elif self._match_pair(TokenType.GT, TokenType.GT): 3275 this = self.expression( 3276 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3277 ) 3278 else: 3279 break 3280 3281 return this 3282 3283 def _parse_term(self) -> t.Optional[exp.Expression]: 3284 return self._parse_tokens(self._parse_factor, self.TERM) 3285 3286 def _parse_factor(self) -> t.Optional[exp.Expression]: 3287 return self._parse_tokens(self._parse_unary, self.FACTOR) 3288 3289 def _parse_unary(self) -> t.Optional[exp.Expression]: 3290 if self._match_set(self.UNARY_PARSERS): 3291 return self.UNARY_PARSERS[self._prev.token_type](self) 3292 return self._parse_at_time_zone(self._parse_type()) 3293 3294 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3295 interval = parse_interval and self._parse_interval() 3296 if interval: 3297 return interval 3298 3299 index = self._index 3300 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3301 this = self._parse_column() 3302 3303 if data_type: 3304 if isinstance(this, exp.Literal): 3305 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3306 if parser: 3307 return parser(self, this, data_type) 3308 return self.expression(exp.Cast, this=this, to=data_type) 3309 if not data_type.expressions: 3310 self._retreat(index) 3311 return self._parse_column() 3312 return self._parse_column_ops(data_type) 3313 3314 return this and self._parse_column_ops(this) 3315 3316 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3317 this = self._parse_type() 3318 if not this: 3319 return None 3320 3321 return self.expression( 3322 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3323 ) 3324 3325 def _parse_types( 3326 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3327 ) -> t.Optional[exp.Expression]: 3328 index = self._index 3329 3330 prefix = self._match_text_seq("SYSUDTLIB", ".") 3331 3332 if not self._match_set(self.TYPE_TOKENS): 3333 identifier = allow_identifiers and self._parse_id_var( 3334 any_token=False, tokens=(TokenType.VAR,) 3335 ) 3336 3337 if identifier: 3338 tokens = self._tokenizer.tokenize(identifier.name) 3339 3340 if len(tokens) != 1: 3341 self.raise_error("Unexpected identifier", self._prev) 3342 3343 if tokens[0].token_type in self.TYPE_TOKENS: 3344 self._prev = tokens[0] 3345 elif self.SUPPORTS_USER_DEFINED_TYPES: 3346 type_name = identifier.name 3347 3348 while self._match(TokenType.DOT): 3349 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3350 3351 return exp.DataType.build(type_name, udt=True) 3352 else: 3353 return None 3354 else: 3355 return None 3356 3357 type_token = self._prev.token_type 3358 3359 if type_token 
== TokenType.PSEUDO_TYPE: 3360 return self.expression(exp.PseudoType, this=self._prev.text) 3361 3362 if type_token == TokenType.OBJECT_IDENTIFIER: 3363 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3364 3365 nested = type_token in self.NESTED_TYPE_TOKENS 3366 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3367 expressions = None 3368 maybe_func = False 3369 3370 if self._match(TokenType.L_PAREN): 3371 if is_struct: 3372 expressions = self._parse_csv(self._parse_struct_types) 3373 elif nested: 3374 expressions = self._parse_csv( 3375 lambda: self._parse_types( 3376 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3377 ) 3378 ) 3379 elif type_token in self.ENUM_TYPE_TOKENS: 3380 expressions = self._parse_csv(self._parse_equality) 3381 else: 3382 expressions = self._parse_csv(self._parse_type_size) 3383 3384 if not expressions or not self._match(TokenType.R_PAREN): 3385 self._retreat(index) 3386 return None 3387 3388 maybe_func = True 3389 3390 this: t.Optional[exp.Expression] = None 3391 values: t.Optional[t.List[exp.Expression]] = None 3392 3393 if nested and self._match(TokenType.LT): 3394 if is_struct: 3395 expressions = self._parse_csv(self._parse_struct_types) 3396 else: 3397 expressions = self._parse_csv( 3398 lambda: self._parse_types( 3399 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3400 ) 3401 ) 3402 3403 if not self._match(TokenType.GT): 3404 self.raise_error("Expecting >") 3405 3406 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3407 values = self._parse_csv(self._parse_conjunction) 3408 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3409 3410 if type_token in self.TIMESTAMPS: 3411 if self._match_text_seq("WITH", "TIME", "ZONE"): 3412 maybe_func = False 3413 tz_type = ( 3414 exp.DataType.Type.TIMETZ 3415 if type_token in self.TIMES 3416 else exp.DataType.Type.TIMESTAMPTZ 3417 ) 3418 this = exp.DataType(this=tz_type, expressions=expressions) 3419 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3420 maybe_func = False 3421 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3422 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3423 maybe_func = False 3424 elif type_token == TokenType.INTERVAL: 3425 unit = self._parse_var() 3426 3427 if self._match_text_seq("TO"): 3428 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3429 else: 3430 span = None 3431 3432 if span or not unit: 3433 this = self.expression( 3434 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3435 ) 3436 else: 3437 this = self.expression(exp.Interval, unit=unit) 3438 3439 if maybe_func and check_func: 3440 index2 = self._index 3441 peek = self._parse_string() 3442 3443 if not peek: 3444 self._retreat(index) 3445 return None 3446 3447 self._retreat(index2) 3448 3449 if not this: 3450 if self._match_text_seq("UNSIGNED"): 3451 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3452 if not unsigned_type_token: 3453 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3454 3455 type_token = unsigned_type_token or type_token 3456 3457 this = exp.DataType( 3458 this=exp.DataType.Type[type_token.value], 3459 expressions=expressions, 3460 nested=nested, 3461 values=values, 3462 prefix=prefix, 3463 ) 3464 3465 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3466 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3467 3468 return this 3469 3470 def 
_parse_struct_types(self) -> t.Optional[exp.Expression]: 3471 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3472 self._match(TokenType.COLON) 3473 return self._parse_column_def(this) 3474 3475 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3476 if not self._match_text_seq("AT", "TIME", "ZONE"): 3477 return this 3478 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3479 3480 def _parse_column(self) -> t.Optional[exp.Expression]: 3481 this = self._parse_field() 3482 if isinstance(this, exp.Identifier): 3483 this = self.expression(exp.Column, this=this) 3484 elif not this: 3485 return self._parse_bracket(this) 3486 return self._parse_column_ops(this) 3487 3488 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3489 this = self._parse_bracket(this) 3490 3491 while self._match_set(self.COLUMN_OPERATORS): 3492 op_token = self._prev.token_type 3493 op = self.COLUMN_OPERATORS.get(op_token) 3494 3495 if op_token == TokenType.DCOLON: 3496 field = self._parse_types() 3497 if not field: 3498 self.raise_error("Expected type") 3499 elif op and self._curr: 3500 self._advance() 3501 value = self._prev.text 3502 field = ( 3503 exp.Literal.number(value) 3504 if self._prev.token_type == TokenType.NUMBER 3505 else exp.Literal.string(value) 3506 ) 3507 else: 3508 field = self._parse_field(anonymous_func=True, any_token=True) 3509 3510 if isinstance(field, exp.Func): 3511 # bigquery allows function calls like x.y.count(...) 3512 # SAFE.SUBSTR(...) 3513 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3514 this = self._replace_columns_with_dots(this) 3515 3516 if op: 3517 this = op(self, this, field) 3518 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3519 this = self.expression( 3520 exp.Column, 3521 this=field, 3522 table=this.this, 3523 db=this.args.get("table"), 3524 catalog=this.args.get("db"), 3525 ) 3526 else: 3527 this = self.expression(exp.Dot, this=this, expression=field) 3528 this = self._parse_bracket(this) 3529 return this 3530 3531 def _parse_primary(self) -> t.Optional[exp.Expression]: 3532 if self._match_set(self.PRIMARY_PARSERS): 3533 token_type = self._prev.token_type 3534 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3535 3536 if token_type == TokenType.STRING: 3537 expressions = [primary] 3538 while self._match(TokenType.STRING): 3539 expressions.append(exp.Literal.string(self._prev.text)) 3540 3541 if len(expressions) > 1: 3542 return self.expression(exp.Concat, expressions=expressions) 3543 3544 return primary 3545 3546 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3547 return exp.Literal.number(f"0.{self._prev.text}") 3548 3549 if self._match(TokenType.L_PAREN): 3550 comments = self._prev_comments 3551 query = self._parse_select() 3552 3553 if query: 3554 expressions = [query] 3555 else: 3556 expressions = self._parse_expressions() 3557 3558 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3559 3560 if isinstance(this, exp.Subqueryable): 3561 this = self._parse_set_operations( 3562 self._parse_subquery(this=this, parse_alias=False) 3563 ) 3564 elif len(expressions) > 1: 3565 this = self.expression(exp.Tuple, expressions=expressions) 3566 else: 3567 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3568 3569 if this: 3570 this.add_comments(comments) 3571 3572 self._match_r_paren(expression=this) 3573 return this 3574 3575 
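# Illustrative sketch of the DCOLON branch in _parse_column_ops above, which
# canonicalizes a Postgres-style x::type into a CAST (names are placeholders):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT a::INT FROM t").sql()
#   'SELECT CAST(a AS INT) FROM t'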
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
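
    # Illustrative sketch (not part of the parser source): _parse_function
    # falls back to exp.Anonymous for unknown function names, and the
    # NO_PAREN_FUNCTIONS table lets keywords like CURRENT_DATE parse without
    # parentheses. Assuming the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT CURRENT_DATE")   # -> exp.CurrentDate, no parens needed
    #   >>> sqlglot.parse_one("SELECT MY_UDF(x, y)")   # unknown name -> exp.Anonymous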
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
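
    # Illustrative sketch (not part of the parser source): the LAMBDAS table
    # consumed in _parse_lambda is what parses arrow lambdas in dialects that
    # support them. Assuming the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT TRANSFORM(xs, x -> x + 1)", read="trino")
    #   # the second argument is parsed into an exp.Lambda over "x"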
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
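
    # Illustrative sketch (not part of the parser source): the identity
    # options parsed above come from DDL such as the following, assuming the
    # public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 10))",
    #   ...     read="postgres",
    #   ... )
    #   # the column def carries an exp.GeneratedAsIdentityColumnConstraint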
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
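
    # Illustrative sketch (not part of the parser source): foreign keys with
    # referential actions exercise _parse_foreign_key, _parse_references and
    # _parse_key_constraint_options. Assuming the public sqlglot.parse_one
    # entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES s (a) ON DELETE CASCADE)"
    #   ... )
    #   # the schema contains an exp.ForeignKey with delete="CASCADE"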
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
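
    # Illustrative sketch (not part of the parser source): _parse_bracket
    # distinguishes subscripts, arrays and DuckDB-style struct literals, and
    # _parse_case builds exp.Case from WHEN/THEN/ELSE branches. Assuming the
    # public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT xs[1] FROM t", read="duckdb")  # -> exp.Bracket
    #   >>> sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END FROM t")
    #   # -> exp.Case(ifs=[exp.If(...)], default='b')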
    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
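
    # Illustrative sketch (not part of the parser source): strict vs. lenient
    # casts map to exp.Cast and exp.TryCast respectively. Assuming the public
    # sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT CAST(x AS TEXT)")     # -> exp.Cast
    #   >>> sqlglot.parse_one("SELECT TRY_CAST(x AS INT)")  # -> exp.TryCast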
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
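
    # Illustrative sketch (not part of the parser source): both STRING_AGG
    # spellings end up as exp.GroupConcat, which is what makes transpilation
    # between dialects possible. Assuming the public sqlglot.parse_one entry
    # point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres")
    #   >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t")
    #   # both produce an exp.GroupConcat node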
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
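
    # Illustrative sketch (not part of the parser source): the search/result
    # variant of DECODE is normalized into a CASE expression, as the
    # docstring above describes. Assuming the public sqlglot.parse_one entry
    # point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")
    #   # the DECODE call becomes exp.Case with one exp.If branch and a default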
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
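
    # Illustrative sketch (not part of the parser source): JSON_OBJECT with
    # NULL handling and key-uniqueness flags flows through _parse_json_object
    # and _parse_on_handling. Assuming the public sqlglot.parse_one entry
    # point, roughly:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT JSON_OBJECT('a': 1 ABSENT ON NULL WITH UNIQUE KEYS)")
    #   # -> exp.JSONObject(null_handling="ABSENT ON NULL", unique_keys=True)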
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
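
    # Illustrative sketch (not part of the parser source): both the
    # ANSI-style and the comma-separated spellings normalize to the same
    # nodes. Assuming the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT POSITION('u' IN s) FROM t")        # -> exp.StrPosition
    #   >>> sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y) FROM t")  # -> exp.Trim(position='LEADING')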
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
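
    # Illustrative sketch (not part of the parser source): a full OVER clause
    # with a frame produces an exp.Window carrying an exp.WindowSpec. Assuming
    # the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    #   ...     "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   ... )
    #   # -> exp.Window(partition_by=[y], order=..., spec=exp.WindowSpec(kind="ROWS", ...))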
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()
    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result
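
    # Illustrative sketch (not part of the parser source): _parse_except and
    # _parse_replace back BigQuery's star modifiers. Assuming the public
    # sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * EXCEPT (a) REPLACE (b + 1 AS b) FROM t", read="bigquery")
    #   # the star node carries "except" and "replace" argument lists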
    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
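
    # Illustrative sketch (not part of the parser source): ADD COLUMN
    # clauses, including Databricks-style FIRST/AFTER positions, come through
    # _parse_add_column. Assuming the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   # -> exp.AlterTable with a ColumnDef action for "c"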
    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)
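
    # Illustrative sketch (not part of the parser source): ALTER TABLE
    # statements dispatch through ALTER_PARSERS into the helpers above.
    # Assuming the public sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c TYPE TEXT", read="postgres")
    #   # -> exp.AlterTable with an exp.AlterColumn(dtype=TEXT) action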
    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
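
    # Illustrative sketch (not part of the parser source): a full MERGE
    # statement builds one exp.When per WHEN branch. Assuming the public
    # sqlglot.parse_one entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "MERGE INTO t USING s ON t.id = s.id "
    #   ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #   ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   ... )
    #   # -> exp.Merge(this=t, using=s, on=..., expressions=[exp.When, exp.When])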
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False
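
    # Illustrative note (not part of the parser source): the _match* helpers
    # above share one contract: peek at the current token(s), advance past
    # them and return True on success, otherwise leave (or restore) the token
    # index and return a falsy value. This is what lets higher-level parse
    # methods speculatively try one grammar production and fall back to
    # another via _retreat, as _find_parser does when walking a trie of
    # multi-word keywords.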
    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
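    # Sketch (hypothetical subclass): dialects customize parsing by overriding
    # these class-level sets rather than by patching methods, e.g.:
    #
    #   class CustomParser(Parser):
    #       # in this hypothetical dialect, FULL may not be used as an identifier
    #       ID_VAR_TOKENS = Parser.ID_VAR_TOKENS - {TokenType.FULL}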
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
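    # Sketch: the CONJUNCTION/EQUALITY/COMPARISON/BITWISE/TERM/FACTOR tables above
    # drive precedence climbing, and COLUMN_OPERATORS handles postfix operators:
    #
    #   import sqlglot
    #   tree = sqlglot.parse_one("SELECT 1 + 2 * 3, x::INT")
    #   # FACTOR binds tighter than TERM, so the first projection is
    #   # exp.Add(this=1, expression=exp.Mul(this=2, expression=3)); the DCOLON
    #   # entry turns x::INT into exp.Cast because STRICT_CAST is True here.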
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
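    # Sketch: EXPRESSION_PARSERS is the dispatch table behind parse_into, so a
    # bare condition can be parsed without wrapping it in a SELECT:
    #
    #   from sqlglot import exp
    #   from sqlglot.parser import Parser
    #   from sqlglot.tokens import Tokenizer
    #
    #   tokens = Tokenizer().tokenize("x > 1 AND y < 2")
    #   condition = Parser().parse_into(exp.Condition, tokens)[0]  # an exp.And node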
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
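    # Sketch: RANGE_PARSERS handles postfix predicates such as BETWEEN, IN and
    # LIKE, so the WHERE clause below parses into an exp.Between node:
    #
    #   import sqlglot
    #   where = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 10").args["where"]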
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
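    # Sketch (assumes the ClickHouse dialect builds on this table): PROPERTY_PARSERS
    # is keyed by the property keyword, e.g. ENGINE= becomes an exp.EngineProperty:
    #
    #   import sqlglot
    #   ddl = sqlglot.parse_one(
    #       "CREATE TABLE t (x Int8) ENGINE=MergeTree ORDER BY x", read="clickhouse"
    #   )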
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
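    # Sketch: CONSTRAINT_PARSERS drives column constraint parsing, so the column
    # below carries NotNullColumnConstraint and DefaultColumnConstraint nodes:
    #
    #   import sqlglot
    #   ddl = sqlglot.parse_one("CREATE TABLE t (a INT NOT NULL DEFAULT 0)")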
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
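    # Sketch: each QUERY_MODIFIER_PARSERS entry returns an (arg_name, node) pair
    # that is attached to the enclosing query expression:
    #
    #   import sqlglot
    #   query = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 LIMIT 1")
    #   # query.args now holds "where" and "limit" entries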
"PRECEDING"} 870 871 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 872 873 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 874 875 DISTINCT_TOKENS = {TokenType.DISTINCT} 876 877 NULL_TOKENS = {TokenType.NULL} 878 879 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 880 881 STRICT_CAST = True 882 883 # A NULL arg in CONCAT yields NULL by default 884 CONCAT_NULL_OUTPUTS_STRING = False 885 886 PREFIXED_PIVOT_COLUMNS = False 887 IDENTIFY_PIVOT_STRINGS = False 888 889 LOG_BASE_FIRST = True 890 LOG_DEFAULTS_TO_LN = False 891 892 # Whether or not ADD is present for each column added by ALTER TABLE 893 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 894 895 # Whether or not the table sample clause expects CSV syntax 896 TABLESAMPLE_CSV = False 897 898 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 899 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 900 901 # Whether the TRIM function expects the characters to trim as its first argument 902 TRIM_PATTERN_FIRST = False 903 904 __slots__ = ( 905 "error_level", 906 "error_message_context", 907 "max_errors", 908 "sql", 909 "errors", 910 "_tokens", 911 "_index", 912 "_curr", 913 "_next", 914 "_prev", 915 "_prev_comments", 916 "_tokenizer", 917 ) 918 919 # Autofilled 920 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 921 INDEX_OFFSET: int = 0 922 UNNEST_COLUMN_ONLY: bool = False 923 ALIAS_POST_TABLESAMPLE: bool = False 924 STRICT_STRING_CONCAT = False 925 SUPPORTS_USER_DEFINED_TYPES = True 926 NORMALIZE_FUNCTIONS = "upper" 927 NULL_ORDERING: str = "nulls_are_small" 928 SHOW_TRIE: t.Dict = {} 929 SET_TRIE: t.Dict = {} 930 FORMAT_MAPPING: t.Dict[str, str] = {} 931 FORMAT_TRIE: t.Dict = {} 932 TIME_MAPPING: t.Dict[str, str] = {} 933 TIME_TRIE: t.Dict = {} 934 935 def __init__( 936 self, 937 error_level: t.Optional[ErrorLevel] = None, 938 error_message_context: int = 100, 939 max_errors: int = 3, 940 ): 941 self.error_level = error_level or ErrorLevel.IMMEDIATE 942 self.error_message_context = error_message_context 943 self.max_errors = max_errors 944 self._tokenizer = self.TOKENIZER_CLASS() 945 self.reset() 946 947 def reset(self): 948 self.sql = "" 949 self.errors = [] 950 self._tokens = [] 951 self._index = 0 952 self._curr = None 953 self._next = None 954 self._prev = None 955 self._prev_comments = None 956 957 def parse( 958 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 959 ) -> t.List[t.Optional[exp.Expression]]: 960 """ 961 Parses a list of tokens and returns a list of syntax trees, one tree 962 per parsed SQL statement. 963 964 Args: 965 raw_tokens: The list of tokens. 966 sql: The original SQL string, used to produce helpful debug messages. 967 968 Returns: 969 The list of the produced syntax trees. 970 """ 971 return self._parse( 972 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 973 ) 974 975 def parse_into( 976 self, 977 expression_types: exp.IntoType, 978 raw_tokens: t.List[Token], 979 sql: t.Optional[str] = None, 980 ) -> t.List[t.Optional[exp.Expression]]: 981 """ 982 Parses a list of tokens into a given Expression type. If a collection of Expression 983 types is given instead, this method will try to parse the token list into each one 984 of them, stopping at the first for which the parsing succeeds. 985 986 Args: 987 expression_types: The expression type(s) to try and parse the token list into. 988 raw_tokens: The list of tokens. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
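    # Sketch: _parse splits the token stream on semicolons, and with
    # ErrorLevel.RAISE recorded errors surface as a single ParseError:
    #
    #   import sqlglot
    #   from sqlglot.errors import ErrorLevel, ParseError
    #
    #   trees = sqlglot.parse("SELECT 1; SELECT 2")  # two syntax trees
    #   try:
    #       sqlglot.parse_one(")", error_level=ErrorLevel.RAISE)
    #   except ParseError as e:
    #       details = e.errors  # structured info built by raise_error above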
1098 """ 1099 instance = exp_class(**kwargs) 1100 instance.add_comments(comments) if comments else self._add_comments(instance) 1101 return self.validate_expression(instance) 1102 1103 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1104 if expression and self._prev_comments: 1105 expression.add_comments(self._prev_comments) 1106 self._prev_comments = None 1107 1108 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1109 """ 1110 Validates an Expression, making sure that all its mandatory arguments are set. 1111 1112 Args: 1113 expression: The expression to validate. 1114 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1115 1116 Returns: 1117 The validated expression. 1118 """ 1119 if self.error_level != ErrorLevel.IGNORE: 1120 for error_message in expression.error_messages(args): 1121 self.raise_error(error_message) 1122 1123 return expression 1124 1125 def _find_sql(self, start: Token, end: Token) -> str: 1126 return self.sql[start.start : end.end + 1] 1127 1128 def _advance(self, times: int = 1) -> None: 1129 self._index += times 1130 self._curr = seq_get(self._tokens, self._index) 1131 self._next = seq_get(self._tokens, self._index + 1) 1132 1133 if self._index > 0: 1134 self._prev = self._tokens[self._index - 1] 1135 self._prev_comments = self._prev.comments 1136 else: 1137 self._prev = None 1138 self._prev_comments = None 1139 1140 def _retreat(self, index: int) -> None: 1141 if index != self._index: 1142 self._advance(index - self._index) 1143 1144 def _parse_command(self) -> exp.Command: 1145 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1146 1147 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1148 start = self._prev 1149 exists = self._parse_exists() if allow_exists else None 1150 1151 self._match(TokenType.ON) 1152 1153 kind = self._match_set(self.CREATABLES) and self._prev 1154 if not kind: 1155 return self._parse_as_command(start) 1156 1157 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1158 this = self._parse_user_defined_function(kind=kind.token_type) 1159 elif kind.token_type == TokenType.TABLE: 1160 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1161 elif kind.token_type == TokenType.COLUMN: 1162 this = self._parse_column() 1163 else: 1164 this = self._parse_id_var() 1165 1166 self._match(TokenType.IS) 1167 1168 return self.expression( 1169 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1170 ) 1171 1172 def _parse_to_table( 1173 self, 1174 ) -> exp.ToTableProperty: 1175 table = self._parse_table_parts(schema=True) 1176 return self.expression(exp.ToTableProperty, this=table) 1177 1178 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1179 def _parse_ttl(self) -> exp.Expression: 1180 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1181 this = self._parse_bitwise() 1182 1183 if self._match_text_seq("DELETE"): 1184 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1185 if self._match_text_seq("RECOMPRESS"): 1186 return self.expression( 1187 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1188 ) 1189 if self._match_text_seq("TO", "DISK"): 1190 return self.expression( 1191 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1192 ) 1193 if self._match_text_seq("TO", "VOLUME"): 1194 return self.expression( 1195 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
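    # Sketch: _parse_drop consumes the optional IF EXISTS clause via _parse_exists:
    #
    #   import sqlglot
    #   stmt = sqlglot.parse_one("DROP TABLE IF EXISTS t")
    #   # stmt is an exp.Drop whose args["exists"] is True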
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone,
                    this=clone,
                    when=when,
                    kind=clone_kind,
                    shallow=shallow,
                    expression=clone_expression,
                    copy=copy,
                )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
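    # Sketch (Snowflake-style DDL): the CLONE_KEYWORDS branch of _parse_create
    # wraps the source table in an exp.Clone:
    #
    #   import sqlglot
    #   ddl = sqlglot.parse_one("CREATE TABLE t2 CLONE t1", read="snowflake")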
"NOT LOCAL"), 1413 "after": self._match_text_seq("AFTER"), 1414 "minimum": self._match_texts(("MIN", "MINIMUM")), 1415 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1416 } 1417 1418 if self._match_texts(self.PROPERTY_PARSERS): 1419 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1420 try: 1421 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1422 except TypeError: 1423 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1424 1425 return None 1426 1427 def _parse_property(self) -> t.Optional[exp.Expression]: 1428 if self._match_texts(self.PROPERTY_PARSERS): 1429 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1430 1431 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1432 return self._parse_character_set(default=True) 1433 1434 if self._match_text_seq("COMPOUND", "SORTKEY"): 1435 return self._parse_sortkey(compound=True) 1436 1437 if self._match_text_seq("SQL", "SECURITY"): 1438 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1439 1440 index = self._index 1441 key = self._parse_column() 1442 1443 if not self._match(TokenType.EQ): 1444 self._retreat(index) 1445 return None 1446 1447 return self.expression( 1448 exp.Property, 1449 this=key.to_dot() if isinstance(key, exp.Column) else key, 1450 value=self._parse_column() or self._parse_var(any_token=True), 1451 ) 1452 1453 def _parse_stored(self) -> exp.FileFormatProperty: 1454 self._match(TokenType.ALIAS) 1455 1456 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1457 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1458 1459 return self.expression( 1460 exp.FileFormatProperty, 1461 this=self.expression( 1462 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1463 ) 1464 if input_format or output_format 1465 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1466 ) 1467 1468 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1469 self._match(TokenType.EQ) 1470 self._match(TokenType.ALIAS) 1471 return self.expression(exp_class, this=self._parse_field()) 1472 1473 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1474 properties = [] 1475 while True: 1476 if before: 1477 prop = self._parse_property_before() 1478 else: 1479 prop = self._parse_property() 1480 1481 if not prop: 1482 break 1483 for p in ensure_list(prop): 1484 properties.append(p) 1485 1486 if properties: 1487 return self.expression(exp.Properties, expressions=properties) 1488 1489 return None 1490 1491 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1492 return self.expression( 1493 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1494 ) 1495 1496 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1497 if self._index >= 2: 1498 pre_volatile_token = self._tokens[self._index - 2] 1499 else: 1500 pre_volatile_token = None 1501 1502 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1503 return exp.VolatileProperty() 1504 1505 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1506 1507 def _parse_with_property( 1508 self, 1509 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1510 if self._match(TokenType.L_PAREN, advance=False): 1511 return self._parse_wrapped_csv(self._parse_property) 1512 1513 if self._match_text_seq("JOURNAL"): 1514 return 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
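    # Sketch (Hive-style DDL): _parse_clustered_by covers bucketed tables:
    #
    #   import sqlglot
    #   ddl = sqlglot.parse_one(
    #       "CREATE TABLE t (a INT) CLUSTERED BY (a) INTO 4 BUCKETS", read="hive"
    #   )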
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
exp.NoPrimaryIndexProperty() 1747 return None 1748 1749 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1750 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1751 return exp.OnCommitProperty() 1752 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1753 return exp.OnCommitProperty(delete=True) 1754 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1755 1756 def _parse_distkey(self) -> exp.DistKeyProperty: 1757 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1758 1759 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1760 table = self._parse_table(schema=True) 1761 1762 options = [] 1763 while self._match_texts(("INCLUDING", "EXCLUDING")): 1764 this = self._prev.text.upper() 1765 1766 id_var = self._parse_id_var() 1767 if not id_var: 1768 return None 1769 1770 options.append( 1771 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1772 ) 1773 1774 return self.expression(exp.LikeProperty, this=table, expressions=options) 1775 1776 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1777 return self.expression( 1778 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1779 ) 1780 1781 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1782 self._match(TokenType.EQ) 1783 return self.expression( 1784 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1785 ) 1786 1787 def _parse_returns(self) -> exp.ReturnsProperty: 1788 value: t.Optional[exp.Expression] 1789 is_table = self._match(TokenType.TABLE) 1790 1791 if is_table: 1792 if self._match(TokenType.LT): 1793 value = self.expression( 1794 exp.Schema, 1795 this="TABLE", 1796 expressions=self._parse_csv(self._parse_struct_types), 1797 ) 1798 if not self._match(TokenType.GT): 1799 self.raise_error("Expecting >") 1800 else: 1801 value = self._parse_schema(exp.var("TABLE")) 1802 else: 1803 value = self._parse_types() 1804 1805 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1806 1807 def _parse_describe(self) -> exp.Describe: 1808 kind = self._match_set(self.CREATABLES) and self._prev.text 1809 this = self._parse_table(schema=True) 1810 properties = self._parse_properties() 1811 expressions = properties.expressions if properties else None 1812 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1813 1814 def _parse_insert(self) -> exp.Insert: 1815 comments = ensure_list(self._prev_comments) 1816 overwrite = self._match(TokenType.OVERWRITE) 1817 ignore = self._match(TokenType.IGNORE) 1818 local = self._match_text_seq("LOCAL") 1819 alternative = None 1820 1821 if self._match_text_seq("DIRECTORY"): 1822 this: t.Optional[exp.Expression] = self.expression( 1823 exp.Directory, 1824 this=self._parse_var_or_string(), 1825 local=local, 1826 row_format=self._parse_row_format(match_row=True), 1827 ) 1828 else: 1829 if self._match(TokenType.OR): 1830 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1831 1832 self._match(TokenType.INTO) 1833 comments += ensure_list(self._prev_comments) 1834 self._match(TokenType.TABLE) 1835 this = self._parse_table(schema=True) 1836 1837 returning = self._parse_returning() 1838 1839 return self.expression( 1840 exp.Insert, 1841 comments=comments, 1842 this=this, 1843 by_name=self._match_text_seq("BY", "NAME"), 1844 exists=self._parse_exists(), 1845 partition=self._parse_partition(), 1846 
where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1847 and self._parse_conjunction(), 1848 expression=self._parse_ddl_select(), 1849 conflict=self._parse_on_conflict(), 1850 returning=returning or self._parse_returning(), 1851 overwrite=overwrite, 1852 alternative=alternative, 1853 ignore=ignore, 1854 ) 1855 1856 def _parse_kill(self) -> exp.Kill: 1857 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1858 1859 return self.expression( 1860 exp.Kill, 1861 this=self._parse_primary(), 1862 kind=kind, 1863 ) 1864 1865 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1866 conflict = self._match_text_seq("ON", "CONFLICT") 1867 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1868 1869 if not conflict and not duplicate: 1870 return None 1871 1872 nothing = None 1873 expressions = None 1874 key = None 1875 constraint = None 1876 1877 if conflict: 1878 if self._match_text_seq("ON", "CONSTRAINT"): 1879 constraint = self._parse_id_var() 1880 else: 1881 key = self._parse_csv(self._parse_value) 1882 1883 self._match_text_seq("DO") 1884 if self._match_text_seq("NOTHING"): 1885 nothing = True 1886 else: 1887 self._match(TokenType.UPDATE) 1888 self._match(TokenType.SET) 1889 expressions = self._parse_csv(self._parse_equality) 1890 1891 return self.expression( 1892 exp.OnConflict, 1893 duplicate=duplicate, 1894 expressions=expressions, 1895 nothing=nothing, 1896 key=key, 1897 constraint=constraint, 1898 ) 1899 1900 def _parse_returning(self) -> t.Optional[exp.Returning]: 1901 if not self._match(TokenType.RETURNING): 1902 return None 1903 return self.expression( 1904 exp.Returning, 1905 expressions=self._parse_csv(self._parse_expression), 1906 into=self._match(TokenType.INTO) and self._parse_table_part(), 1907 ) 1908 1909 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1910 if not self._match(TokenType.FORMAT): 1911 return None 1912 return self._parse_row_format() 1913 1914 def _parse_row_format( 1915 self, match_row: bool = False 1916 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1917 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1918 return None 1919 1920 if self._match_text_seq("SERDE"): 1921 this = self._parse_string() 1922 1923 serde_properties = None 1924 if self._match(TokenType.SERDE_PROPERTIES): 1925 serde_properties = self.expression( 1926 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1927 ) 1928 1929 return self.expression( 1930 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1931 ) 1932 1933 self._match_text_seq("DELIMITED") 1934 1935 kwargs = {} 1936 1937 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1938 kwargs["fields"] = self._parse_string() 1939 if self._match_text_seq("ESCAPED", "BY"): 1940 kwargs["escaped"] = self._parse_string() 1941 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1942 kwargs["collection_items"] = self._parse_string() 1943 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1944 kwargs["map_keys"] = self._parse_string() 1945 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1946 kwargs["lines"] = self._parse_string() 1947 if self._match_text_seq("NULL", "DEFINED", "AS"): 1948 kwargs["null"] = self._parse_string() 1949 1950 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1951 1952 def _parse_load(self) -> exp.LoadData | exp.Command: 1953 if self._match_text_seq("DATA"): 
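            # Hive-style LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ...;
            # anything else below falls through and is preserved as a generic Command.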
1954 local = self._match_text_seq("LOCAL") 1955 self._match_text_seq("INPATH") 1956 inpath = self._parse_string() 1957 overwrite = self._match(TokenType.OVERWRITE) 1958 self._match_pair(TokenType.INTO, TokenType.TABLE) 1959 1960 return self.expression( 1961 exp.LoadData, 1962 this=self._parse_table(schema=True), 1963 local=local, 1964 overwrite=overwrite, 1965 inpath=inpath, 1966 partition=self._parse_partition(), 1967 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1968 serde=self._match_text_seq("SERDE") and self._parse_string(), 1969 ) 1970 return self._parse_as_command(self._prev) 1971 1972 def _parse_delete(self) -> exp.Delete: 1973 # This handles MySQL's "Multiple-Table Syntax" 1974 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1975 tables = None 1976 comments = self._prev_comments 1977 if not self._match(TokenType.FROM, advance=False): 1978 tables = self._parse_csv(self._parse_table) or None 1979 1980 returning = self._parse_returning() 1981 1982 return self.expression( 1983 exp.Delete, 1984 comments=comments, 1985 tables=tables, 1986 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1987 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1988 where=self._parse_where(), 1989 returning=returning or self._parse_returning(), 1990 limit=self._parse_limit(), 1991 ) 1992 1993 def _parse_update(self) -> exp.Update: 1994 comments = self._prev_comments 1995 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1996 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1997 returning = self._parse_returning() 1998 return self.expression( 1999 exp.Update, 2000 comments=comments, 2001 **{ # type: ignore 2002 "this": this, 2003 "expressions": expressions, 2004 "from": self._parse_from(joins=True), 2005 "where": self._parse_where(), 2006 "returning": returning or self._parse_returning(), 2007 "order": self._parse_order(), 2008 "limit": self._parse_limit(), 2009 }, 2010 ) 2011 2012 def _parse_uncache(self) -> exp.Uncache: 2013 if not self._match(TokenType.TABLE): 2014 self.raise_error("Expecting TABLE after UNCACHE") 2015 2016 return self.expression( 2017 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2018 ) 2019 2020 def _parse_cache(self) -> exp.Cache: 2021 lazy = self._match_text_seq("LAZY") 2022 self._match(TokenType.TABLE) 2023 table = self._parse_table(schema=True) 2024 2025 options = [] 2026 if self._match_text_seq("OPTIONS"): 2027 self._match_l_paren() 2028 k = self._parse_string() 2029 self._match(TokenType.EQ) 2030 v = self._parse_string() 2031 options = [k, v] 2032 self._match_r_paren() 2033 2034 self._match(TokenType.ALIAS) 2035 return self.expression( 2036 exp.Cache, 2037 this=table, 2038 lazy=lazy, 2039 options=options, 2040 expression=self._parse_select(nested=True), 2041 ) 2042 2043 def _parse_partition(self) -> t.Optional[exp.Partition]: 2044 if not self._match(TokenType.PARTITION): 2045 return None 2046 2047 return self.expression( 2048 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2049 ) 2050 2051 def _parse_value(self) -> exp.Tuple: 2052 if self._match(TokenType.L_PAREN): 2053 expressions = self._parse_csv(self._parse_conjunction) 2054 self._match_r_paren() 2055 return self.expression(exp.Tuple, expressions=expressions) 2056 2057 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
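        # (Each bare value becomes a single-element tuple here, so the caller's
        # _parse_csv sees VALUES 1, 2 as two one-column rows.)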
2058 # https://prestodb.io/docs/current/sql/values.html 2059 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2060 2061 def _parse_projections(self) -> t.List[exp.Expression]: 2062 return self._parse_expressions() 2063 2064 def _parse_select( 2065 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2066 ) -> t.Optional[exp.Expression]: 2067 cte = self._parse_with() 2068 2069 if cte: 2070 this = self._parse_statement() 2071 2072 if not this: 2073 self.raise_error("Failed to parse any statement following CTE") 2074 return cte 2075 2076 if "with" in this.arg_types: 2077 this.set("with", cte) 2078 else: 2079 self.raise_error(f"{this.key} does not support CTE") 2080 this = cte 2081 2082 return this 2083 2084 # duckdb supports leading with FROM x 2085 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2086 2087 if self._match(TokenType.SELECT): 2088 comments = self._prev_comments 2089 2090 hint = self._parse_hint() 2091 all_ = self._match(TokenType.ALL) 2092 distinct = self._match_set(self.DISTINCT_TOKENS) 2093 2094 kind = ( 2095 self._match(TokenType.ALIAS) 2096 and self._match_texts(("STRUCT", "VALUE")) 2097 and self._prev.text 2098 ) 2099 2100 if distinct: 2101 distinct = self.expression( 2102 exp.Distinct, 2103 on=self._parse_value() if self._match(TokenType.ON) else None, 2104 ) 2105 2106 if all_ and distinct: 2107 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2108 2109 limit = self._parse_limit(top=True) 2110 projections = self._parse_projections() 2111 2112 this = self.expression( 2113 exp.Select, 2114 kind=kind, 2115 hint=hint, 2116 distinct=distinct, 2117 expressions=projections, 2118 limit=limit, 2119 ) 2120 this.comments = comments 2121 2122 into = self._parse_into() 2123 if into: 2124 this.set("into", into) 2125 2126 if not from_: 2127 from_ = self._parse_from() 2128 2129 if from_: 2130 this.set("from", from_) 2131 2132 this = self._parse_query_modifiers(this) 2133 elif (table or nested) and self._match(TokenType.L_PAREN): 2134 if self._match(TokenType.PIVOT): 2135 this = self._parse_simplified_pivot() 2136 elif self._match(TokenType.FROM): 2137 this = exp.select("*").from_( 2138 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2139 ) 2140 else: 2141 this = self._parse_table() if table else self._parse_select(nested=True) 2142 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2143 2144 self._match_r_paren() 2145 2146 # We return early here so that the UNION isn't attached to the subquery by the 2147 # following call to _parse_set_operations, but instead becomes the parent node 2148 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2149 elif self._match(TokenType.VALUES): 2150 this = self.expression( 2151 exp.Values, 2152 expressions=self._parse_csv(self._parse_value), 2153 alias=self._parse_table_alias(), 2154 ) 2155 elif from_: 2156 this = exp.select("*").from_(from_.this, copy=False) 2157 else: 2158 this = None 2159 2160 return self._parse_set_operations(this) 2161 2162 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2163 if not skip_with_token and not self._match(TokenType.WITH): 2164 return None 2165 2166 comments = self._prev_comments 2167 recursive = self._match(TokenType.RECURSIVE) 2168 2169 expressions = [] 2170 while True: 2171 expressions.append(self._parse_cte()) 2172 2173 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2174 break 2175 else: 2176 
self._match(TokenType.WITH) 2177 2178 return self.expression( 2179 exp.With, comments=comments, expressions=expressions, recursive=recursive 2180 ) 2181 2182 def _parse_cte(self) -> exp.CTE: 2183 alias = self._parse_table_alias() 2184 if not alias or not alias.this: 2185 self.raise_error("Expected CTE to have alias") 2186 2187 self._match(TokenType.ALIAS) 2188 return self.expression( 2189 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2190 ) 2191 2192 def _parse_table_alias( 2193 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2194 ) -> t.Optional[exp.TableAlias]: 2195 any_token = self._match(TokenType.ALIAS) 2196 alias = ( 2197 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2198 or self._parse_string_as_identifier() 2199 ) 2200 2201 index = self._index 2202 if self._match(TokenType.L_PAREN): 2203 columns = self._parse_csv(self._parse_function_parameter) 2204 self._match_r_paren() if columns else self._retreat(index) 2205 else: 2206 columns = None 2207 2208 if not alias and not columns: 2209 return None 2210 2211 return self.expression(exp.TableAlias, this=alias, columns=columns) 2212 2213 def _parse_subquery( 2214 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2215 ) -> t.Optional[exp.Subquery]: 2216 if not this: 2217 return None 2218 2219 return self.expression( 2220 exp.Subquery, 2221 this=this, 2222 pivots=self._parse_pivots(), 2223 alias=self._parse_table_alias() if parse_alias else None, 2224 ) 2225 2226 def _parse_query_modifiers( 2227 self, this: t.Optional[exp.Expression] 2228 ) -> t.Optional[exp.Expression]: 2229 if isinstance(this, self.MODIFIABLES): 2230 for join in iter(self._parse_join, None): 2231 this.append("joins", join) 2232 for lateral in iter(self._parse_lateral, None): 2233 this.append("laterals", lateral) 2234 2235 while True: 2236 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2237 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2238 key, expression = parser(self) 2239 2240 if expression: 2241 this.set(key, expression) 2242 if key == "limit": 2243 offset = expression.args.pop("offset", None) 2244 if offset: 2245 this.set("offset", exp.Offset(expression=offset)) 2246 continue 2247 break 2248 return this 2249 2250 def _parse_hint(self) -> t.Optional[exp.Hint]: 2251 if self._match(TokenType.HINT): 2252 hints = [] 2253 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2254 hints.extend(hint) 2255 2256 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2257 self.raise_error("Expected */ after HINT") 2258 2259 return self.expression(exp.Hint, expressions=hints) 2260 2261 return None 2262 2263 def _parse_into(self) -> t.Optional[exp.Into]: 2264 if not self._match(TokenType.INTO): 2265 return None 2266 2267 temp = self._match(TokenType.TEMPORARY) 2268 unlogged = self._match_text_seq("UNLOGGED") 2269 self._match(TokenType.TABLE) 2270 2271 return self.expression( 2272 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2273 ) 2274 2275 def _parse_from( 2276 self, joins: bool = False, skip_from_token: bool = False 2277 ) -> t.Optional[exp.From]: 2278 if not skip_from_token and not self._match(TokenType.FROM): 2279 return None 2280 2281 return self.expression( 2282 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2283 ) 2284 2285 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2286 if not self._match(TokenType.MATCH_RECOGNIZE): 2287 return None 2288 2289 
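        # A sketch of the clause shape consumed below (illustrative, not exhaustive):
        #   MATCH_RECOGNIZE (
        #     [PARTITION BY ...] [ORDER BY ...] [MEASURES ...]
        #     [ONE ROW PER MATCH | ALL ROWS PER MATCH ...] [AFTER MATCH SKIP ...]
        #     [PATTERN (...)] [DEFINE ...]
        #   ) [alias]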
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif
isinstance(this, exp.Subquery) and this.alias: 2409 # Ensures parity between the Subquery's and the Lateral's "alias" args 2410 table_alias = this.args["alias"].copy() 2411 else: 2412 table_alias = self._parse_table_alias() 2413 2414 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2415 2416 def _parse_join_parts( 2417 self, 2418 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2419 return ( 2420 self._match_set(self.JOIN_METHODS) and self._prev, 2421 self._match_set(self.JOIN_SIDES) and self._prev, 2422 self._match_set(self.JOIN_KINDS) and self._prev, 2423 ) 2424 2425 def _parse_join( 2426 self, skip_join_token: bool = False, parse_bracket: bool = False 2427 ) -> t.Optional[exp.Join]: 2428 if self._match(TokenType.COMMA): 2429 return self.expression(exp.Join, this=self._parse_table()) 2430 2431 index = self._index 2432 method, side, kind = self._parse_join_parts() 2433 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2434 join = self._match(TokenType.JOIN) 2435 2436 if not skip_join_token and not join: 2437 self._retreat(index) 2438 kind = None 2439 method = None 2440 side = None 2441 2442 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2443 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2444 2445 if not skip_join_token and not join and not outer_apply and not cross_apply: 2446 return None 2447 2448 if outer_apply: 2449 side = Token(TokenType.LEFT, "LEFT") 2450 2451 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2452 2453 if method: 2454 kwargs["method"] = method.text 2455 if side: 2456 kwargs["side"] = side.text 2457 if kind: 2458 kwargs["kind"] = kind.text 2459 if hint: 2460 kwargs["hint"] = hint 2461 2462 if self._match(TokenType.ON): 2463 kwargs["on"] = self._parse_conjunction() 2464 elif self._match(TokenType.USING): 2465 kwargs["using"] = self._parse_wrapped_id_vars() 2466 elif not (kind and kind.token_type == TokenType.CROSS): 2467 index = self._index 2468 join = self._parse_join() 2469 2470 if join and self._match(TokenType.ON): 2471 kwargs["on"] = self._parse_conjunction() 2472 elif join and self._match(TokenType.USING): 2473 kwargs["using"] = self._parse_wrapped_id_vars() 2474 else: 2475 join = None 2476 self._retreat(index) 2477 2478 kwargs["this"].set("joins", [join] if join else None) 2479 2480 comments = [c for token in (method, side, kind) if token for c in token.comments] 2481 return self.expression(exp.Join, comments=comments, **kwargs) 2482 2483 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2484 this = self._parse_conjunction() 2485 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2486 return this 2487 2488 opclass = self._parse_var(any_token=True) 2489 if opclass: 2490 return self.expression(exp.Opclass, this=this, expression=opclass) 2491 2492 return this 2493 2494 def _parse_index( 2495 self, 2496 index: t.Optional[exp.Expression] = None, 2497 ) -> t.Optional[exp.Index]: 2498 if index: 2499 unique = None 2500 primary = None 2501 amp = None 2502 2503 self._match(TokenType.ON) 2504 self._match(TokenType.TABLE) # hive 2505 table = self._parse_table_parts(schema=True) 2506 else: 2507 unique = self._match(TokenType.UNIQUE) 2508 primary = self._match_text_seq("PRIMARY") 2509 amp = self._match_text_seq("AMP") 2510 2511 if not self._match(TokenType.INDEX): 2512 return None 2513 2514 index = self._parse_id_var() 2515 table = None 2516 2517 using = self._parse_var(any_token=True) 
if self._match(TokenType.USING) else None 2518 2519 if self._match(TokenType.L_PAREN, advance=False): 2520 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2521 else: 2522 columns = None 2523 2524 return self.expression( 2525 exp.Index, 2526 this=index, 2527 table=table, 2528 using=using, 2529 columns=columns, 2530 unique=unique, 2531 primary=primary, 2532 amp=amp, 2533 partition_by=self._parse_partition_by(), 2534 where=self._parse_where(), 2535 ) 2536 2537 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2538 hints: t.List[exp.Expression] = [] 2539 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2540 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2541 hints.append( 2542 self.expression( 2543 exp.WithTableHint, 2544 expressions=self._parse_csv( 2545 lambda: self._parse_function() or self._parse_var(any_token=True) 2546 ), 2547 ) 2548 ) 2549 self._match_r_paren() 2550 else: 2551 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2552 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2553 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2554 2555 self._match_texts({"INDEX", "KEY"}) 2556 if self._match(TokenType.FOR): 2557 hint.set("target", self._advance_any() and self._prev.text.upper()) 2558 2559 hint.set("expressions", self._parse_wrapped_id_vars()) 2560 hints.append(hint) 2561 2562 return hints or None 2563 2564 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2565 return ( 2566 (not schema and self._parse_function(optional_parens=False)) 2567 or self._parse_id_var(any_token=False) 2568 or self._parse_string_as_identifier() 2569 or self._parse_placeholder() 2570 ) 2571 2572 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2573 catalog = None 2574 db = None 2575 table = self._parse_table_part(schema=schema) 2576 2577 while self._match(TokenType.DOT): 2578 if catalog: 2579 # This allows nesting the table in arbitrarily many dot expressions if needed 2580 table = self.expression( 2581 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2582 ) 2583 else: 2584 catalog = db 2585 db = table 2586 table = self._parse_table_part(schema=schema) 2587 2588 if not table: 2589 self.raise_error(f"Expected table name but got {self._curr}") 2590 2591 return self.expression( 2592 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2593 ) 2594 2595 def _parse_table( 2596 self, 2597 schema: bool = False, 2598 joins: bool = False, 2599 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2600 parse_bracket: bool = False, 2601 ) -> t.Optional[exp.Expression]: 2602 lateral = self._parse_lateral() 2603 if lateral: 2604 return lateral 2605 2606 unnest = self._parse_unnest() 2607 if unnest: 2608 return unnest 2609 2610 values = self._parse_derived_table_values() 2611 if values: 2612 return values 2613 2614 subquery = self._parse_select(table=True) 2615 if subquery: 2616 if not subquery.args.get("pivots"): 2617 subquery.set("pivots", self._parse_pivots()) 2618 return subquery 2619 2620 bracket = parse_bracket and self._parse_bracket(None) 2621 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2622 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2623 2624 if schema: 2625 return self._parse_schema(this=this) 2626 2627 version = self._parse_version() 2628 2629 if version: 2630 this.set("version", version) 2631 2632 if self.ALIAS_POST_TABLESAMPLE: 
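            # In dialects where the alias follows TABLESAMPLE (Hive-style), the
            # sample has to be consumed before the alias is parsed below.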
2633 table_sample = self._parse_table_sample() 2634 2635 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2636 if alias: 2637 this.set("alias", alias) 2638 2639 this.set("hints", self._parse_table_hints()) 2640 2641 if not this.args.get("pivots"): 2642 this.set("pivots", self._parse_pivots()) 2643 2644 if not self.ALIAS_POST_TABLESAMPLE: 2645 table_sample = self._parse_table_sample() 2646 2647 if table_sample: 2648 table_sample.set("this", this) 2649 this = table_sample 2650 2651 if joins: 2652 for join in iter(self._parse_join, None): 2653 this.append("joins", join) 2654 2655 return this 2656 2657 def _parse_version(self) -> t.Optional[exp.Version]: 2658 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2659 this = "TIMESTAMP" 2660 elif self._match(TokenType.VERSION_SNAPSHOT): 2661 this = "VERSION" 2662 else: 2663 return None 2664 2665 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2666 kind = self._prev.text.upper() 2667 start = self._parse_bitwise() 2668 self._match_texts(("TO", "AND")) 2669 end = self._parse_bitwise() 2670 expression: t.Optional[exp.Expression] = self.expression( 2671 exp.Tuple, expressions=[start, end] 2672 ) 2673 elif self._match_text_seq("CONTAINED", "IN"): 2674 kind = "CONTAINED IN" 2675 expression = self.expression( 2676 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2677 ) 2678 elif self._match(TokenType.ALL): 2679 kind = "ALL" 2680 expression = None 2681 else: 2682 self._match_text_seq("AS", "OF") 2683 kind = "AS OF" 2684 expression = self._parse_type() 2685 2686 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2687 2688 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2689 if not self._match(TokenType.UNNEST): 2690 return None 2691 2692 expressions = self._parse_wrapped_csv(self._parse_type) 2693 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2694 2695 alias = self._parse_table_alias() if with_alias else None 2696 2697 if alias: 2698 if self.UNNEST_COLUMN_ONLY: 2699 if alias.args.get("columns"): 2700 self.raise_error("Unexpected extra column alias in unnest.") 2701 2702 alias.set("columns", [alias.this]) 2703 alias.set("this", None) 2704 2705 columns = alias.args.get("columns") or [] 2706 if offset and len(expressions) < len(columns): 2707 offset = columns.pop() 2708 2709 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2710 self._match(TokenType.ALIAS) 2711 offset = self._parse_id_var( 2712 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2713 ) or exp.to_identifier("offset") 2714 2715 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2716 2717 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2718 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2719 if not is_derived and not self._match(TokenType.VALUES): 2720 return None 2721 2722 expressions = self._parse_csv(self._parse_value) 2723 alias = self._parse_table_alias() 2724 2725 if is_derived: 2726 self._match_r_paren() 2727 2728 return self.expression( 2729 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2730 ) 2731 2732 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2733 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2734 as_modifier and self._match_text_seq("USING", "SAMPLE") 2735 ): 2736 return None 2737 2738 bucket_numerator = None 2739 bucket_denominator = None 2740 bucket_field = None 2741 
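        # At most one of percent / rows / size ends up set, depending on the
        # sampling unit the dialect uses (see the elif chain below).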
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
self.raise_error("Expecting FOR") 2857 2858 value = self._parse_column() 2859 2860 if not self._match(TokenType.IN): 2861 self.raise_error("Expecting IN") 2862 2863 field = self._parse_in(value, alias=True) 2864 2865 self._match_r_paren() 2866 2867 pivot = self.expression( 2868 exp.Pivot, 2869 expressions=expressions, 2870 field=field, 2871 unpivot=unpivot, 2872 include_nulls=include_nulls, 2873 ) 2874 2875 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2876 pivot.set("alias", self._parse_table_alias()) 2877 2878 if not unpivot: 2879 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2880 2881 columns: t.List[exp.Expression] = [] 2882 for fld in pivot.args["field"].expressions: 2883 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2884 for name in names: 2885 if self.PREFIXED_PIVOT_COLUMNS: 2886 name = f"{name}_{field_name}" if name else field_name 2887 else: 2888 name = f"{field_name}_{name}" if name else field_name 2889 2890 columns.append(exp.to_identifier(name)) 2891 2892 pivot.set("columns", columns) 2893 2894 return pivot 2895 2896 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2897 return [agg.alias for agg in aggregations] 2898 2899 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2900 if not skip_where_token and not self._match(TokenType.WHERE): 2901 return None 2902 2903 return self.expression( 2904 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2905 ) 2906 2907 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2908 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2909 return None 2910 2911 elements = defaultdict(list) 2912 2913 if self._match(TokenType.ALL): 2914 return self.expression(exp.Group, all=True) 2915 2916 while True: 2917 expressions = self._parse_csv(self._parse_conjunction) 2918 if expressions: 2919 elements["expressions"].extend(expressions) 2920 2921 grouping_sets = self._parse_grouping_sets() 2922 if grouping_sets: 2923 elements["grouping_sets"].extend(grouping_sets) 2924 2925 rollup = None 2926 cube = None 2927 totals = None 2928 2929 with_ = self._match(TokenType.WITH) 2930 if self._match(TokenType.ROLLUP): 2931 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2932 elements["rollup"].extend(ensure_list(rollup)) 2933 2934 if self._match(TokenType.CUBE): 2935 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2936 elements["cube"].extend(ensure_list(cube)) 2937 2938 if self._match_text_seq("TOTALS"): 2939 totals = True 2940 elements["totals"] = True # type: ignore 2941 2942 if not (grouping_sets or rollup or cube or totals): 2943 break 2944 2945 return self.expression(exp.Group, **elements) # type: ignore 2946 2947 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2948 if not self._match(TokenType.GROUPING_SETS): 2949 return None 2950 2951 return self._parse_wrapped_csv(self._parse_grouping_set) 2952 2953 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2954 if self._match(TokenType.L_PAREN): 2955 grouping_set = self._parse_csv(self._parse_column) 2956 self._match_r_paren() 2957 return self.expression(exp.Tuple, expressions=grouping_set) 2958 2959 return self._parse_column() 2960 2961 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2962 if not skip_having_token and not self._match(TokenType.HAVING): 2963 return None 2964 return 
self.expression(exp.Having, this=self._parse_conjunction()) 2965 2966 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2967 if not self._match(TokenType.QUALIFY): 2968 return None 2969 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2970 2971 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2972 if skip_start_token: 2973 start = None 2974 elif self._match(TokenType.START_WITH): 2975 start = self._parse_conjunction() 2976 else: 2977 return None 2978 2979 self._match(TokenType.CONNECT_BY) 2980 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2981 exp.Prior, this=self._parse_bitwise() 2982 ) 2983 connect = self._parse_conjunction() 2984 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2985 2986 if not start and self._match(TokenType.START_WITH): 2987 start = self._parse_conjunction() 2988 2989 return self.expression(exp.Connect, start=start, connect=connect) 2990 2991 def _parse_order( 2992 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2993 ) -> t.Optional[exp.Expression]: 2994 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2995 return this 2996 2997 return self.expression( 2998 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2999 ) 3000 3001 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3002 if not self._match(token): 3003 return None 3004 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3005 3006 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3007 this = parse_method() if parse_method else self._parse_conjunction() 3008 3009 asc = self._match(TokenType.ASC) 3010 desc = self._match(TokenType.DESC) or (asc and False) 3011 3012 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3013 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3014 3015 nulls_first = is_nulls_first or False 3016 explicitly_null_ordered = is_nulls_first or is_nulls_last 3017 3018 if ( 3019 not explicitly_null_ordered 3020 and ( 3021 (not desc and self.NULL_ORDERING == "nulls_are_small") 3022 or (desc and self.NULL_ORDERING != "nulls_are_small") 3023 ) 3024 and self.NULL_ORDERING != "nulls_are_last" 3025 ): 3026 nulls_first = True 3027 3028 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3029 3030 def _parse_limit( 3031 self, this: t.Optional[exp.Expression] = None, top: bool = False 3032 ) -> t.Optional[exp.Expression]: 3033 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3034 comments = self._prev_comments 3035 if top: 3036 limit_paren = self._match(TokenType.L_PAREN) 3037 expression = self._parse_number() 3038 3039 if limit_paren: 3040 self._match_r_paren() 3041 else: 3042 expression = self._parse_term() 3043 3044 if self._match(TokenType.COMMA): 3045 offset = expression 3046 expression = self._parse_term() 3047 else: 3048 offset = None 3049 3050 limit_exp = self.expression( 3051 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3052 ) 3053 3054 return limit_exp 3055 3056 if self._match(TokenType.FETCH): 3057 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3058 direction = self._prev.text if direction else "FIRST" 3059 3060 count = self._parse_field(tokens=self.FETCH_TOKENS) 3061 percent = self._match(TokenType.PERCENT) 3062 3063 self._match_set((TokenType.ROW, TokenType.ROWS)) 3064 3065 only = self._match_text_seq("ONLY") 3066 with_ties = self._match_text_seq("WITH", 
"TIES") 3067 3068 if only and with_ties: 3069 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3070 3071 return self.expression( 3072 exp.Fetch, 3073 direction=direction, 3074 count=count, 3075 percent=percent, 3076 with_ties=with_ties, 3077 ) 3078 3079 return this 3080 3081 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3082 if not self._match(TokenType.OFFSET): 3083 return this 3084 3085 count = self._parse_term() 3086 self._match_set((TokenType.ROW, TokenType.ROWS)) 3087 return self.expression(exp.Offset, this=this, expression=count) 3088 3089 def _parse_locks(self) -> t.List[exp.Lock]: 3090 locks = [] 3091 while True: 3092 if self._match_text_seq("FOR", "UPDATE"): 3093 update = True 3094 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3095 "LOCK", "IN", "SHARE", "MODE" 3096 ): 3097 update = False 3098 else: 3099 break 3100 3101 expressions = None 3102 if self._match_text_seq("OF"): 3103 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3104 3105 wait: t.Optional[bool | exp.Expression] = None 3106 if self._match_text_seq("NOWAIT"): 3107 wait = True 3108 elif self._match_text_seq("WAIT"): 3109 wait = self._parse_primary() 3110 elif self._match_text_seq("SKIP", "LOCKED"): 3111 wait = False 3112 3113 locks.append( 3114 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3115 ) 3116 3117 return locks 3118 3119 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3120 if not self._match_set(self.SET_OPERATIONS): 3121 return this 3122 3123 token_type = self._prev.token_type 3124 3125 if token_type == TokenType.UNION: 3126 expression = exp.Union 3127 elif token_type == TokenType.EXCEPT: 3128 expression = exp.Except 3129 else: 3130 expression = exp.Intersect 3131 3132 return self.expression( 3133 expression, 3134 this=this, 3135 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3136 by_name=self._match_text_seq("BY", "NAME"), 3137 expression=self._parse_set_operations(self._parse_select(nested=True)), 3138 ) 3139 3140 def _parse_expression(self) -> t.Optional[exp.Expression]: 3141 return self._parse_alias(self._parse_conjunction()) 3142 3143 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3144 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3145 3146 def _parse_equality(self) -> t.Optional[exp.Expression]: 3147 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3148 3149 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3150 return self._parse_tokens(self._parse_range, self.COMPARISON) 3151 3152 def _parse_range(self) -> t.Optional[exp.Expression]: 3153 this = self._parse_bitwise() 3154 negate = self._match(TokenType.NOT) 3155 3156 if self._match_set(self.RANGE_PARSERS): 3157 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3158 if not expression: 3159 return this 3160 3161 this = expression 3162 elif self._match(TokenType.ISNULL): 3163 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3164 3165 # Postgres supports ISNULL and NOTNULL for conditions. 
3166 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3167 if self._match(TokenType.NOTNULL): 3168 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3169 this = self.expression(exp.Not, this=this) 3170 3171 if negate: 3172 this = self.expression(exp.Not, this=this) 3173 3174 if self._match(TokenType.IS): 3175 this = self._parse_is(this) 3176 3177 return this 3178 3179 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3180 index = self._index - 1 3181 negate = self._match(TokenType.NOT) 3182 3183 if self._match_text_seq("DISTINCT", "FROM"): 3184 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3185 return self.expression(klass, this=this, expression=self._parse_expression()) 3186 3187 expression = self._parse_null() or self._parse_boolean() 3188 if not expression: 3189 self._retreat(index) 3190 return None 3191 3192 this = self.expression(exp.Is, this=this, expression=expression) 3193 return self.expression(exp.Not, this=this) if negate else this 3194 3195 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3196 unnest = self._parse_unnest(with_alias=False) 3197 if unnest: 3198 this = self.expression(exp.In, this=this, unnest=unnest) 3199 elif self._match(TokenType.L_PAREN): 3200 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3201 3202 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3203 this = self.expression(exp.In, this=this, query=expressions[0]) 3204 else: 3205 this = self.expression(exp.In, this=this, expressions=expressions) 3206 3207 self._match_r_paren(this) 3208 else: 3209 this = self.expression(exp.In, this=this, field=self._parse_field()) 3210 3211 return this 3212 3213 def _parse_between(self, this: exp.Expression) -> exp.Between: 3214 low = self._parse_bitwise() 3215 self._match(TokenType.AND) 3216 high = self._parse_bitwise() 3217 return self.expression(exp.Between, this=this, low=low, high=high) 3218 3219 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3220 if not self._match(TokenType.ESCAPE): 3221 return this 3222 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3223 3224 def _parse_interval(self) -> t.Optional[exp.Interval]: 3225 index = self._index 3226 3227 if not self._match(TokenType.INTERVAL): 3228 return None 3229 3230 if self._match(TokenType.STRING, advance=False): 3231 this = self._parse_primary() 3232 else: 3233 this = self._parse_term() 3234 3235 if not this: 3236 self._retreat(index) 3237 return None 3238 3239 unit = self._parse_function() or self._parse_var(any_token=True) 3240 3241 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3242 # each INTERVAL expression into this canonical form so it's easy to transpile 3243 if this and this.is_number: 3244 this = exp.Literal.string(this.name) 3245 elif this and this.is_string: 3246 parts = this.name.split() 3247 3248 if len(parts) == 2: 3249 if unit: 3250 # This is not actually a unit, it's something else (e.g. 
a "window side") 3251 unit = None 3252 self._retreat(self._index - 1) 3253 3254 this = exp.Literal.string(parts[0]) 3255 unit = self.expression(exp.Var, this=parts[1]) 3256 3257 return self.expression(exp.Interval, this=this, unit=unit) 3258 3259 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3260 this = self._parse_term() 3261 3262 while True: 3263 if self._match_set(self.BITWISE): 3264 this = self.expression( 3265 self.BITWISE[self._prev.token_type], 3266 this=this, 3267 expression=self._parse_term(), 3268 ) 3269 elif self._match(TokenType.DQMARK): 3270 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3271 elif self._match_pair(TokenType.LT, TokenType.LT): 3272 this = self.expression( 3273 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3274 ) 3275 elif self._match_pair(TokenType.GT, TokenType.GT): 3276 this = self.expression( 3277 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3278 ) 3279 else: 3280 break 3281 3282 return this 3283 3284 def _parse_term(self) -> t.Optional[exp.Expression]: 3285 return self._parse_tokens(self._parse_factor, self.TERM) 3286 3287 def _parse_factor(self) -> t.Optional[exp.Expression]: 3288 return self._parse_tokens(self._parse_unary, self.FACTOR) 3289 3290 def _parse_unary(self) -> t.Optional[exp.Expression]: 3291 if self._match_set(self.UNARY_PARSERS): 3292 return self.UNARY_PARSERS[self._prev.token_type](self) 3293 return self._parse_at_time_zone(self._parse_type()) 3294 3295 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3296 interval = parse_interval and self._parse_interval() 3297 if interval: 3298 return interval 3299 3300 index = self._index 3301 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3302 this = self._parse_column() 3303 3304 if data_type: 3305 if isinstance(this, exp.Literal): 3306 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3307 if parser: 3308 return parser(self, this, data_type) 3309 return self.expression(exp.Cast, this=this, to=data_type) 3310 if not data_type.expressions: 3311 self._retreat(index) 3312 return self._parse_column() 3313 return self._parse_column_ops(data_type) 3314 3315 return this and self._parse_column_ops(this) 3316 3317 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3318 this = self._parse_type() 3319 if not this: 3320 return None 3321 3322 return self.expression( 3323 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3324 ) 3325 3326 def _parse_types( 3327 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3328 ) -> t.Optional[exp.Expression]: 3329 index = self._index 3330 3331 prefix = self._match_text_seq("SYSUDTLIB", ".") 3332 3333 if not self._match_set(self.TYPE_TOKENS): 3334 identifier = allow_identifiers and self._parse_id_var( 3335 any_token=False, tokens=(TokenType.VAR,) 3336 ) 3337 3338 if identifier: 3339 tokens = self._tokenizer.tokenize(identifier.name) 3340 3341 if len(tokens) != 1: 3342 self.raise_error("Unexpected identifier", self._prev) 3343 3344 if tokens[0].token_type in self.TYPE_TOKENS: 3345 self._prev = tokens[0] 3346 elif self.SUPPORTS_USER_DEFINED_TYPES: 3347 type_name = identifier.name 3348 3349 while self._match(TokenType.DOT): 3350 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3351 3352 return exp.DataType.build(type_name, udt=True) 3353 else: 3354 return None 3355 else: 3356 return None 3357 3358 type_token = self._prev.token_type 3359 3360 if type_token 
== TokenType.PSEUDO_TYPE: 3361 return self.expression(exp.PseudoType, this=self._prev.text) 3362 3363 if type_token == TokenType.OBJECT_IDENTIFIER: 3364 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3365 3366 nested = type_token in self.NESTED_TYPE_TOKENS 3367 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3368 expressions = None 3369 maybe_func = False 3370 3371 if self._match(TokenType.L_PAREN): 3372 if is_struct: 3373 expressions = self._parse_csv(self._parse_struct_types) 3374 elif nested: 3375 expressions = self._parse_csv( 3376 lambda: self._parse_types( 3377 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3378 ) 3379 ) 3380 elif type_token in self.ENUM_TYPE_TOKENS: 3381 expressions = self._parse_csv(self._parse_equality) 3382 else: 3383 expressions = self._parse_csv(self._parse_type_size) 3384 3385 if not expressions or not self._match(TokenType.R_PAREN): 3386 self._retreat(index) 3387 return None 3388 3389 maybe_func = True 3390 3391 this: t.Optional[exp.Expression] = None 3392 values: t.Optional[t.List[exp.Expression]] = None 3393 3394 if nested and self._match(TokenType.LT): 3395 if is_struct: 3396 expressions = self._parse_csv(self._parse_struct_types) 3397 else: 3398 expressions = self._parse_csv( 3399 lambda: self._parse_types( 3400 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3401 ) 3402 ) 3403 3404 if not self._match(TokenType.GT): 3405 self.raise_error("Expecting >") 3406 3407 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3408 values = self._parse_csv(self._parse_conjunction) 3409 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3410 3411 if type_token in self.TIMESTAMPS: 3412 if self._match_text_seq("WITH", "TIME", "ZONE"): 3413 maybe_func = False 3414 tz_type = ( 3415 exp.DataType.Type.TIMETZ 3416 if type_token in self.TIMES 3417 else exp.DataType.Type.TIMESTAMPTZ 3418 ) 3419 this = exp.DataType(this=tz_type, expressions=expressions) 3420 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3421 maybe_func = False 3422 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3423 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3424 maybe_func = False 3425 elif type_token == TokenType.INTERVAL: 3426 unit = self._parse_var() 3427 3428 if self._match_text_seq("TO"): 3429 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3430 else: 3431 span = None 3432 3433 if span or not unit: 3434 this = self.expression( 3435 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3436 ) 3437 else: 3438 this = self.expression(exp.Interval, unit=unit) 3439 3440 if maybe_func and check_func: 3441 index2 = self._index 3442 peek = self._parse_string() 3443 3444 if not peek: 3445 self._retreat(index) 3446 return None 3447 3448 self._retreat(index2) 3449 3450 if not this: 3451 if self._match_text_seq("UNSIGNED"): 3452 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3453 if not unsigned_type_token: 3454 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3455 3456 type_token = unsigned_type_token or type_token 3457 3458 this = exp.DataType( 3459 this=exp.DataType.Type[type_token.value], 3460 expressions=expressions, 3461 nested=nested, 3462 values=values, 3463 prefix=prefix, 3464 ) 3465 3466 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3467 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3468 3469 return this 3470 3471 def 
_parse_struct_types(self) -> t.Optional[exp.Expression]: 3472 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3473 self._match(TokenType.COLON) 3474 return self._parse_column_def(this) 3475 3476 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3477 if not self._match_text_seq("AT", "TIME", "ZONE"): 3478 return this 3479 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3480 3481 def _parse_column(self) -> t.Optional[exp.Expression]: 3482 this = self._parse_field() 3483 if isinstance(this, exp.Identifier): 3484 this = self.expression(exp.Column, this=this) 3485 elif not this: 3486 return self._parse_bracket(this) 3487 return self._parse_column_ops(this) 3488 3489 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3490 this = self._parse_bracket(this) 3491 3492 while self._match_set(self.COLUMN_OPERATORS): 3493 op_token = self._prev.token_type 3494 op = self.COLUMN_OPERATORS.get(op_token) 3495 3496 if op_token == TokenType.DCOLON: 3497 field = self._parse_types() 3498 if not field: 3499 self.raise_error("Expected type") 3500 elif op and self._curr: 3501 self._advance() 3502 value = self._prev.text 3503 field = ( 3504 exp.Literal.number(value) 3505 if self._prev.token_type == TokenType.NUMBER 3506 else exp.Literal.string(value) 3507 ) 3508 else: 3509 field = self._parse_field(anonymous_func=True, any_token=True) 3510 3511 if isinstance(field, exp.Func): 3512 # bigquery allows function calls like x.y.count(...) 3513 # SAFE.SUBSTR(...) 3514 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3515 this = self._replace_columns_with_dots(this) 3516 3517 if op: 3518 this = op(self, this, field) 3519 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3520 this = self.expression( 3521 exp.Column, 3522 this=field, 3523 table=this.this, 3524 db=this.args.get("table"), 3525 catalog=this.args.get("db"), 3526 ) 3527 else: 3528 this = self.expression(exp.Dot, this=this, expression=field) 3529 this = self._parse_bracket(this) 3530 return this 3531 3532 def _parse_primary(self) -> t.Optional[exp.Expression]: 3533 if self._match_set(self.PRIMARY_PARSERS): 3534 token_type = self._prev.token_type 3535 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3536 3537 if token_type == TokenType.STRING: 3538 expressions = [primary] 3539 while self._match(TokenType.STRING): 3540 expressions.append(exp.Literal.string(self._prev.text)) 3541 3542 if len(expressions) > 1: 3543 return self.expression(exp.Concat, expressions=expressions) 3544 3545 return primary 3546 3547 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3548 return exp.Literal.number(f"0.{self._prev.text}") 3549 3550 if self._match(TokenType.L_PAREN): 3551 comments = self._prev_comments 3552 query = self._parse_select() 3553 3554 if query: 3555 expressions = [query] 3556 else: 3557 expressions = self._parse_expressions() 3558 3559 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3560 3561 if isinstance(this, exp.Subqueryable): 3562 this = self._parse_set_operations( 3563 self._parse_subquery(this=this, parse_alias=False) 3564 ) 3565 elif len(expressions) > 1: 3566 this = self.expression(exp.Tuple, expressions=expressions) 3567 else: 3568 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3569 3570 if this: 3571 this.add_comments(comments) 3572 3573 self._match_r_paren(expression=this) 3574 return this 3575 3576 
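        # Nothing at the current position parses as a primary expression.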
return None 3577 3578 def _parse_field( 3579 self, 3580 any_token: bool = False, 3581 tokens: t.Optional[t.Collection[TokenType]] = None, 3582 anonymous_func: bool = False, 3583 ) -> t.Optional[exp.Expression]: 3584 return ( 3585 self._parse_primary() 3586 or self._parse_function(anonymous=anonymous_func) 3587 or self._parse_id_var(any_token=any_token, tokens=tokens) 3588 ) 3589 3590 def _parse_function( 3591 self, 3592 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3593 anonymous: bool = False, 3594 optional_parens: bool = True, 3595 ) -> t.Optional[exp.Expression]: 3596 if not self._curr: 3597 return None 3598 3599 token_type = self._curr.token_type 3600 this = self._curr.text 3601 upper = this.upper() 3602 3603 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3604 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3605 self._advance() 3606 return parser(self) 3607 3608 if not self._next or self._next.token_type != TokenType.L_PAREN: 3609 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3610 self._advance() 3611 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3612 3613 return None 3614 3615 if token_type not in self.FUNC_TOKENS: 3616 return None 3617 3618 self._advance(2) 3619 3620 parser = self.FUNCTION_PARSERS.get(upper) 3621 if parser and not anonymous: 3622 this = parser(self) 3623 else: 3624 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3625 3626 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3627 this = self.expression(subquery_predicate, this=self._parse_select()) 3628 self._match_r_paren() 3629 return this 3630 3631 if functions is None: 3632 functions = self.FUNCTIONS 3633 3634 function = functions.get(upper) 3635 3636 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3637 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3638 3639 if function and not anonymous: 3640 func = self.validate_expression(function(args), args) 3641 if not self.NORMALIZE_FUNCTIONS: 3642 func.meta["name"] = this 3643 this = func 3644 else: 3645 this = self.expression(exp.Anonymous, this=this, expressions=args) 3646 3647 self._match_r_paren(this) 3648 return self._parse_window(this) 3649 3650 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3651 return self._parse_column_def(self._parse_id_var()) 3652 3653 def _parse_user_defined_function( 3654 self, kind: t.Optional[TokenType] = None 3655 ) -> t.Optional[exp.Expression]: 3656 this = self._parse_id_var() 3657 3658 while self._match(TokenType.DOT): 3659 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3660 3661 if not self._match(TokenType.L_PAREN): 3662 return this 3663 3664 expressions = self._parse_csv(self._parse_function_parameter) 3665 self._match_r_paren() 3666 return self.expression( 3667 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3668 ) 3669 3670 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3671 literal = self._parse_primary() 3672 if literal: 3673 return self.expression(exp.Introducer, this=token.text, expression=literal) 3674 3675 return self.expression(exp.Identifier, this=token.text) 3676 3677 def _parse_session_parameter(self) -> exp.SessionParameter: 3678 kind = None 3679 this = self._parse_id_var() or self._parse_primary() 3680 3681 if this and self._match(TokenType.DOT): 3682 kind = this.name 3683 this = self._parse_var() or self._parse_primary() 3684 3685 return 
self.expression(exp.SessionParameter, this=this, kind=kind) 3686 3687 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3688 index = self._index 3689 3690 if self._match(TokenType.L_PAREN): 3691 expressions = t.cast( 3692 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3693 ) 3694 3695 if not self._match(TokenType.R_PAREN): 3696 self._retreat(index) 3697 else: 3698 expressions = [self._parse_id_var()] 3699 3700 if self._match_set(self.LAMBDAS): 3701 return self.LAMBDAS[self._prev.token_type](self, expressions) 3702 3703 self._retreat(index) 3704 3705 this: t.Optional[exp.Expression] 3706 3707 if self._match(TokenType.DISTINCT): 3708 this = self.expression( 3709 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3710 ) 3711 else: 3712 this = self._parse_select_or_expression(alias=alias) 3713 3714 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3715 3716 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3717 index = self._index 3718 3719 if not self.errors: 3720 try: 3721 if self._parse_select(nested=True): 3722 return this 3723 except ParseError: 3724 pass 3725 finally: 3726 self.errors.clear() 3727 self._retreat(index) 3728 3729 if not self._match(TokenType.L_PAREN): 3730 return this 3731 3732 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3733 3734 self._match_r_paren() 3735 return self.expression(exp.Schema, this=this, expressions=args) 3736 3737 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3738 return self._parse_column_def(self._parse_field(any_token=True)) 3739 3740 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3741 # column defs are not really columns, they're identifiers 3742 if isinstance(this, exp.Column): 3743 this = this.this 3744 3745 kind = self._parse_types(schema=True) 3746 3747 if self._match_text_seq("FOR", "ORDINALITY"): 3748 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3749 3750 constraints: t.List[exp.Expression] = [] 3751 3752 if not kind and self._match(TokenType.ALIAS): 3753 constraints.append( 3754 self.expression( 3755 exp.ComputedColumnConstraint, 3756 this=self._parse_conjunction(), 3757 persisted=self._match_text_seq("PERSISTED"), 3758 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3759 ) 3760 ) 3761 3762 while True: 3763 constraint = self._parse_column_constraint() 3764 if not constraint: 3765 break 3766 constraints.append(constraint) 3767 3768 if not kind and not constraints: 3769 return this 3770 3771 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3772 3773 def _parse_auto_increment( 3774 self, 3775 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3776 start = None 3777 increment = None 3778 3779 if self._match(TokenType.L_PAREN, advance=False): 3780 args = self._parse_wrapped_csv(self._parse_bitwise) 3781 start = seq_get(args, 0) 3782 increment = seq_get(args, 1) 3783 elif self._match_text_seq("START"): 3784 start = self._parse_bitwise() 3785 self._match_text_seq("INCREMENT") 3786 increment = self._parse_bitwise() 3787 3788 if start and increment: 3789 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3790 3791 return exp.AutoIncrementColumnConstraint() 3792 3793 def _parse_compress(self) -> exp.CompressColumnConstraint: 3794 if self._match(TokenType.L_PAREN, advance=False): 3795 
return self.expression( 3796 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3797 ) 3798 3799 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3800 3801 def _parse_generated_as_identity( 3802 self, 3803 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3804 if self._match_text_seq("BY", "DEFAULT"): 3805 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3806 this = self.expression( 3807 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3808 ) 3809 else: 3810 self._match_text_seq("ALWAYS") 3811 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3812 3813 self._match(TokenType.ALIAS) 3814 identity = self._match_text_seq("IDENTITY") 3815 3816 if self._match(TokenType.L_PAREN): 3817 if self._match(TokenType.START_WITH): 3818 this.set("start", self._parse_bitwise()) 3819 if self._match_text_seq("INCREMENT", "BY"): 3820 this.set("increment", self._parse_bitwise()) 3821 if self._match_text_seq("MINVALUE"): 3822 this.set("minvalue", self._parse_bitwise()) 3823 if self._match_text_seq("MAXVALUE"): 3824 this.set("maxvalue", self._parse_bitwise()) 3825 3826 if self._match_text_seq("CYCLE"): 3827 this.set("cycle", True) 3828 elif self._match_text_seq("NO", "CYCLE"): 3829 this.set("cycle", False) 3830 3831 if not identity: 3832 this.set("expression", self._parse_bitwise()) 3833 3834 self._match_r_paren() 3835 3836 return this 3837 3838 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3839 self._match_text_seq("LENGTH") 3840 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3841 3842 def _parse_not_constraint( 3843 self, 3844 ) -> t.Optional[exp.Expression]: 3845 if self._match_text_seq("NULL"): 3846 return self.expression(exp.NotNullColumnConstraint) 3847 if self._match_text_seq("CASESPECIFIC"): 3848 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3849 if self._match_text_seq("FOR", "REPLICATION"): 3850 return self.expression(exp.NotForReplicationColumnConstraint) 3851 return None 3852 3853 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3854 if self._match(TokenType.CONSTRAINT): 3855 this = self._parse_id_var() 3856 else: 3857 this = None 3858 3859 if self._match_texts(self.CONSTRAINT_PARSERS): 3860 return self.expression( 3861 exp.ColumnConstraint, 3862 this=this, 3863 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3864 ) 3865 3866 return this 3867 3868 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3869 if not self._match(TokenType.CONSTRAINT): 3870 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3871 3872 this = self._parse_id_var() 3873 expressions = [] 3874 3875 while True: 3876 constraint = self._parse_unnamed_constraint() or self._parse_function() 3877 if not constraint: 3878 break 3879 expressions.append(constraint) 3880 3881 return self.expression(exp.Constraint, this=this, expressions=expressions) 3882 3883 def _parse_unnamed_constraint( 3884 self, constraints: t.Optional[t.Collection[str]] = None 3885 ) -> t.Optional[exp.Expression]: 3886 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3887 return None 3888 3889 constraint = self._prev.text.upper() 3890 if constraint not in self.CONSTRAINT_PARSERS: 3891 self.raise_error(f"No parser found for schema constraint {constraint}.") 3892 3893 return self.CONSTRAINT_PARSERS[constraint](self) 3894 3895 def _parse_unique(self) -> 
exp.UniqueColumnConstraint: 3896 self._match_text_seq("KEY") 3897 return self.expression( 3898 exp.UniqueColumnConstraint, 3899 this=self._parse_schema(self._parse_id_var(any_token=False)), 3900 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3901 ) 3902 3903 def _parse_key_constraint_options(self) -> t.List[str]: 3904 options = [] 3905 while True: 3906 if not self._curr: 3907 break 3908 3909 if self._match(TokenType.ON): 3910 action = None 3911 on = self._advance_any() and self._prev.text 3912 3913 if self._match_text_seq("NO", "ACTION"): 3914 action = "NO ACTION" 3915 elif self._match_text_seq("CASCADE"): 3916 action = "CASCADE" 3917 elif self._match_text_seq("RESTRICT"): 3918 action = "RESTRICT" 3919 elif self._match_pair(TokenType.SET, TokenType.NULL): 3920 action = "SET NULL" 3921 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3922 action = "SET DEFAULT" 3923 else: 3924 self.raise_error("Invalid key constraint") 3925 3926 options.append(f"ON {on} {action}") 3927 elif self._match_text_seq("NOT", "ENFORCED"): 3928 options.append("NOT ENFORCED") 3929 elif self._match_text_seq("DEFERRABLE"): 3930 options.append("DEFERRABLE") 3931 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3932 options.append("INITIALLY DEFERRED") 3933 elif self._match_text_seq("NORELY"): 3934 options.append("NORELY") 3935 elif self._match_text_seq("MATCH", "FULL"): 3936 options.append("MATCH FULL") 3937 else: 3938 break 3939 3940 return options 3941 3942 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3943 if match and not self._match(TokenType.REFERENCES): 3944 return None 3945 3946 expressions = None 3947 this = self._parse_table(schema=True) 3948 options = self._parse_key_constraint_options() 3949 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3950 3951 def _parse_foreign_key(self) -> exp.ForeignKey: 3952 expressions = self._parse_wrapped_id_vars() 3953 reference = self._parse_references() 3954 options = {} 3955 3956 while self._match(TokenType.ON): 3957 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3958 self.raise_error("Expected DELETE or UPDATE") 3959 3960 kind = self._prev.text.lower() 3961 3962 if self._match_text_seq("NO", "ACTION"): 3963 action = "NO ACTION" 3964 elif self._match(TokenType.SET): 3965 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3966 action = "SET " + self._prev.text.upper() 3967 else: 3968 self._advance() 3969 action = self._prev.text.upper() 3970 3971 options[kind] = action 3972 3973 return self.expression( 3974 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3975 ) 3976 3977 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3978 return self._parse_field() 3979 3980 def _parse_primary_key( 3981 self, wrapped_optional: bool = False, in_props: bool = False 3982 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3983 desc = ( 3984 self._match_set((TokenType.ASC, TokenType.DESC)) 3985 and self._prev.token_type == TokenType.DESC 3986 ) 3987 3988 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3989 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3990 3991 expressions = self._parse_wrapped_csv( 3992 self._parse_primary_key_part, optional=wrapped_optional 3993 ) 3994 options = self._parse_key_constraint_options() 3995 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3996 3997 def _parse_bracket(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3998 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3999 return this 4000 4001 bracket_kind = self._prev.token_type 4002 4003 if self._match(TokenType.COLON): 4004 expressions: t.List[exp.Expression] = [ 4005 self.expression(exp.Slice, expression=self._parse_conjunction()) 4006 ] 4007 else: 4008 expressions = self._parse_csv( 4009 lambda: self._parse_slice( 4010 self._parse_alias(self._parse_conjunction(), explicit=True) 4011 ) 4012 ) 4013 4014 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4015 if bracket_kind == TokenType.L_BRACE: 4016 this = self.expression(exp.Struct, expressions=expressions) 4017 elif not this or this.name.upper() == "ARRAY": 4018 this = self.expression(exp.Array, expressions=expressions) 4019 else: 4020 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4021 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4022 4023 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4024 self.raise_error("Expected ]") 4025 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4026 self.raise_error("Expected }") 4027 4028 self._add_comments(this) 4029 return self._parse_bracket(this) 4030 4031 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4032 if self._match(TokenType.COLON): 4033 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4034 return this 4035 4036 def _parse_case(self) -> t.Optional[exp.Expression]: 4037 ifs = [] 4038 default = None 4039 4040 comments = self._prev_comments 4041 expression = self._parse_conjunction() 4042 4043 while self._match(TokenType.WHEN): 4044 this = self._parse_conjunction() 4045 self._match(TokenType.THEN) 4046 then = self._parse_conjunction() 4047 ifs.append(self.expression(exp.If, this=this, true=then)) 4048 4049 if self._match(TokenType.ELSE): 4050 default = self._parse_conjunction() 4051 4052 if not self._match(TokenType.END): 4053 self.raise_error("Expected END after CASE", self._prev) 4054 4055 return self._parse_window( 4056 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4057 ) 4058 4059 def _parse_if(self) -> t.Optional[exp.Expression]: 4060 if self._match(TokenType.L_PAREN): 4061 args = self._parse_csv(self._parse_conjunction) 4062 this = self.validate_expression(exp.If.from_arg_list(args), args) 4063 self._match_r_paren() 4064 else: 4065 index = self._index - 1 4066 condition = self._parse_conjunction() 4067 4068 if not condition: 4069 self._retreat(index) 4070 return None 4071 4072 self._match(TokenType.THEN) 4073 true = self._parse_conjunction() 4074 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4075 self._match(TokenType.END) 4076 this = self.expression(exp.If, this=condition, true=true, false=false) 4077 4078 return self._parse_window(this) 4079 4080 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4081 if not self._match_text_seq("VALUE", "FOR"): 4082 self._retreat(self._index - 1) 4083 return None 4084 4085 return self.expression( 4086 exp.NextValueFor, 4087 this=self._parse_column(), 4088 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4089 ) 4090 4091 def _parse_extract(self) -> exp.Extract: 4092 this = self._parse_function() or self._parse_var() or self._parse_type() 4093 4094 if self._match(TokenType.FROM): 4095 return self.expression(exp.Extract, 
this=this, expression=self._parse_bitwise()) 4096 4097 if not self._match(TokenType.COMMA): 4098 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4099 4100 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4101 4102 def _parse_any_value(self) -> exp.AnyValue: 4103 this = self._parse_lambda() 4104 is_max = None 4105 having = None 4106 4107 if self._match(TokenType.HAVING): 4108 self._match_texts(("MAX", "MIN")) 4109 is_max = self._prev.text == "MAX" 4110 having = self._parse_column() 4111 4112 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4113 4114 def _parse_cast(self, strict: bool) -> exp.Expression: 4115 this = self._parse_conjunction() 4116 4117 if not self._match(TokenType.ALIAS): 4118 if self._match(TokenType.COMMA): 4119 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4120 4121 self.raise_error("Expected AS after CAST") 4122 4123 fmt = None 4124 to = self._parse_types() 4125 4126 if not to: 4127 self.raise_error("Expected TYPE after CAST") 4128 elif isinstance(to, exp.Identifier): 4129 to = exp.DataType.build(to.name, udt=True) 4130 elif to.this == exp.DataType.Type.CHAR: 4131 if self._match(TokenType.CHARACTER_SET): 4132 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4133 elif self._match(TokenType.FORMAT): 4134 fmt_string = self._parse_string() 4135 fmt = self._parse_at_time_zone(fmt_string) 4136 4137 if to.this in exp.DataType.TEMPORAL_TYPES: 4138 this = self.expression( 4139 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4140 this=this, 4141 format=exp.Literal.string( 4142 format_time( 4143 fmt_string.this if fmt_string else "", 4144 self.FORMAT_MAPPING or self.TIME_MAPPING, 4145 self.FORMAT_TRIE or self.TIME_TRIE, 4146 ) 4147 ), 4148 ) 4149 4150 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4151 this.set("zone", fmt.args["zone"]) 4152 4153 return this 4154 4155 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4156 4157 def _parse_concat(self) -> t.Optional[exp.Expression]: 4158 args = self._parse_csv(self._parse_conjunction) 4159 if self.CONCAT_NULL_OUTPUTS_STRING: 4160 args = self._ensure_string_if_null(args) 4161 4162 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4163 # we find such a call we replace it with its argument. 
4164 if len(args) == 1: 4165 return args[0] 4166 4167 return self.expression( 4168 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4169 ) 4170 4171 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4172 args = self._parse_csv(self._parse_conjunction) 4173 if len(args) < 2: 4174 return self.expression(exp.ConcatWs, expressions=args) 4175 delim, *values = args 4176 if self.CONCAT_NULL_OUTPUTS_STRING: 4177 values = self._ensure_string_if_null(values) 4178 4179 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4180 4181 def _parse_string_agg(self) -> exp.Expression: 4182 if self._match(TokenType.DISTINCT): 4183 args: t.List[t.Optional[exp.Expression]] = [ 4184 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4185 ] 4186 if self._match(TokenType.COMMA): 4187 args.extend(self._parse_csv(self._parse_conjunction)) 4188 else: 4189 args = self._parse_csv(self._parse_conjunction) # type: ignore 4190 4191 index = self._index 4192 if not self._match(TokenType.R_PAREN) and args: 4193 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4194 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4195 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4196 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4197 4198 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4199 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4200 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4201 if not self._match_text_seq("WITHIN", "GROUP"): 4202 self._retreat(index) 4203 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4204 4205 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4206 order = self._parse_order(this=seq_get(args, 0)) 4207 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4208 4209 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4210 this = self._parse_bitwise() 4211 4212 if self._match(TokenType.USING): 4213 to: t.Optional[exp.Expression] = self.expression( 4214 exp.CharacterSet, this=self._parse_var() 4215 ) 4216 elif self._match(TokenType.COMMA): 4217 to = self._parse_types() 4218 else: 4219 to = None 4220 4221 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4222 4223 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4224 """ 4225 There are generally two variants of the DECODE function: 4226 4227 - DECODE(bin, charset) 4228 - DECODE(expression, search, result [, search, result] ... [, default]) 4229 4230 The second variant will always be parsed into a CASE expression. Note that NULL 4231 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4232 instead of relying on pattern matching. 
4233 """ 4234 args = self._parse_csv(self._parse_conjunction) 4235 4236 if len(args) < 3: 4237 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4238 4239 expression, *expressions = args 4240 if not expression: 4241 return None 4242 4243 ifs = [] 4244 for search, result in zip(expressions[::2], expressions[1::2]): 4245 if not search or not result: 4246 return None 4247 4248 if isinstance(search, exp.Literal): 4249 ifs.append( 4250 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4251 ) 4252 elif isinstance(search, exp.Null): 4253 ifs.append( 4254 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4255 ) 4256 else: 4257 cond = exp.or_( 4258 exp.EQ(this=expression.copy(), expression=search), 4259 exp.and_( 4260 exp.Is(this=expression.copy(), expression=exp.Null()), 4261 exp.Is(this=search.copy(), expression=exp.Null()), 4262 copy=False, 4263 ), 4264 copy=False, 4265 ) 4266 ifs.append(exp.If(this=cond, true=result)) 4267 4268 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4269 4270 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4271 self._match_text_seq("KEY") 4272 key = self._parse_column() 4273 self._match_set((TokenType.COLON, TokenType.COMMA)) 4274 self._match_text_seq("VALUE") 4275 value = self._parse_bitwise() 4276 4277 if not key and not value: 4278 return None 4279 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4280 4281 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4282 if not this or not self._match_text_seq("FORMAT", "JSON"): 4283 return this 4284 4285 return self.expression(exp.FormatJson, this=this) 4286 4287 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4288 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4289 for value in values: 4290 if self._match_text_seq(value, "ON", on): 4291 return f"{value} ON {on}" 4292 4293 return None 4294 4295 def _parse_json_object(self) -> exp.JSONObject: 4296 star = self._parse_star() 4297 expressions = ( 4298 [star] 4299 if star 4300 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4301 ) 4302 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4303 4304 unique_keys = None 4305 if self._match_text_seq("WITH", "UNIQUE"): 4306 unique_keys = True 4307 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4308 unique_keys = False 4309 4310 self._match_text_seq("KEYS") 4311 4312 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4313 self._parse_type() 4314 ) 4315 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4316 4317 return self.expression( 4318 exp.JSONObject, 4319 expressions=expressions, 4320 null_handling=null_handling, 4321 unique_keys=unique_keys, 4322 return_type=return_type, 4323 encoding=encoding, 4324 ) 4325 4326 def _parse_logarithm(self) -> exp.Func: 4327 # Default argument order is base, expression 4328 args = self._parse_csv(self._parse_range) 4329 4330 if len(args) > 1: 4331 if not self.LOG_BASE_FIRST: 4332 args.reverse() 4333 return exp.Log.from_arg_list(args) 4334 4335 return self.expression( 4336 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4337 ) 4338 4339 def _parse_match_against(self) -> exp.MatchAgainst: 4340 expressions = self._parse_csv(self._parse_column) 4341 4342 self._match_text_seq(")", "AGAINST", "(") 4343 4344 this = self._parse_string() 4345 4346 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4347 modifier = "IN NATURAL LANGUAGE MODE" 4348 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4349 modifier = f"{modifier} WITH QUERY EXPANSION" 4350 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4351 modifier = "IN BOOLEAN MODE" 4352 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4353 modifier = "WITH QUERY EXPANSION" 4354 else: 4355 modifier = None 4356 4357 return self.expression( 4358 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4359 ) 4360 4361 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4362 def _parse_open_json(self) -> exp.OpenJSON: 4363 this = self._parse_bitwise() 4364 path = self._match(TokenType.COMMA) and self._parse_string() 4365 4366 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4367 this = self._parse_field(any_token=True) 4368 kind = self._parse_types() 4369 path = self._parse_string() 4370 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4371 4372 return self.expression( 4373 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4374 ) 4375 4376 expressions = None 4377 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4378 self._match_l_paren() 4379 expressions = self._parse_csv(_parse_open_json_column_def) 4380 4381 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4382 4383 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4384 args = self._parse_csv(self._parse_bitwise) 4385 4386 if self._match(TokenType.IN): 4387 return self.expression( 4388 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4389 ) 4390 4391 if haystack_first: 4392 haystack = seq_get(args, 0) 4393 needle = seq_get(args, 1) 4394 else: 4395 needle = seq_get(args, 0) 
4396 haystack = seq_get(args, 1) 4397 4398 return self.expression( 4399 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4400 ) 4401 4402 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4403 args = self._parse_csv(self._parse_table) 4404 return exp.JoinHint(this=func_name.upper(), expressions=args) 4405 4406 def _parse_substring(self) -> exp.Substring: 4407 # Postgres supports the form: substring(string [from int] [for int]) 4408 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4409 4410 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4411 4412 if self._match(TokenType.FROM): 4413 args.append(self._parse_bitwise()) 4414 if self._match(TokenType.FOR): 4415 args.append(self._parse_bitwise()) 4416 4417 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4418 4419 def _parse_trim(self) -> exp.Trim: 4420 # https://www.w3resource.com/sql/character-functions/trim.php 4421 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4422 4423 position = None 4424 collation = None 4425 expression = None 4426 4427 if self._match_texts(self.TRIM_TYPES): 4428 position = self._prev.text.upper() 4429 4430 this = self._parse_bitwise() 4431 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4432 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4433 expression = self._parse_bitwise() 4434 4435 if invert_order: 4436 this, expression = expression, this 4437 4438 if self._match(TokenType.COLLATE): 4439 collation = self._parse_bitwise() 4440 4441 return self.expression( 4442 exp.Trim, this=this, position=position, expression=expression, collation=collation 4443 ) 4444 4445 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4446 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4447 4448 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4449 return self._parse_window(self._parse_id_var(), alias=True) 4450 4451 def _parse_respect_or_ignore_nulls( 4452 self, this: t.Optional[exp.Expression] 4453 ) -> t.Optional[exp.Expression]: 4454 if self._match_text_seq("IGNORE", "NULLS"): 4455 return self.expression(exp.IgnoreNulls, this=this) 4456 if self._match_text_seq("RESPECT", "NULLS"): 4457 return self.expression(exp.RespectNulls, this=this) 4458 return this 4459 4460 def _parse_window( 4461 self, this: t.Optional[exp.Expression], alias: bool = False 4462 ) -> t.Optional[exp.Expression]: 4463 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4464 self._match(TokenType.WHERE) 4465 this = self.expression( 4466 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4467 ) 4468 self._match_r_paren() 4469 4470 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4471 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4472 if self._match_text_seq("WITHIN", "GROUP"): 4473 order = self._parse_wrapped(self._parse_order) 4474 this = self.expression(exp.WithinGroup, this=this, expression=order) 4475 4476 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4477 # Some dialects choose to implement and some do not. 4478 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4479 4480 # There is some code above in _parse_lambda that handles 4481 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 
4482 4483 # The below changes handle 4484 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4485 4486 # Oracle allows both formats 4487 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4488 # and Snowflake chose to do the same for familiarity 4489 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4490 this = self._parse_respect_or_ignore_nulls(this) 4491 4492 # bigquery select from window x AS (partition by ...) 4493 if alias: 4494 over = None 4495 self._match(TokenType.ALIAS) 4496 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4497 return this 4498 else: 4499 over = self._prev.text.upper() 4500 4501 if not self._match(TokenType.L_PAREN): 4502 return self.expression( 4503 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4504 ) 4505 4506 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4507 4508 first = self._match(TokenType.FIRST) 4509 if self._match_text_seq("LAST"): 4510 first = False 4511 4512 partition, order = self._parse_partition_and_order() 4513 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4514 4515 if kind: 4516 self._match(TokenType.BETWEEN) 4517 start = self._parse_window_spec() 4518 self._match(TokenType.AND) 4519 end = self._parse_window_spec() 4520 4521 spec = self.expression( 4522 exp.WindowSpec, 4523 kind=kind, 4524 start=start["value"], 4525 start_side=start["side"], 4526 end=end["value"], 4527 end_side=end["side"], 4528 ) 4529 else: 4530 spec = None 4531 4532 self._match_r_paren() 4533 4534 window = self.expression( 4535 exp.Window, 4536 this=this, 4537 partition_by=partition, 4538 order=order, 4539 spec=spec, 4540 alias=window_alias, 4541 over=over, 4542 first=first, 4543 ) 4544 4545 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4546 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4547 return self._parse_window(window, alias=alias) 4548 4549 return window 4550 4551 def _parse_partition_and_order( 4552 self, 4553 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4554 return self._parse_partition_by(), self._parse_order() 4555 4556 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4557 self._match(TokenType.BETWEEN) 4558 4559 return { 4560 "value": ( 4561 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4562 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4563 or self._parse_bitwise() 4564 ), 4565 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4566 } 4567 4568 def _parse_alias( 4569 self, this: t.Optional[exp.Expression], explicit: bool = False 4570 ) -> t.Optional[exp.Expression]: 4571 any_token = self._match(TokenType.ALIAS) 4572 4573 if explicit and not any_token: 4574 return this 4575 4576 if self._match(TokenType.L_PAREN): 4577 aliases = self.expression( 4578 exp.Aliases, 4579 this=this, 4580 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4581 ) 4582 self._match_r_paren(aliases) 4583 return aliases 4584 4585 alias = self._parse_id_var(any_token) 4586 4587 if alias: 4588 return self.expression(exp.Alias, this=this, alias=alias) 4589 4590 return this 4591 4592 def _parse_id_var( 4593 self, 4594 any_token: bool = True, 4595 tokens: t.Optional[t.Collection[TokenType]] = None, 4596 ) -> t.Optional[exp.Expression]: 4597 identifier = self._parse_identifier() 4598 4599 if identifier: 4600 return identifier 4601 4602 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4603 quoted = self._prev.token_type == TokenType.STRING 4604 return exp.Identifier(this=self._prev.text, quoted=quoted) 4605 4606 return None 4607 4608 def _parse_string(self) -> t.Optional[exp.Expression]: 4609 if self._match(TokenType.STRING): 4610 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4611 return self._parse_placeholder() 4612 4613 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4614 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4615 4616 def _parse_number(self) -> t.Optional[exp.Expression]: 4617 if self._match(TokenType.NUMBER): 4618 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4619 return self._parse_placeholder() 4620 4621 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4622 if self._match(TokenType.IDENTIFIER): 4623 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4624 return self._parse_placeholder() 4625 4626 def _parse_var( 4627 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4628 ) -> t.Optional[exp.Expression]: 4629 if ( 4630 (any_token and self._advance_any()) 4631 or self._match(TokenType.VAR) 4632 or (self._match_set(tokens) if tokens else False) 4633 ): 4634 return self.expression(exp.Var, this=self._prev.text) 4635 return self._parse_placeholder() 4636 4637 def _advance_any(self) -> t.Optional[Token]: 4638 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4639 self._advance() 4640 return self._prev 4641 return None 4642 4643 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4644 return self._parse_var() or self._parse_string() 4645 4646 def _parse_null(self) -> t.Optional[exp.Expression]: 4647 if self._match_set(self.NULL_TOKENS): 4648 return 
self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4649 return self._parse_placeholder() 4650 4651 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4652 if self._match(TokenType.TRUE): 4653 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4654 if self._match(TokenType.FALSE): 4655 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4656 return self._parse_placeholder() 4657 4658 def _parse_star(self) -> t.Optional[exp.Expression]: 4659 if self._match(TokenType.STAR): 4660 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4661 return self._parse_placeholder() 4662 4663 def _parse_parameter(self) -> exp.Parameter: 4664 wrapped = self._match(TokenType.L_BRACE) 4665 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4666 self._match(TokenType.R_BRACE) 4667 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4668 4669 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4670 if self._match_set(self.PLACEHOLDER_PARSERS): 4671 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4672 if placeholder: 4673 return placeholder 4674 self._advance(-1) 4675 return None 4676 4677 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4678 if not self._match(TokenType.EXCEPT): 4679 return None 4680 if self._match(TokenType.L_PAREN, advance=False): 4681 return self._parse_wrapped_csv(self._parse_column) 4682 4683 except_column = self._parse_column() 4684 return [except_column] if except_column else None 4685 4686 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4687 if not self._match(TokenType.REPLACE): 4688 return None 4689 if self._match(TokenType.L_PAREN, advance=False): 4690 return self._parse_wrapped_csv(self._parse_expression) 4691 4692 replace_expression = self._parse_expression() 4693 return [replace_expression] if replace_expression else None 4694 4695 def _parse_csv( 4696 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4697 ) -> t.List[exp.Expression]: 4698 parse_result = parse_method() 4699 items = [parse_result] if parse_result is not None else [] 4700 4701 while self._match(sep): 4702 self._add_comments(parse_result) 4703 parse_result = parse_method() 4704 if parse_result is not None: 4705 items.append(parse_result) 4706 4707 return items 4708 4709 def _parse_tokens( 4710 self, parse_method: t.Callable, expressions: t.Dict 4711 ) -> t.Optional[exp.Expression]: 4712 this = parse_method() 4713 4714 while self._match_set(expressions): 4715 this = self.expression( 4716 expressions[self._prev.token_type], 4717 this=this, 4718 comments=self._prev_comments, 4719 expression=parse_method(), 4720 ) 4721 4722 return this 4723 4724 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4725 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4726 4727 def _parse_wrapped_csv( 4728 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4729 ) -> t.List[exp.Expression]: 4730 return self._parse_wrapped( 4731 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4732 ) 4733 4734 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4735 wrapped = self._match(TokenType.L_PAREN) 4736 if not wrapped and not optional: 4737 self.raise_error("Expecting (") 4738 parse_result = parse_method() 4739 if wrapped: 4740 self._match_r_paren() 4741 return parse_result 4742 4743 def _parse_expressions(self) -> t.List[exp.Expression]: 4744 return 
self._parse_csv(self._parse_expression) 4745 4746 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4747 return self._parse_select() or self._parse_set_operations( 4748 self._parse_expression() if alias else self._parse_conjunction() 4749 ) 4750 4751 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4752 return self._parse_query_modifiers( 4753 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4754 ) 4755 4756 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4757 this = None 4758 if self._match_texts(self.TRANSACTION_KIND): 4759 this = self._prev.text 4760 4761 self._match_texts({"TRANSACTION", "WORK"}) 4762 4763 modes = [] 4764 while True: 4765 mode = [] 4766 while self._match(TokenType.VAR): 4767 mode.append(self._prev.text) 4768 4769 if mode: 4770 modes.append(" ".join(mode)) 4771 if not self._match(TokenType.COMMA): 4772 break 4773 4774 return self.expression(exp.Transaction, this=this, modes=modes) 4775 4776 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4777 chain = None 4778 savepoint = None 4779 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4780 4781 self._match_texts({"TRANSACTION", "WORK"}) 4782 4783 if self._match_text_seq("TO"): 4784 self._match_text_seq("SAVEPOINT") 4785 savepoint = self._parse_id_var() 4786 4787 if self._match(TokenType.AND): 4788 chain = not self._match_text_seq("NO") 4789 self._match_text_seq("CHAIN") 4790 4791 if is_rollback: 4792 return self.expression(exp.Rollback, savepoint=savepoint) 4793 4794 return self.expression(exp.Commit, chain=chain) 4795 4796 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4797 if not self._match_text_seq("ADD"): 4798 return None 4799 4800 self._match(TokenType.COLUMN) 4801 exists_column = self._parse_exists(not_=True) 4802 expression = self._parse_field_def() 4803 4804 if expression: 4805 expression.set("exists", exists_column) 4806 4807 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4808 if self._match_texts(("FIRST", "AFTER")): 4809 position = self._prev.text 4810 column_position = self.expression( 4811 exp.ColumnPosition, this=self._parse_column(), position=position 4812 ) 4813 expression.set("position", column_position) 4814 4815 return expression 4816 4817 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4818 drop = self._match(TokenType.DROP) and self._parse_drop() 4819 if drop and not isinstance(drop, exp.Command): 4820 drop.set("kind", drop.args.get("kind", "COLUMN")) 4821 return drop 4822 4823 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4824 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4825 return self.expression( 4826 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4827 ) 4828 4829 def _parse_add_constraint(self) -> exp.AddConstraint: 4830 this = None 4831 kind = self._prev.token_type 4832 4833 if kind == TokenType.CONSTRAINT: 4834 this = self._parse_id_var() 4835 4836 if self._match_text_seq("CHECK"): 4837 expression = self._parse_wrapped(self._parse_conjunction) 4838 enforced = self._match_text_seq("ENFORCED") 4839 4840 return self.expression( 4841 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4842 ) 4843 4844 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4845 expression = self._parse_foreign_key() 4846 elif kind == TokenType.PRIMARY_KEY or 
self._match(TokenType.PRIMARY_KEY): 4847 expression = self._parse_primary_key() 4848 else: 4849 expression = None 4850 4851 return self.expression(exp.AddConstraint, this=this, expression=expression) 4852 4853 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4854 index = self._index - 1 4855 4856 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4857 return self._parse_csv(self._parse_add_constraint) 4858 4859 self._retreat(index) 4860 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4861 return self._parse_csv(self._parse_field_def) 4862 4863 return self._parse_csv(self._parse_add_column) 4864 4865 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4866 self._match(TokenType.COLUMN) 4867 column = self._parse_field(any_token=True) 4868 4869 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4870 return self.expression(exp.AlterColumn, this=column, drop=True) 4871 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4872 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4873 4874 self._match_text_seq("SET", "DATA") 4875 return self.expression( 4876 exp.AlterColumn, 4877 this=column, 4878 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4879 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4880 using=self._match(TokenType.USING) and self._parse_conjunction(), 4881 ) 4882 4883 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4884 index = self._index - 1 4885 4886 partition_exists = self._parse_exists() 4887 if self._match(TokenType.PARTITION, advance=False): 4888 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4889 4890 self._retreat(index) 4891 return self._parse_csv(self._parse_drop_column) 4892 4893 def _parse_alter_table_rename(self) -> exp.RenameTable: 4894 self._match_text_seq("TO") 4895 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4896 4897 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4898 start = self._prev 4899 4900 if not self._match(TokenType.TABLE): 4901 return self._parse_as_command(start) 4902 4903 exists = self._parse_exists() 4904 only = self._match_text_seq("ONLY") 4905 this = self._parse_table(schema=True) 4906 4907 if self._next: 4908 self._advance() 4909 4910 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4911 if parser: 4912 actions = ensure_list(parser(self)) 4913 4914 if not self._curr: 4915 return self.expression( 4916 exp.AlterTable, 4917 this=this, 4918 exists=exists, 4919 actions=actions, 4920 only=only, 4921 ) 4922 4923 return self._parse_as_command(start) 4924 4925 def _parse_merge(self) -> exp.Merge: 4926 self._match(TokenType.INTO) 4927 target = self._parse_table() 4928 4929 if target and self._match(TokenType.ALIAS, advance=False): 4930 target.set("alias", self._parse_table_alias()) 4931 4932 self._match(TokenType.USING) 4933 using = self._parse_table() 4934 4935 self._match(TokenType.ON) 4936 on = self._parse_conjunction() 4937 4938 whens = [] 4939 while self._match(TokenType.WHEN): 4940 matched = not self._match(TokenType.NOT) 4941 self._match_text_seq("MATCHED") 4942 source = ( 4943 False 4944 if self._match_text_seq("BY", "TARGET") 4945 else self._match_text_seq("BY", "SOURCE") 4946 ) 4947 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4948 4949 self._match(TokenType.THEN) 4950 4951 if self._match(TokenType.INSERT): 4952 _this = self._parse_star() 4953 if _this: 4954 then: 
t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4955 else: 4956 then = self.expression( 4957 exp.Insert, 4958 this=self._parse_value(), 4959 expression=self._match(TokenType.VALUES) and self._parse_value(), 4960 ) 4961 elif self._match(TokenType.UPDATE): 4962 expressions = self._parse_star() 4963 if expressions: 4964 then = self.expression(exp.Update, expressions=expressions) 4965 else: 4966 then = self.expression( 4967 exp.Update, 4968 expressions=self._match(TokenType.SET) 4969 and self._parse_csv(self._parse_equality), 4970 ) 4971 elif self._match(TokenType.DELETE): 4972 then = self.expression(exp.Var, this=self._prev.text) 4973 else: 4974 then = None 4975 4976 whens.append( 4977 self.expression( 4978 exp.When, 4979 matched=matched, 4980 source=source, 4981 condition=condition, 4982 then=then, 4983 ) 4984 ) 4985 4986 return self.expression( 4987 exp.Merge, 4988 this=target, 4989 using=using, 4990 on=on, 4991 expressions=whens, 4992 ) 4993 4994 def _parse_show(self) -> t.Optional[exp.Expression]: 4995 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4996 if parser: 4997 return parser(self) 4998 return self._parse_as_command(self._prev) 4999 5000 def _parse_set_item_assignment( 5001 self, kind: t.Optional[str] = None 5002 ) -> t.Optional[exp.Expression]: 5003 index = self._index 5004 5005 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5006 return self._parse_set_transaction(global_=kind == "GLOBAL") 5007 5008 left = self._parse_primary() or self._parse_id_var() 5009 assignment_delimiter = self._match_texts(("=", "TO")) 5010 5011 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5012 self._retreat(index) 5013 return None 5014 5015 right = self._parse_statement() or self._parse_id_var() 5016 this = self.expression(exp.EQ, this=left, expression=right) 5017 5018 return self.expression(exp.SetItem, this=this, kind=kind) 5019 5020 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5021 self._match_text_seq("TRANSACTION") 5022 characteristics = self._parse_csv( 5023 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5024 ) 5025 return self.expression( 5026 exp.SetItem, 5027 expressions=characteristics, 5028 kind="TRANSACTION", 5029 **{"global": global_}, # type: ignore 5030 ) 5031 5032 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5033 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5034 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5035 5036 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5037 index = self._index 5038 set_ = self.expression( 5039 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5040 ) 5041 5042 if self._curr: 5043 self._retreat(index) 5044 return self._parse_as_command(self._prev) 5045 5046 return set_ 5047 5048 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5049 for option in options: 5050 if self._match_text_seq(*option.split(" ")): 5051 return exp.var(option) 5052 return None 5053 5054 def _parse_as_command(self, start: Token) -> exp.Command: 5055 while self._curr: 5056 self._advance() 5057 text = self._find_sql(start, self._prev) 5058 size = len(start.text) 5059 return exp.Command(this=text[:size], expression=text[size:]) 5060 5061 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5062 settings = [] 5063 5064 self._match_l_paren() 5065 kind = 
self._parse_id_var() 5066 5067 if self._match(TokenType.L_PAREN): 5068 while True: 5069 key = self._parse_id_var() 5070 value = self._parse_primary() 5071 5072 if not key and value is None: 5073 break 5074 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5075 self._match(TokenType.R_PAREN) 5076 5077 self._match_r_paren() 5078 5079 return self.expression( 5080 exp.DictProperty, 5081 this=this, 5082 kind=kind.this if kind else None, 5083 settings=settings, 5084 ) 5085 5086 def _parse_dict_range(self, this: str) -> exp.DictRange: 5087 self._match_l_paren() 5088 has_min = self._match_text_seq("MIN") 5089 if has_min: 5090 min = self._parse_var() or self._parse_primary() 5091 self._match_text_seq("MAX") 5092 max = self._parse_var() or self._parse_primary() 5093 else: 5094 max = self._parse_var() or self._parse_primary() 5095 min = exp.Literal.number(0) 5096 self._match_r_paren() 5097 return self.expression(exp.DictRange, this=this, min=min, max=max) 5098 5099 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5100 index = self._index 5101 expression = self._parse_column() 5102 if not self._match(TokenType.IN): 5103 self._retreat(index - 1) 5104 return None 5105 iterator = self._parse_column() 5106 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5107 return self.expression( 5108 exp.Comprehension, 5109 this=this, 5110 expression=expression, 5111 iterator=iterator, 5112 condition=condition, 5113 ) 5114 5115 def _find_parser( 5116 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5117 ) -> t.Optional[t.Callable]: 5118 if not self._curr: 5119 return None 5120 5121 index = self._index 5122 this = [] 5123 while True: 5124 # The current token might be multiple words 5125 curr = self._curr.text.upper() 5126 key = curr.split(" ") 5127 this.append(curr) 5128 5129 self._advance() 5130 result, trie = in_trie(trie, key) 5131 if result == TrieResult.FAILED: 5132 break 5133 5134 if result == TrieResult.EXISTS: 5135 subparser = parsers[" ".join(this)] 5136 return subparser 5137 5138 self._retreat(index) 5139 return None 5140 5141 def _match(self, token_type, advance=True, expression=None): 5142 if not self._curr: 5143 return None 5144 5145 if self._curr.token_type == token_type: 5146 if advance: 5147 self._advance() 5148 self._add_comments(expression) 5149 return True 5150 5151 return None 5152 5153 def _match_set(self, types, advance=True): 5154 if not self._curr: 5155 return None 5156 5157 if self._curr.token_type in types: 5158 if advance: 5159 self._advance() 5160 return True 5161 5162 return None 5163 5164 def _match_pair(self, token_type_a, token_type_b, advance=True): 5165 if not self._curr or not self._next: 5166 return None 5167 5168 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5169 if advance: 5170 self._advance(2) 5171 return True 5172 5173 return None 5174 5175 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5176 if not self._match(TokenType.L_PAREN, expression=expression): 5177 self.raise_error("Expecting (") 5178 5179 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5180 if not self._match(TokenType.R_PAREN, expression=expression): 5181 self.raise_error("Expecting )") 5182 5183 def _match_texts(self, texts, advance=True): 5184 if self._curr and self._curr.text.upper() in texts: 5185 if advance: 5186 self._advance() 5187 return True 5188 return False 5189 5190 def _match_text_seq(self, *texts, 
advance=True): 5191 index = self._index 5192 for text in texts: 5193 if self._curr and self._curr.text.upper() == text: 5194 self._advance() 5195 else: 5196 self._retreat(index) 5197 return False 5198 5199 if not advance: 5200 self._retreat(index) 5201 5202 return True 5203 5204 @t.overload 5205 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5206 ... 5207 5208 @t.overload 5209 def _replace_columns_with_dots( 5210 self, this: t.Optional[exp.Expression] 5211 ) -> t.Optional[exp.Expression]: 5212 ... 5213 5214 def _replace_columns_with_dots(self, this): 5215 if isinstance(this, exp.Dot): 5216 exp.replace_children(this, self._replace_columns_with_dots) 5217 elif isinstance(this, exp.Column): 5218 exp.replace_children(this, self._replace_columns_with_dots) 5219 table = this.args.get("table") 5220 this = ( 5221 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5222 ) 5223 5224 return this 5225 5226 def _replace_lambda( 5227 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5228 ) -> t.Optional[exp.Expression]: 5229 if not node: 5230 return node 5231 5232 for column in node.find_all(exp.Column): 5233 if column.parts[0].name in lambda_variables: 5234 dot_or_id = column.to_dot() if column.table else column.this 5235 parent = column.parent 5236 5237 while isinstance(parent, exp.Dot): 5238 if not isinstance(parent.parent, exp.Dot): 5239 parent.replace(dot_or_id) 5240 break 5241 parent = parent.parent 5242 else: 5243 if column is node: 5244 node = dot_or_id 5245 else: 5246 column.replace(dot_or_id) 5247 return node 5248 5249 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5250 return [ 5251 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5252 for value in values 5253 if value 5254 ]
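These private helpers are not part of the public surface, but their effects show through sqlglot's top-level entry points. A minimal sketch of two behaviors documented above, assuming the installed sqlglot package (output shapes may vary slightly by version and dialect):

    import sqlglot
    from sqlglot import exp

    # DECODE(expression, search, result, ..., default) is rewritten into a CASE
    # expression by _parse_decode, as its docstring explains.
    case = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t").find(exp.Case)
    print(case.sql())  # e.g. CASE WHEN x = 1 THEN 'one' ELSE 'other' END

    # _parse_bracket recurses on chained subscripts, producing nested Bracket nodes.
    bracket = sqlglot.parse_one("SELECT x[1][2] FROM t").find(exp.Bracket)
    print(repr(bracket))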
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
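For example, a stricter parser can be constructed directly (a minimal sketch; in practice the dialect-specific Parser subclasses are usually obtained through sqlglot's Dialect machinery rather than instantiated by hand):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Collect up to 5 errors and raise them together instead of failing on the first one
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    sql = "SELECT a, b FROM t"
    expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)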
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
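A usage sketch, assuming the default Tokenizer produces the token stream; statements are split on semicolons, yielding one tree each:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2 and all(isinstance(tree, exp.Select) for tree in trees)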
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
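A usage sketch; exp.Column is assumed to be among the types registered in EXPRESSION_PARSERS:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x.y"
    # Parse the token list directly as a column reference rather than a full statement
    column = Parser().parse_into(exp.Column, Tokenizer().tokenize(sql), sql=sql)[0]
    assert isinstance(column, exp.Column) and column.table == "x"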
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
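parse() invokes this method once all statements have been processed, so with ErrorLevel.WARN recorded problems are logged rather than raised (a sketch; the sample statement is assumed to be one the parser flags):

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()
    sql = "SELECT * FROM"  # assumed invalid: the table name is missing
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)
    # Any recorded errors were emitted via logger.error instead of being raised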
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
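A sketch of inspecting the recorded error fields under ErrorLevel.IMMEDIATE (assuming the sample statement fails to parse); each entry carries the fields passed to ParseError.new above:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM"
    parser = Parser(error_level=ErrorLevel.IMMEDIATE, error_message_context=50)
    try:
        parser.parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        first = e.errors[0]
        print(first["line"], first["col"], first["highlight"])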
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
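A minimal sketch of building a node through this helper (inside the parser it is normally called from the _parse_* methods, where pending comments are attached automatically):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Build and validate a NOT node in one step; "this" is a mandatory arg of exp.Not
    node = parser.expression(exp.Not, this=exp.column("x"))
    assert node.sql() == "NOT x"

Omitting a mandatory argument routes through raise_error, so with the default ErrorLevel.IMMEDIATE an invalid construction raises right away.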
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
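A sketch of the ErrorLevel.IGNORE behavior described above:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # With ErrorLevel.IGNORE validation is skipped, so an incomplete node passes through
    lenient = Parser(error_level=ErrorLevel.IGNORE)
    node = lenient.validate_expression(exp.Not())  # "this" is mandatory but unset
    assert node.args.get("this") is None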