sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 721 "CONCAT": lambda self: self._parse_concat(), 722 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 723 "DECODE": lambda self: self._parse_decode(), 724 "EXTRACT": lambda self: self._parse_extract(), 725 "JSON_OBJECT": lambda self: self._parse_json_object(), 726 "LOG": lambda self: self._parse_logarithm(), 727 "MATCH": lambda self: self._parse_match_against(), 728 "OPENJSON": lambda self: self._parse_open_json(), 729 "POSITION": lambda self: self._parse_position(), 730 "SAFE_CAST": lambda self: self._parse_cast(False), 731 "STRING_AGG": lambda self: self._parse_string_agg(), 732 "SUBSTRING": lambda self: self._parse_substring(), 733 "TRIM": lambda self: self._parse_trim(), 734 "TRY_CAST": lambda self: self._parse_cast(False), 735 "TRY_CONVERT": lambda self: self._parse_convert(False), 736 } 737 738 QUERY_MODIFIER_PARSERS = { 739 "joins": lambda self: list(iter(self._parse_join, None)), 740 "laterals": lambda self: list(iter(self._parse_lateral, None)), 741 "match": lambda self: self._parse_match_recognize(), 742 "where": lambda self: self._parse_where(), 743 "group": lambda self: self._parse_group(), 744 "having": lambda self: self._parse_having(), 745 "qualify": lambda self: self._parse_qualify(), 746 "windows": lambda self: self._parse_window_clause(), 747 "order": lambda self: self._parse_order(), 748 "limit": lambda self: self._parse_limit(), 749 "offset": lambda self: self._parse_offset(), 750 "locks": lambda self: self._parse_locks(), 751 "sample": lambda self: self._parse_table_sample(as_modifier=True), 752 } 753 754 SET_PARSERS = { 755 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 756 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 757 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 758 "TRANSACTION": lambda self: self._parse_set_transaction(), 759 } 760 761 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 762 763 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 764 765 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 766 767 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 768 769 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 770 771 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 772 TRANSACTION_CHARACTERISTICS = { 773 "ISOLATION LEVEL REPEATABLE READ", 774 "ISOLATION LEVEL READ COMMITTED", 775 "ISOLATION LEVEL READ UNCOMMITTED", 776 "ISOLATION LEVEL SERIALIZABLE", 777 "READ WRITE", 778 "READ ONLY", 779 } 780 781 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 782 783 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 784 785 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 786 787 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 788 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 789 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 790 791 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 792 793 STRICT_CAST = True 794 795 # A NULL arg in CONCAT yields NULL by default 796 CONCAT_NULL_OUTPUTS_STRING = False 797 798 PREFIXED_PIVOT_COLUMNS = False 799 IDENTIFY_PIVOT_STRINGS = False 800 801 LOG_BASE_FIRST = True 802 LOG_DEFAULTS_TO_LN = False 803 804 __slots__ = ( 805 "error_level", 806 "error_message_context", 807 "max_errors", 808 "sql", 809 "errors", 810 "_tokens", 811 "_index", 812 "_curr", 813 "_next", 814 "_prev", 815 "_prev_comments", 816 ) 817 818 # Autofilled 819 INDEX_OFFSET: int = 0 820 UNNEST_COLUMN_ONLY: bool = False 821 ALIAS_POST_TABLESAMPLE: bool = False 822 STRICT_STRING_CONCAT = False 823 NULL_ORDERING: str = "nulls_are_small" 824 SHOW_TRIE: t.Dict = {} 825 SET_TRIE: t.Dict = {} 826 FORMAT_MAPPING: t.Dict[str, str] = {} 827 FORMAT_TRIE: t.Dict = {} 828 TIME_MAPPING: t.Dict[str, str] = {} 829 TIME_TRIE: t.Dict = {} 830 831 def __init__( 832 self, 833 error_level: t.Optional[ErrorLevel] = None, 834 error_message_context: int = 100, 835 max_errors: int = 3, 836 ): 837 self.error_level = error_level or ErrorLevel.IMMEDIATE 838 self.error_message_context = error_message_context 839 self.max_errors = max_errors 840 self.reset() 841 842 def reset(self): 843 self.sql = "" 844 self.errors = [] 845 self._tokens = [] 846 self._index = 0 847 self._curr = None 848 self._next = None 849 self._prev = None 850 self._prev_comments = None 851 852 def parse( 853 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 854 ) -> t.List[t.Optional[exp.Expression]]: 855 """ 856 Parses a list of tokens and returns a list of syntax trees, one tree 857 per parsed SQL statement. 858 859 Args: 860 raw_tokens: The list of tokens. 861 sql: The original SQL string, used to produce helpful debug messages. 862 863 Returns: 864 The list of the produced syntax trees. 865 """ 866 return self._parse( 867 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 868 ) 869 870 def parse_into( 871 self, 872 expression_types: exp.IntoType, 873 raw_tokens: t.List[Token], 874 sql: t.Optional[str] = None, 875 ) -> t.List[t.Optional[exp.Expression]]: 876 """ 877 Parses a list of tokens into a given Expression type. If a collection of Expression 878 types is given instead, this method will try to parse the token list into each one 879 of them, stopping at the first for which the parsing succeeds. 880 881 Args: 882 expression_types: The expression type(s) to try and parse the token list into. 883 raw_tokens: The list of tokens. 884 sql: The original SQL string, used to produce helpful debug messages. 885 886 Returns: 887 The target Expression. 888 """ 889 errors = [] 890 for expression_type in ensure_list(expression_types): 891 parser = self.EXPRESSION_PARSERS.get(expression_type) 892 if not parser: 893 raise TypeError(f"No parser registered for {expression_type}") 894 895 try: 896 return self._parse(parser, raw_tokens, sql) 897 except ParseError as e: 898 e.errors[0]["into_expression"] = expression_type 899 errors.append(e) 900 901 raise ParseError( 902 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 903 errors=merge_errors(errors), 904 ) from errors[-1] 905 906 def _parse( 907 self, 908 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 909 raw_tokens: t.List[Token], 910 sql: t.Optional[str] = None, 911 ) -> t.List[t.Optional[exp.Expression]]: 912 self.reset() 913 self.sql = sql or "" 914 915 total = len(raw_tokens) 916 chunks: t.List[t.List[Token]] = [[]] 917 918 for i, token in enumerate(raw_tokens): 919 if token.token_type == TokenType.SEMICOLON: 920 if i < total - 1: 921 chunks.append([]) 922 else: 923 chunks[-1].append(token) 924 925 expressions = [] 926 927 for tokens in chunks: 928 self._index = -1 929 self._tokens = tokens 930 self._advance() 931 932 expressions.append(parse_method(self)) 933 934 if self._index < len(self._tokens): 935 self.raise_error("Invalid expression / Unexpected token") 936 937 self.check_errors() 938 939 return expressions 940 941 def check_errors(self) -> None: 942 """Logs or raises any found errors, depending on the chosen error level setting.""" 943 if self.error_level == ErrorLevel.WARN: 944 for error in self.errors: 945 logger.error(str(error)) 946 elif self.error_level == ErrorLevel.RAISE and self.errors: 947 raise ParseError( 948 concat_messages(self.errors, self.max_errors), 949 errors=merge_errors(self.errors), 950 ) 951 952 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 953 """ 954 Appends an error in the list of recorded errors or raises it, depending on the chosen 955 error level setting. 956 """ 957 token = token or self._curr or self._prev or Token.string("") 958 start = token.start 959 end = token.end + 1 960 start_context = self.sql[max(start - self.error_message_context, 0) : start] 961 highlight = self.sql[start:end] 962 end_context = self.sql[end : end + self.error_message_context] 963 964 error = ParseError.new( 965 f"{message}. Line {token.line}, Col: {token.col}.\n" 966 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 967 description=message, 968 line=token.line, 969 col=token.col, 970 start_context=start_context, 971 highlight=highlight, 972 end_context=end_context, 973 ) 974 975 if self.error_level == ErrorLevel.IMMEDIATE: 976 raise error 977 978 self.errors.append(error) 979 980 def expression( 981 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 982 ) -> E: 983 """ 984 Creates a new, validated Expression. 985 986 Args: 987 exp_class: The expression class to instantiate. 988 comments: An optional list of comments to attach to the expression. 989 kwargs: The arguments to set for the expression along with their respective values. 990 991 Returns: 992 The target expression. 993 """ 994 instance = exp_class(**kwargs) 995 instance.add_comments(comments) if comments else self._add_comments(instance) 996 return self.validate_expression(instance) 997 998 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 999 if expression and self._prev_comments: 1000 expression.add_comments(self._prev_comments) 1001 self._prev_comments = None 1002 1003 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1004 """ 1005 Validates an Expression, making sure that all its mandatory arguments are set. 1006 1007 Args: 1008 expression: The expression to validate. 1009 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1010 1011 Returns: 1012 The validated expression. 1013 """ 1014 if self.error_level != ErrorLevel.IGNORE: 1015 for error_message in expression.error_messages(args): 1016 self.raise_error(error_message) 1017 1018 return expression 1019 1020 def _find_sql(self, start: Token, end: Token) -> str: 1021 return self.sql[start.start : end.end + 1] 1022 1023 def _advance(self, times: int = 1) -> None: 1024 self._index += times 1025 self._curr = seq_get(self._tokens, self._index) 1026 self._next = seq_get(self._tokens, self._index + 1) 1027 1028 if self._index > 0: 1029 self._prev = self._tokens[self._index - 1] 1030 self._prev_comments = self._prev.comments 1031 else: 1032 self._prev = None 1033 self._prev_comments = None 1034 1035 def _retreat(self, index: int) -> None: 1036 if index != self._index: 1037 self._advance(index - self._index) 1038 1039 def _parse_command(self) -> exp.Command: 1040 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1041 1042 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1043 start = self._prev 1044 exists = self._parse_exists() if allow_exists else None 1045 1046 self._match(TokenType.ON) 1047 1048 kind = self._match_set(self.CREATABLES) and self._prev 1049 if not kind: 1050 return self._parse_as_command(start) 1051 1052 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1053 this = self._parse_user_defined_function(kind=kind.token_type) 1054 elif kind.token_type == TokenType.TABLE: 1055 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1056 elif kind.token_type == TokenType.COLUMN: 1057 this = self._parse_column() 1058 else: 1059 this = self._parse_id_var() 1060 1061 self._match(TokenType.IS) 1062 1063 return self.expression( 1064 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1065 ) 1066 1067 def _parse_to_table( 1068 self, 1069 ) -> exp.ToTableProperty: 1070 table = self._parse_table_parts(schema=True) 1071 return self.expression(exp.ToTableProperty, this=table) 1072 1073 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1074 def _parse_ttl(self) -> exp.Expression: 1075 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1076 this = self._parse_bitwise() 1077 1078 if self._match_text_seq("DELETE"): 1079 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1080 if self._match_text_seq("RECOMPRESS"): 1081 return self.expression( 1082 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1083 ) 1084 if self._match_text_seq("TO", "DISK"): 1085 return self.expression( 1086 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1087 ) 1088 if self._match_text_seq("TO", "VOLUME"): 1089 return self.expression( 1090 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1091 ) 1092 1093 return this 1094 1095 expressions = self._parse_csv(_parse_ttl_action) 1096 where = self._parse_where() 1097 group = self._parse_group() 1098 1099 aggregates = None 1100 if group and self._match(TokenType.SET): 1101 aggregates = self._parse_csv(self._parse_set_item) 1102 1103 return self.expression( 1104 exp.MergeTreeTTL, 1105 expressions=expressions, 1106 where=where, 1107 group=group, 1108 aggregates=aggregates, 1109 ) 1110 1111 def _parse_statement(self) -> t.Optional[exp.Expression]: 1112 if self._curr is None: 1113 return None 1114 1115 if self._match_set(self.STATEMENT_PARSERS): 1116 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1117 1118 if self._match_set(Tokenizer.COMMANDS): 1119 return self._parse_command() 1120 1121 expression = self._parse_expression() 1122 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1123 return self._parse_query_modifiers(expression) 1124 1125 def _parse_drop(self) -> exp.Drop | exp.Command: 1126 start = self._prev 1127 temporary = self._match(TokenType.TEMPORARY) 1128 materialized = self._match_text_seq("MATERIALIZED") 1129 1130 kind = self._match_set(self.CREATABLES) and self._prev.text 1131 if not kind: 1132 return self._parse_as_command(start) 1133 1134 return self.expression( 1135 exp.Drop, 1136 exists=self._parse_exists(), 1137 this=self._parse_table(schema=True), 1138 kind=kind, 1139 temporary=temporary, 1140 materialized=materialized, 1141 cascade=self._match_text_seq("CASCADE"), 1142 constraints=self._match_text_seq("CONSTRAINTS"), 1143 purge=self._match_text_seq("PURGE"), 1144 ) 1145 1146 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1147 return ( 1148 self._match(TokenType.IF) 1149 and (not not_ or self._match(TokenType.NOT)) 1150 and self._match(TokenType.EXISTS) 1151 ) 1152 1153 def _parse_create(self) -> exp.Create | exp.Command: 1154 # Note: this can't be None because we've matched a statement parser 1155 start = self._prev 1156 replace = start.text.upper() == "REPLACE" or self._match_pair( 1157 TokenType.OR, TokenType.REPLACE 1158 ) 1159 unique = self._match(TokenType.UNIQUE) 1160 1161 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1162 self._advance() 1163 1164 properties = None 1165 create_token = self._match_set(self.CREATABLES) and self._prev 1166 1167 if not create_token: 1168 # exp.Properties.Location.POST_CREATE 1169 properties = self._parse_properties() 1170 create_token = self._match_set(self.CREATABLES) and self._prev 1171 1172 if not properties or not create_token: 1173 return self._parse_as_command(start) 1174 1175 exists = self._parse_exists(not_=True) 1176 this = None 1177 expression = None 1178 indexes = None 1179 no_schema_binding = None 1180 begin = None 1181 clone = None 1182 1183 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1184 nonlocal properties 1185 if properties and temp_props: 1186 properties.expressions.extend(temp_props.expressions) 1187 elif temp_props: 1188 properties = temp_props 1189 1190 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1191 this = self._parse_user_defined_function(kind=create_token.token_type) 1192 1193 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1194 extend_props(self._parse_properties()) 1195 1196 self._match(TokenType.ALIAS) 1197 begin = self._match(TokenType.BEGIN) 1198 return_ = self._match_text_seq("RETURN") 1199 expression = self._parse_statement() 1200 1201 if return_: 1202 expression = self.expression(exp.Return, this=expression) 1203 elif create_token.token_type == TokenType.INDEX: 1204 this = self._parse_index(index=self._parse_id_var()) 1205 elif create_token.token_type in self.DB_CREATABLES: 1206 table_parts = self._parse_table_parts(schema=True) 1207 1208 # exp.Properties.Location.POST_NAME 1209 self._match(TokenType.COMMA) 1210 extend_props(self._parse_properties(before=True)) 1211 1212 this = self._parse_schema(this=table_parts) 1213 1214 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1215 extend_props(self._parse_properties()) 1216 1217 self._match(TokenType.ALIAS) 1218 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1219 # exp.Properties.Location.POST_ALIAS 1220 extend_props(self._parse_properties()) 1221 1222 expression = self._parse_ddl_select() 1223 1224 if create_token.token_type == TokenType.TABLE: 1225 indexes = [] 1226 while True: 1227 index = self._parse_index() 1228 1229 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1230 extend_props(self._parse_properties()) 1231 1232 if not index: 1233 break 1234 else: 1235 self._match(TokenType.COMMA) 1236 indexes.append(index) 1237 elif create_token.token_type == TokenType.VIEW: 1238 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1239 no_schema_binding = True 1240 1241 if self._match_text_seq("CLONE"): 1242 clone = self._parse_table(schema=True) 1243 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1244 clone_kind = ( 1245 self._match(TokenType.L_PAREN) 1246 and self._match_texts(self.CLONE_KINDS) 1247 and self._prev.text.upper() 1248 ) 1249 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1250 self._match(TokenType.R_PAREN) 1251 clone = self.expression( 1252 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1253 ) 1254 1255 return self.expression( 1256 exp.Create, 1257 this=this, 1258 kind=create_token.text, 1259 replace=replace, 1260 unique=unique, 1261 expression=expression, 1262 exists=exists, 1263 properties=properties, 1264 indexes=indexes, 1265 no_schema_binding=no_schema_binding, 1266 begin=begin, 1267 clone=clone, 1268 ) 1269 1270 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1271 # only used for teradata currently 1272 self._match(TokenType.COMMA) 1273 1274 kwargs = { 1275 "no": self._match_text_seq("NO"), 1276 "dual": self._match_text_seq("DUAL"), 1277 "before": self._match_text_seq("BEFORE"), 1278 "default": self._match_text_seq("DEFAULT"), 1279 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1280 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1281 "after": self._match_text_seq("AFTER"), 1282 "minimum": self._match_texts(("MIN", "MINIMUM")), 1283 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1284 } 1285 1286 if self._match_texts(self.PROPERTY_PARSERS): 1287 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1288 try: 1289 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1290 except TypeError: 1291 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1292 1293 return None 1294 1295 def _parse_property(self) -> t.Optional[exp.Expression]: 1296 if self._match_texts(self.PROPERTY_PARSERS): 1297 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1298 1299 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1300 return self._parse_character_set(default=True) 1301 1302 if self._match_text_seq("COMPOUND", "SORTKEY"): 1303 return self._parse_sortkey(compound=True) 1304 1305 if self._match_text_seq("SQL", "SECURITY"): 1306 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1307 1308 assignment = self._match_pair( 1309 TokenType.VAR, TokenType.EQ, advance=False 1310 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1311 1312 if assignment: 1313 key = self._parse_var_or_string() 1314 self._match(TokenType.EQ) 1315 return self.expression(exp.Property, this=key, value=self._parse_column()) 1316 1317 return None 1318 1319 def _parse_stored(self) -> exp.FileFormatProperty: 1320 self._match(TokenType.ALIAS) 1321 1322 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1323 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1324 1325 return self.expression( 1326 exp.FileFormatProperty, 1327 this=self.expression( 1328 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1329 ) 1330 if input_format or output_format 1331 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1332 ) 1333 1334 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1335 self._match(TokenType.EQ) 1336 self._match(TokenType.ALIAS) 1337 return self.expression(exp_class, this=self._parse_field()) 1338 1339 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1340 properties = [] 1341 while True: 1342 if before: 1343 prop = self._parse_property_before() 1344 else: 1345 prop = self._parse_property() 1346 1347 if not prop: 1348 break 1349 for p in ensure_list(prop): 1350 properties.append(p) 1351 1352 if properties: 1353 return self.expression(exp.Properties, expressions=properties) 1354 1355 return None 1356 1357 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1358 return self.expression( 1359 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1360 ) 1361 1362 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1363 if self._index >= 2: 1364 pre_volatile_token = self._tokens[self._index - 2] 1365 else: 1366 pre_volatile_token = None 1367 1368 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1369 return exp.VolatileProperty() 1370 1371 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1372 1373 def _parse_with_property( 1374 self, 1375 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1376 self._match(TokenType.WITH) 1377 if self._match(TokenType.L_PAREN, advance=False): 1378 return self._parse_wrapped_csv(self._parse_property) 1379 1380 if self._match_text_seq("JOURNAL"): 1381 return self._parse_withjournaltable() 1382 1383 if self._match_text_seq("DATA"): 1384 return self._parse_withdata(no=False) 1385 elif self._match_text_seq("NO", "DATA"): 1386 return self._parse_withdata(no=True) 1387 1388 if not self._next: 1389 return None 1390 1391 return self._parse_withisolatedloading() 1392 1393 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1394 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1395 self._match(TokenType.EQ) 1396 1397 user = self._parse_id_var() 1398 self._match(TokenType.PARAMETER) 1399 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1400 1401 if not user or not host: 1402 return None 1403 1404 return exp.DefinerProperty(this=f"{user}@{host}") 1405 1406 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1407 self._match(TokenType.TABLE) 1408 self._match(TokenType.EQ) 1409 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1410 1411 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1412 return self.expression(exp.LogProperty, no=no) 1413 1414 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1415 return self.expression(exp.JournalProperty, **kwargs) 1416 1417 def _parse_checksum(self) -> exp.ChecksumProperty: 1418 self._match(TokenType.EQ) 1419 1420 on = None 1421 if self._match(TokenType.ON): 1422 on = True 1423 elif self._match_text_seq("OFF"): 1424 on = False 1425 1426 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1427 1428 def _parse_cluster(self) -> exp.Cluster: 1429 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1430 1431 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1432 self._match_text_seq("BY") 1433 1434 self._match_l_paren() 1435 expressions = self._parse_csv(self._parse_column) 1436 self._match_r_paren() 1437 1438 if self._match_text_seq("SORTED", "BY"): 1439 self._match_l_paren() 1440 sorted_by = self._parse_csv(self._parse_ordered) 1441 self._match_r_paren() 1442 else: 1443 sorted_by = None 1444 1445 self._match(TokenType.INTO) 1446 buckets = self._parse_number() 1447 self._match_text_seq("BUCKETS") 1448 1449 return self.expression( 1450 exp.ClusteredByProperty, 1451 expressions=expressions, 1452 sorted_by=sorted_by, 1453 buckets=buckets, 1454 ) 1455 1456 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1457 if not self._match_text_seq("GRANTS"): 1458 self._retreat(self._index - 1) 1459 return None 1460 1461 return self.expression(exp.CopyGrantsProperty) 1462 1463 def _parse_freespace(self) -> exp.FreespaceProperty: 1464 self._match(TokenType.EQ) 1465 return self.expression( 1466 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1467 ) 1468 1469 def _parse_mergeblockratio( 1470 self, no: bool = False, default: bool = False 1471 ) -> exp.MergeBlockRatioProperty: 1472 if self._match(TokenType.EQ): 1473 return self.expression( 1474 exp.MergeBlockRatioProperty, 1475 this=self._parse_number(), 1476 percent=self._match(TokenType.PERCENT), 1477 ) 1478 1479 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1480 1481 def _parse_datablocksize( 1482 self, 1483 default: t.Optional[bool] = None, 1484 minimum: t.Optional[bool] = None, 1485 maximum: t.Optional[bool] = None, 1486 ) -> exp.DataBlocksizeProperty: 1487 self._match(TokenType.EQ) 1488 size = self._parse_number() 1489 1490 units = None 1491 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1492 units = self._prev.text 1493 1494 return self.expression( 1495 exp.DataBlocksizeProperty, 1496 size=size, 1497 units=units, 1498 default=default, 1499 minimum=minimum, 1500 maximum=maximum, 1501 ) 1502 1503 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1504 self._match(TokenType.EQ) 1505 always = self._match_text_seq("ALWAYS") 1506 manual = self._match_text_seq("MANUAL") 1507 never = self._match_text_seq("NEVER") 1508 default = self._match_text_seq("DEFAULT") 1509 1510 autotemp = None 1511 if self._match_text_seq("AUTOTEMP"): 1512 autotemp = self._parse_schema() 1513 1514 return self.expression( 1515 exp.BlockCompressionProperty, 1516 always=always, 1517 manual=manual, 1518 never=never, 1519 default=default, 1520 autotemp=autotemp, 1521 ) 1522 1523 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1524 no = self._match_text_seq("NO") 1525 concurrent = self._match_text_seq("CONCURRENT") 1526 self._match_text_seq("ISOLATED", "LOADING") 1527 for_all = self._match_text_seq("FOR", "ALL") 1528 for_insert = self._match_text_seq("FOR", "INSERT") 1529 for_none = self._match_text_seq("FOR", "NONE") 1530 return self.expression( 1531 exp.IsolatedLoadingProperty, 1532 no=no, 1533 concurrent=concurrent, 1534 for_all=for_all, 1535 for_insert=for_insert, 1536 for_none=for_none, 1537 ) 1538 1539 def _parse_locking(self) -> exp.LockingProperty: 1540 if self._match(TokenType.TABLE): 1541 kind = "TABLE" 1542 elif self._match(TokenType.VIEW): 1543 kind = "VIEW" 1544 elif self._match(TokenType.ROW): 1545 kind = "ROW" 1546 elif self._match_text_seq("DATABASE"): 1547 kind = "DATABASE" 1548 else: 1549 kind = None 1550 1551 if kind in ("DATABASE", "TABLE", "VIEW"): 1552 this = self._parse_table_parts() 1553 else: 1554 this = None 1555 1556 if self._match(TokenType.FOR): 1557 for_or_in = "FOR" 1558 elif self._match(TokenType.IN): 1559 for_or_in = "IN" 1560 else: 1561 for_or_in = None 1562 1563 if self._match_text_seq("ACCESS"): 1564 lock_type = "ACCESS" 1565 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1566 lock_type = "EXCLUSIVE" 1567 elif self._match_text_seq("SHARE"): 1568 lock_type = "SHARE" 1569 elif self._match_text_seq("READ"): 1570 lock_type = "READ" 1571 elif self._match_text_seq("WRITE"): 1572 lock_type = "WRITE" 1573 elif self._match_text_seq("CHECKSUM"): 1574 lock_type = "CHECKSUM" 1575 else: 1576 lock_type = None 1577 1578 override = self._match_text_seq("OVERRIDE") 1579 1580 return self.expression( 1581 exp.LockingProperty, 1582 this=this, 1583 kind=kind, 1584 for_or_in=for_or_in, 1585 lock_type=lock_type, 1586 override=override, 1587 ) 1588 1589 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1590 if self._match(TokenType.PARTITION_BY): 1591 return self._parse_csv(self._parse_conjunction) 1592 return [] 1593 1594 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1595 self._match(TokenType.EQ) 1596 return self.expression( 1597 exp.PartitionedByProperty, 1598 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1599 ) 1600 1601 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1602 if self._match_text_seq("AND", "STATISTICS"): 1603 statistics = True 1604 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1605 statistics = False 1606 else: 1607 statistics = None 1608 1609 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1610 1611 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1612 if self._match_text_seq("PRIMARY", "INDEX"): 1613 return exp.NoPrimaryIndexProperty() 1614 return None 1615 1616 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1617 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1618 return exp.OnCommitProperty() 1619 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1620 return exp.OnCommitProperty(delete=True) 1621 return None 1622 1623 def _parse_distkey(self) -> exp.DistKeyProperty: 1624 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1625 1626 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1627 table = self._parse_table(schema=True) 1628 1629 options = [] 1630 while self._match_texts(("INCLUDING", "EXCLUDING")): 1631 this = self._prev.text.upper() 1632 1633 id_var = self._parse_id_var() 1634 if not id_var: 1635 return None 1636 1637 options.append( 1638 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1639 ) 1640 1641 return self.expression(exp.LikeProperty, this=table, expressions=options) 1642 1643 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1644 return self.expression( 1645 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1646 ) 1647 1648 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1649 self._match(TokenType.EQ) 1650 return self.expression( 1651 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1652 ) 1653 1654 def _parse_returns(self) -> exp.ReturnsProperty: 1655 value: t.Optional[exp.Expression] 1656 is_table = self._match(TokenType.TABLE) 1657 1658 if is_table: 1659 if self._match(TokenType.LT): 1660 value = self.expression( 1661 exp.Schema, 1662 this="TABLE", 1663 expressions=self._parse_csv(self._parse_struct_types), 1664 ) 1665 if not self._match(TokenType.GT): 1666 self.raise_error("Expecting >") 1667 else: 1668 value = self._parse_schema(exp.var("TABLE")) 1669 else: 1670 value = self._parse_types() 1671 1672 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1673 1674 def _parse_describe(self) -> exp.Describe: 1675 kind = self._match_set(self.CREATABLES) and self._prev.text 1676 this = self._parse_table() 1677 return self.expression(exp.Describe, this=this, kind=kind) 1678 1679 def _parse_insert(self) -> exp.Insert: 1680 overwrite = self._match(TokenType.OVERWRITE) 1681 local = self._match_text_seq("LOCAL") 1682 alternative = None 1683 1684 if self._match_text_seq("DIRECTORY"): 1685 this: t.Optional[exp.Expression] = self.expression( 1686 exp.Directory, 1687 this=self._parse_var_or_string(), 1688 local=local, 1689 row_format=self._parse_row_format(match_row=True), 1690 ) 1691 else: 1692 if self._match(TokenType.OR): 1693 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1694 1695 self._match(TokenType.INTO) 1696 self._match(TokenType.TABLE) 1697 this = self._parse_table(schema=True) 1698 1699 return self.expression( 1700 exp.Insert, 1701 this=this, 1702 exists=self._parse_exists(), 1703 partition=self._parse_partition(), 1704 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1705 and self._parse_conjunction(), 1706 expression=self._parse_ddl_select(), 1707 conflict=self._parse_on_conflict(), 1708 returning=self._parse_returning(), 1709 overwrite=overwrite, 1710 alternative=alternative, 1711 ) 1712 1713 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1714 conflict = self._match_text_seq("ON", "CONFLICT") 1715 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1716 1717 if not conflict and not duplicate: 1718 return None 1719 1720 nothing = None 1721 expressions = None 1722 key = None 1723 constraint = None 1724 1725 if conflict: 1726 if self._match_text_seq("ON", "CONSTRAINT"): 1727 constraint = self._parse_id_var() 1728 else: 1729 key = self._parse_csv(self._parse_value) 1730 1731 self._match_text_seq("DO") 1732 if self._match_text_seq("NOTHING"): 1733 nothing = True 1734 else: 1735 self._match(TokenType.UPDATE) 1736 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1737 1738 return self.expression( 1739 exp.OnConflict, 1740 duplicate=duplicate, 1741 expressions=expressions, 1742 nothing=nothing, 1743 key=key, 1744 constraint=constraint, 1745 ) 1746 1747 def _parse_returning(self) -> t.Optional[exp.Returning]: 1748 if not self._match(TokenType.RETURNING): 1749 return None 1750 1751 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1752 1753 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1754 if not self._match(TokenType.FORMAT): 1755 return None 1756 return self._parse_row_format() 1757 1758 def _parse_row_format( 1759 self, match_row: bool = False 1760 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1761 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1762 return None 1763 1764 if self._match_text_seq("SERDE"): 1765 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1766 1767 self._match_text_seq("DELIMITED") 1768 1769 kwargs = {} 1770 1771 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1772 kwargs["fields"] = self._parse_string() 1773 if self._match_text_seq("ESCAPED", "BY"): 1774 kwargs["escaped"] = self._parse_string() 1775 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1776 kwargs["collection_items"] = self._parse_string() 1777 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1778 kwargs["map_keys"] = self._parse_string() 1779 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1780 kwargs["lines"] = self._parse_string() 1781 if self._match_text_seq("NULL", "DEFINED", "AS"): 1782 kwargs["null"] = self._parse_string() 1783 1784 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1785 1786 def _parse_load(self) -> exp.LoadData | exp.Command: 1787 if self._match_text_seq("DATA"): 1788 local = self._match_text_seq("LOCAL") 1789 self._match_text_seq("INPATH") 1790 inpath = self._parse_string() 1791 overwrite = self._match(TokenType.OVERWRITE) 1792 self._match_pair(TokenType.INTO, TokenType.TABLE) 1793 1794 return self.expression( 1795 exp.LoadData, 1796 this=self._parse_table(schema=True), 1797 local=local, 1798 overwrite=overwrite, 1799 inpath=inpath, 1800 partition=self._parse_partition(), 1801 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1802 serde=self._match_text_seq("SERDE") and self._parse_string(), 1803 ) 1804 return self._parse_as_command(self._prev) 1805 1806 def _parse_delete(self) -> exp.Delete: 1807 self._match(TokenType.FROM) 1808 1809 return self.expression( 1810 exp.Delete, 1811 this=self._parse_table(), 1812 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1813 where=self._parse_where(), 1814 returning=self._parse_returning(), 1815 limit=self._parse_limit(), 1816 ) 1817 1818 def _parse_update(self) -> exp.Update: 1819 return self.expression( 1820 exp.Update, 1821 **{ # type: ignore 1822 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1823 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1824 "from": self._parse_from(modifiers=True), 1825 "where": self._parse_where(), 1826 "returning": self._parse_returning(), 1827 "limit": self._parse_limit(), 1828 }, 1829 ) 1830 1831 def _parse_uncache(self) -> exp.Uncache: 1832 if not self._match(TokenType.TABLE): 1833 self.raise_error("Expecting TABLE after UNCACHE") 1834 1835 return self.expression( 1836 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1837 ) 1838 1839 def _parse_cache(self) -> exp.Cache: 1840 lazy = self._match_text_seq("LAZY") 1841 self._match(TokenType.TABLE) 1842 table = self._parse_table(schema=True) 1843 1844 options = [] 1845 if self._match_text_seq("OPTIONS"): 1846 self._match_l_paren() 1847 k = self._parse_string() 1848 self._match(TokenType.EQ) 1849 v = self._parse_string() 1850 options = [k, v] 1851 self._match_r_paren() 1852 1853 self._match(TokenType.ALIAS) 1854 return self.expression( 1855 exp.Cache, 1856 this=table, 1857 lazy=lazy, 1858 options=options, 1859 expression=self._parse_select(nested=True), 1860 ) 1861 1862 def _parse_partition(self) -> t.Optional[exp.Partition]: 1863 if not self._match(TokenType.PARTITION): 1864 return None 1865 1866 return self.expression( 1867 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1868 ) 1869 1870 def _parse_value(self) -> exp.Tuple: 1871 if self._match(TokenType.L_PAREN): 1872 expressions = self._parse_csv(self._parse_conjunction) 1873 self._match_r_paren() 1874 return self.expression(exp.Tuple, expressions=expressions) 1875 1876 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1877 # Source: https://prestodb.io/docs/current/sql/values.html 1878 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1879 1880 def _parse_select( 1881 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1882 ) -> t.Optional[exp.Expression]: 1883 cte = self._parse_with() 1884 if cte: 1885 this = self._parse_statement() 1886 1887 if not this: 1888 self.raise_error("Failed to parse any statement following CTE") 1889 return cte 1890 1891 if "with" in this.arg_types: 1892 this.set("with", cte) 1893 else: 1894 self.raise_error(f"{this.key} does not support CTE") 1895 this = cte 1896 elif self._match(TokenType.SELECT): 1897 comments = self._prev_comments 1898 1899 hint = self._parse_hint() 1900 all_ = self._match(TokenType.ALL) 1901 distinct = self._match(TokenType.DISTINCT) 1902 1903 kind = ( 1904 self._match(TokenType.ALIAS) 1905 and self._match_texts(("STRUCT", "VALUE")) 1906 and self._prev.text 1907 ) 1908 1909 if distinct: 1910 distinct = self.expression( 1911 exp.Distinct, 1912 on=self._parse_value() if self._match(TokenType.ON) else None, 1913 ) 1914 1915 if all_ and distinct: 1916 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1917 1918 limit = self._parse_limit(top=True) 1919 expressions = self._parse_csv(self._parse_expression) 1920 1921 this = self.expression( 1922 exp.Select, 1923 kind=kind, 1924 hint=hint, 1925 distinct=distinct, 1926 expressions=expressions, 1927 limit=limit, 1928 ) 1929 this.comments = comments 1930 1931 into = self._parse_into() 1932 if into: 1933 this.set("into", into) 1934 1935 from_ = self._parse_from() 1936 if from_: 1937 this.set("from", from_) 1938 1939 this = self._parse_query_modifiers(this) 1940 elif (table or nested) and self._match(TokenType.L_PAREN): 1941 if self._match(TokenType.PIVOT): 1942 this = self._parse_simplified_pivot() 1943 elif self._match(TokenType.FROM): 1944 this = exp.select("*").from_( 1945 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1946 ) 1947 else: 1948 this = self._parse_table() if table else self._parse_select(nested=True) 1949 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1950 1951 self._match_r_paren() 1952 1953 # early return so that subquery unions aren't parsed again 1954 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1955 # Union ALL should be a property of the top select node, not the subquery 1956 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1957 elif self._match(TokenType.VALUES): 1958 this = self.expression( 1959 exp.Values, 1960 expressions=self._parse_csv(self._parse_value), 1961 alias=self._parse_table_alias(), 1962 ) 1963 else: 1964 this = None 1965 1966 return self._parse_set_operations(this) 1967 1968 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1969 if not skip_with_token and not self._match(TokenType.WITH): 1970 return None 1971 1972 comments = self._prev_comments 1973 recursive = self._match(TokenType.RECURSIVE) 1974 1975 expressions = [] 1976 while True: 1977 expressions.append(self._parse_cte()) 1978 1979 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1980 break 1981 else: 1982 self._match(TokenType.WITH) 1983 1984 return self.expression( 1985 exp.With, comments=comments, expressions=expressions, recursive=recursive 1986 ) 1987 1988 def _parse_cte(self) -> exp.CTE: 1989 alias = self._parse_table_alias() 1990 if not alias or not alias.this: 1991 self.raise_error("Expected CTE to have alias") 1992 1993 self._match(TokenType.ALIAS) 1994 return self.expression( 1995 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1996 ) 1997 1998 def _parse_table_alias( 1999 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2000 ) -> t.Optional[exp.TableAlias]: 2001 any_token = self._match(TokenType.ALIAS) 2002 alias = ( 2003 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2004 or self._parse_string_as_identifier() 2005 ) 2006 2007 index = self._index 2008 if self._match(TokenType.L_PAREN): 2009 columns = self._parse_csv(self._parse_function_parameter) 2010 self._match_r_paren() if columns else self._retreat(index) 2011 else: 2012 columns = None 2013 2014 if not alias and not columns: 2015 return None 2016 2017 return self.expression(exp.TableAlias, this=alias, columns=columns) 2018 2019 def _parse_subquery( 2020 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2021 ) -> t.Optional[exp.Subquery]: 2022 if not this: 2023 return None 2024 2025 return self.expression( 2026 exp.Subquery, 2027 this=this, 2028 pivots=self._parse_pivots(), 2029 alias=self._parse_table_alias() if parse_alias else None, 2030 ) 2031 2032 def _parse_query_modifiers( 2033 self, this: t.Optional[exp.Expression] 2034 ) -> t.Optional[exp.Expression]: 2035 if isinstance(this, self.MODIFIABLES): 2036 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2037 expression = parser(self) 2038 2039 if expression: 2040 if key == "limit": 2041 offset = expression.args.pop("offset", None) 2042 if offset: 2043 this.set("offset", exp.Offset(expression=offset)) 2044 this.set(key, expression) 2045 return this 2046 2047 def _parse_hint(self) -> t.Optional[exp.Hint]: 2048 if self._match(TokenType.HINT): 2049 hints = self._parse_csv(self._parse_function) 2050 2051 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2052 self.raise_error("Expected */ after HINT") 2053 2054 return self.expression(exp.Hint, expressions=hints) 2055 2056 return None 2057 2058 def _parse_into(self) -> t.Optional[exp.Into]: 2059 if not self._match(TokenType.INTO): 2060 return None 2061 2062 temp = self._match(TokenType.TEMPORARY) 2063 unlogged = self._match_text_seq("UNLOGGED") 2064 self._match(TokenType.TABLE) 2065 2066 return self.expression( 2067 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2068 ) 2069 2070 def _parse_from( 2071 self, modifiers: bool = False, skip_from_token: bool = False 2072 ) -> t.Optional[exp.From]: 2073 if not skip_from_token and not self._match(TokenType.FROM): 2074 return None 2075 2076 comments = self._prev_comments 2077 this = self._parse_table() 2078 2079 return self.expression( 2080 exp.From, 2081 comments=comments, 2082 this=self._parse_query_modifiers(this) if modifiers else this, 2083 ) 2084 2085 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2086 if not self._match(TokenType.MATCH_RECOGNIZE): 2087 return None 2088 2089 self._match_l_paren() 2090 2091 partition = self._parse_partition_by() 2092 order = self._parse_order() 2093 measures = ( 2094 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2095 ) 2096 2097 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2098 rows = exp.var("ONE ROW PER MATCH") 2099 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2100 text = "ALL ROWS PER MATCH" 2101 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2102 text += f" SHOW EMPTY MATCHES" 2103 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2104 text += f" OMIT EMPTY MATCHES" 2105 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2106 text += f" WITH UNMATCHED ROWS" 2107 rows = exp.var(text) 2108 else: 2109 rows = None 2110 2111 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2112 text = "AFTER MATCH SKIP" 2113 if self._match_text_seq("PAST", "LAST", "ROW"): 2114 text += f" PAST LAST ROW" 2115 elif self._match_text_seq("TO", "NEXT", "ROW"): 2116 text += f" TO NEXT ROW" 2117 elif self._match_text_seq("TO", "FIRST"): 2118 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2119 elif self._match_text_seq("TO", "LAST"): 2120 text += f" TO LAST {self._advance_any().text}" # type: ignore 2121 after = exp.var(text) 2122 else: 2123 after = None 2124 2125 if self._match_text_seq("PATTERN"): 2126 self._match_l_paren() 2127 2128 if not self._curr: 2129 self.raise_error("Expecting )", self._curr) 2130 2131 paren = 1 2132 start = self._curr 2133 2134 while self._curr and paren > 0: 2135 if self._curr.token_type == TokenType.L_PAREN: 2136 paren += 1 2137 if self._curr.token_type == TokenType.R_PAREN: 2138 paren -= 1 2139 2140 end = self._prev 2141 self._advance() 2142 2143 if paren > 0: 2144 self.raise_error("Expecting )", self._curr) 2145 2146 pattern = exp.var(self._find_sql(start, end)) 2147 else: 2148 pattern = None 2149 2150 define = ( 2151 self._parse_csv( 2152 lambda: self.expression( 2153 exp.Alias, 2154 alias=self._parse_id_var(any_token=True), 2155 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2156 ) 2157 ) 2158 if self._match_text_seq("DEFINE") 2159 else None 2160 ) 2161 2162 self._match_r_paren() 2163 2164 return self.expression( 2165 exp.MatchRecognize, 2166 partition_by=partition, 2167 order=order, 2168 measures=measures, 2169 rows=rows, 2170 after=after, 2171 pattern=pattern, 2172 define=define, 2173 alias=self._parse_table_alias(), 2174 ) 2175 2176 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2177 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2178 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2179 2180 if outer_apply or cross_apply: 2181 this = self._parse_select(table=True) 2182 view = None 2183 outer = not cross_apply 2184 elif self._match(TokenType.LATERAL): 2185 this = self._parse_select(table=True) 2186 view = self._match(TokenType.VIEW) 2187 outer = self._match(TokenType.OUTER) 2188 else: 2189 return None 2190 2191 if not this: 2192 this = self._parse_function() or self._parse_id_var(any_token=False) 2193 while self._match(TokenType.DOT): 2194 this = exp.Dot( 2195 this=this, 2196 expression=self._parse_function() or self._parse_id_var(any_token=False), 2197 ) 2198 2199 if view: 2200 table = self._parse_id_var(any_token=False) 2201 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2202 table_alias: t.Optional[exp.TableAlias] = self.expression( 2203 exp.TableAlias, this=table, columns=columns 2204 ) 2205 elif isinstance(this, exp.Subquery) and this.alias: 2206 # Ensures parity between the Subquery's and the Lateral's "alias" args 2207 table_alias = this.args["alias"].copy() 2208 else: 2209 table_alias = self._parse_table_alias() 2210 2211 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2212 2213 def _parse_join_parts( 2214 self, 2215 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2216 return ( 2217 self._match_set(self.JOIN_METHODS) and self._prev, 2218 self._match_set(self.JOIN_SIDES) and self._prev, 2219 self._match_set(self.JOIN_KINDS) and self._prev, 2220 ) 2221 2222 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2223 if self._match(TokenType.COMMA): 2224 return self.expression(exp.Join, this=self._parse_table()) 2225 2226 index = self._index 2227 method, side, kind = self._parse_join_parts() 2228 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2229 join = self._match(TokenType.JOIN) 2230 2231 if not skip_join_token and not join: 2232 self._retreat(index) 2233 kind = None 2234 method = None 2235 side = None 2236 2237 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2238 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2239 2240 if not skip_join_token and not join and not outer_apply and not cross_apply: 2241 return None 2242 2243 if outer_apply: 2244 side = Token(TokenType.LEFT, "LEFT") 2245 2246 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2247 2248 if method: 2249 kwargs["method"] = method.text 2250 if side: 2251 kwargs["side"] = side.text 2252 if kind: 2253 kwargs["kind"] = kind.text 2254 if hint: 2255 kwargs["hint"] = hint 2256 2257 if self._match(TokenType.ON): 2258 kwargs["on"] = self._parse_conjunction() 2259 elif self._match(TokenType.USING): 2260 kwargs["using"] = self._parse_wrapped_id_vars() 2261 2262 return self.expression(exp.Join, **kwargs) 2263 2264 def _parse_index( 2265 self, 2266 index: t.Optional[exp.Expression] = None, 2267 ) -> t.Optional[exp.Index]: 2268 if index: 2269 unique = None 2270 primary = None 2271 amp = None 2272 2273 self._match(TokenType.ON) 2274 self._match(TokenType.TABLE) # hive 2275 table = self._parse_table_parts(schema=True) 2276 else: 2277 unique = self._match(TokenType.UNIQUE) 2278 primary = self._match_text_seq("PRIMARY") 2279 amp = self._match_text_seq("AMP") 2280 2281 if not self._match(TokenType.INDEX): 2282 return None 2283 2284 index = self._parse_id_var() 2285 table = None 2286 2287 using = self._parse_field() if self._match(TokenType.USING) else None 2288 2289 if self._match(TokenType.L_PAREN, advance=False): 2290 columns = self._parse_wrapped_csv(self._parse_ordered) 2291 else: 2292 columns = None 2293 2294 return self.expression( 2295 exp.Index, 2296 this=index, 2297 table=table, 2298 using=using, 2299 columns=columns, 2300 unique=unique, 2301 primary=primary, 2302 amp=amp, 2303 partition_by=self._parse_partition_by(), 2304 ) 2305 2306 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2307 hints: t.List[exp.Expression] = [] 2308 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2309 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2310 hints.append( 2311 self.expression( 2312 exp.WithTableHint, 2313 expressions=self._parse_csv( 2314 lambda: self._parse_function() or self._parse_var(any_token=True) 2315 ), 2316 ) 2317 ) 2318 self._match_r_paren() 2319 else: 2320 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2321 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2322 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2323 2324 self._match_texts({"INDEX", "KEY"}) 2325 if self._match(TokenType.FOR): 2326 hint.set("target", self._advance_any() and self._prev.text.upper()) 2327 2328 hint.set("expressions", self._parse_wrapped_id_vars()) 2329 hints.append(hint) 2330 2331 return hints or None 2332 2333 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2334 return ( 2335 (not schema and self._parse_function(optional_parens=False)) 2336 or self._parse_id_var(any_token=False) 2337 or self._parse_string_as_identifier() 2338 or self._parse_placeholder() 2339 ) 2340 2341 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2342 catalog = None 2343 db = None 2344 table = self._parse_table_part(schema=schema) 2345 2346 while self._match(TokenType.DOT): 2347 if catalog: 2348 # This allows nesting the table in arbitrarily many dot expressions if needed 2349 table = self.expression( 2350 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2351 ) 2352 else: 2353 catalog = db 2354 db = table 2355 table = self._parse_table_part(schema=schema) 2356 2357 if not table: 2358 self.raise_error(f"Expected table name but got {self._curr}") 2359 2360 return self.expression( 2361 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2362 ) 2363 2364 def _parse_table( 2365 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2366 ) -> t.Optional[exp.Expression]: 2367 lateral = self._parse_lateral() 2368 if lateral: 2369 return lateral 2370 2371 unnest = self._parse_unnest() 2372 if unnest: 2373 return unnest 2374 2375 values = self._parse_derived_table_values() 2376 if values: 2377 return values 2378 2379 subquery = self._parse_select(table=True) 2380 if subquery: 2381 if not subquery.args.get("pivots"): 2382 subquery.set("pivots", self._parse_pivots()) 2383 return subquery 2384 2385 this: exp.Expression = self._parse_table_parts(schema=schema) 2386 2387 if schema: 2388 return self._parse_schema(this=this) 2389 2390 if self.ALIAS_POST_TABLESAMPLE: 2391 table_sample = self._parse_table_sample() 2392 2393 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2394 if alias: 2395 this.set("alias", alias) 2396 2397 if not this.args.get("pivots"): 2398 this.set("pivots", self._parse_pivots()) 2399 2400 this.set("hints", self._parse_table_hints()) 2401 2402 if not self.ALIAS_POST_TABLESAMPLE: 2403 table_sample = self._parse_table_sample() 2404 2405 if table_sample: 2406 table_sample.set("this", this) 2407 this = table_sample 2408 2409 return this 2410 2411 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2412 if not self._match(TokenType.UNNEST): 2413 return None 2414 2415 expressions = self._parse_wrapped_csv(self._parse_type) 2416 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2417 2418 alias = self._parse_table_alias() if with_alias else None 2419 2420 if alias and self.UNNEST_COLUMN_ONLY: 2421 if alias.args.get("columns"): 2422 self.raise_error("Unexpected extra column alias in unnest.") 2423 2424 alias.set("columns", [alias.this]) 2425 alias.set("this", None) 2426 2427 offset = None 2428 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2429 self._match(TokenType.ALIAS) 2430 offset = self._parse_id_var() or exp.to_identifier("offset") 2431 2432 return self.expression( 2433 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2434 ) 2435 2436 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2437 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2438 if not is_derived and not self._match(TokenType.VALUES): 2439 return None 2440 2441 expressions = self._parse_csv(self._parse_value) 2442 alias = self._parse_table_alias() 2443 2444 if is_derived: 2445 self._match_r_paren() 2446 2447 return self.expression( 2448 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2449 ) 2450 2451 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2452 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2453 as_modifier and self._match_text_seq("USING", "SAMPLE") 2454 ): 2455 return None 2456 2457 bucket_numerator = None 2458 bucket_denominator = None 2459 bucket_field = None 2460 percent = None 2461 rows = None 2462 size = None 2463 seed = None 2464 2465 kind = ( 2466 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2467 ) 2468 method = self._parse_var(tokens=(TokenType.ROW,)) 2469 2470 self._match(TokenType.L_PAREN) 2471 2472 num = self._parse_number() 2473 2474 if self._match_text_seq("BUCKET"): 2475 bucket_numerator = self._parse_number() 2476 self._match_text_seq("OUT", "OF") 2477 bucket_denominator = bucket_denominator = self._parse_number() 2478 self._match(TokenType.ON) 2479 bucket_field = self._parse_field() 2480 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2481 percent = num 2482 elif self._match(TokenType.ROWS): 2483 rows = num 2484 else: 2485 size = num 2486 2487 self._match(TokenType.R_PAREN) 2488 2489 if self._match(TokenType.L_PAREN): 2490 method = self._parse_var() 2491 seed = self._match(TokenType.COMMA) and self._parse_number() 2492 self._match_r_paren() 2493 elif self._match_texts(("SEED", "REPEATABLE")): 2494 seed = self._parse_wrapped(self._parse_number) 2495 2496 return self.expression( 2497 exp.TableSample, 2498 method=method, 2499 bucket_numerator=bucket_numerator, 2500 bucket_denominator=bucket_denominator, 2501 bucket_field=bucket_field, 2502 percent=percent, 2503 rows=rows, 2504 size=size, 2505 seed=seed, 2506 kind=kind, 2507 ) 2508 2509 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2510 return list(iter(self._parse_pivot, None)) 2511 2512 # https://duckdb.org/docs/sql/statements/pivot 2513 def _parse_simplified_pivot(self) -> exp.Pivot: 2514 def _parse_on() -> t.Optional[exp.Expression]: 2515 this = self._parse_bitwise() 2516 return self._parse_in(this) if self._match(TokenType.IN) else this 2517 2518 this = self._parse_table() 2519 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2520 using = self._match(TokenType.USING) and self._parse_csv( 2521 lambda: self._parse_alias(self._parse_function()) 2522 ) 2523 group = self._parse_group() 2524 return self.expression( 2525 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2526 ) 2527 2528 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2529 index = self._index 2530 2531 if self._match(TokenType.PIVOT): 2532 unpivot = False 2533 elif self._match(TokenType.UNPIVOT): 2534 unpivot = True 2535 else: 2536 return None 2537 2538 expressions = [] 2539 field = None 2540 2541 if not self._match(TokenType.L_PAREN): 2542 self._retreat(index) 2543 return None 2544 2545 if unpivot: 2546 expressions = self._parse_csv(self._parse_column) 2547 else: 2548 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2549 2550 if not expressions: 2551 self.raise_error("Failed to parse PIVOT's aggregation list") 2552 2553 if not self._match(TokenType.FOR): 2554 self.raise_error("Expecting FOR") 2555 2556 value = self._parse_column() 2557 2558 if not self._match(TokenType.IN): 2559 self.raise_error("Expecting IN") 2560 2561 field = self._parse_in(value, alias=True) 2562 2563 self._match_r_paren() 2564 2565 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2566 2567 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2568 pivot.set("alias", self._parse_table_alias()) 2569 2570 if not unpivot: 2571 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2572 2573 columns: t.List[exp.Expression] = [] 2574 for fld in pivot.args["field"].expressions: 2575 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2576 for name in names: 2577 if self.PREFIXED_PIVOT_COLUMNS: 2578 name = f"{name}_{field_name}" if name else field_name 2579 else: 2580 name = f"{field_name}_{name}" if name else field_name 2581 2582 columns.append(exp.to_identifier(name)) 2583 2584 pivot.set("columns", columns) 2585 2586 return pivot 2587 2588 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2589 return [agg.alias for agg in aggregations] 2590 2591 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2592 if not skip_where_token and not self._match(TokenType.WHERE): 2593 return None 2594 2595 return self.expression( 2596 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2597 ) 2598 2599 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2600 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2601 return None 2602 2603 elements = defaultdict(list) 2604 2605 while True: 2606 expressions = self._parse_csv(self._parse_conjunction) 2607 if expressions: 2608 elements["expressions"].extend(expressions) 2609 2610 grouping_sets = self._parse_grouping_sets() 2611 if grouping_sets: 2612 elements["grouping_sets"].extend(grouping_sets) 2613 2614 rollup = None 2615 cube = None 2616 totals = None 2617 2618 with_ = self._match(TokenType.WITH) 2619 if self._match(TokenType.ROLLUP): 2620 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2621 elements["rollup"].extend(ensure_list(rollup)) 2622 2623 if self._match(TokenType.CUBE): 2624 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2625 elements["cube"].extend(ensure_list(cube)) 2626 2627 if self._match_text_seq("TOTALS"): 2628 totals = True 2629 elements["totals"] = True # type: ignore 2630 2631 if not (grouping_sets or rollup or cube or totals): 2632 break 2633 2634 return self.expression(exp.Group, **elements) # type: ignore 2635 2636 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2637 if not self._match(TokenType.GROUPING_SETS): 2638 return None 2639 2640 return self._parse_wrapped_csv(self._parse_grouping_set) 2641 2642 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2643 if self._match(TokenType.L_PAREN): 2644 grouping_set = self._parse_csv(self._parse_column) 2645 self._match_r_paren() 2646 return self.expression(exp.Tuple, expressions=grouping_set) 2647 2648 return self._parse_column() 2649 2650 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2651 if not skip_having_token and not self._match(TokenType.HAVING): 2652 return None 2653 return self.expression(exp.Having, this=self._parse_conjunction()) 2654 2655 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2656 if not self._match(TokenType.QUALIFY): 2657 return None 2658 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2659 2660 def _parse_order( 2661 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2662 ) -> t.Optional[exp.Expression]: 2663 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2664 return this 2665 2666 return self.expression( 2667 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2668 ) 2669 2670 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2671 if not self._match(token): 2672 return None 2673 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2674 2675 def _parse_ordered(self) -> exp.Ordered: 2676 this = self._parse_conjunction() 2677 self._match(TokenType.ASC) 2678 2679 is_desc = self._match(TokenType.DESC) 2680 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2681 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2682 desc = is_desc or False 2683 asc = not desc 2684 nulls_first = is_nulls_first or False 2685 explicitly_null_ordered = is_nulls_first or is_nulls_last 2686 2687 if ( 2688 not explicitly_null_ordered 2689 and ( 2690 (asc and self.NULL_ORDERING == "nulls_are_small") 2691 or (desc and self.NULL_ORDERING != "nulls_are_small") 2692 ) 2693 and self.NULL_ORDERING != "nulls_are_last" 2694 ): 2695 nulls_first = True 2696 2697 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2698 2699 def _parse_limit( 2700 self, this: t.Optional[exp.Expression] = None, top: bool = False 2701 ) -> t.Optional[exp.Expression]: 2702 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2703 limit_paren = self._match(TokenType.L_PAREN) 2704 expression = self._parse_number() if top else self._parse_term() 2705 2706 if self._match(TokenType.COMMA): 2707 offset = expression 2708 expression = self._parse_term() 2709 else: 2710 offset = None 2711 2712 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2713 2714 if limit_paren: 2715 self._match_r_paren() 2716 2717 return limit_exp 2718 2719 if self._match(TokenType.FETCH): 2720 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2721 direction = self._prev.text if direction else "FIRST" 2722 2723 count = self._parse_number() 2724 percent = self._match(TokenType.PERCENT) 2725 2726 self._match_set((TokenType.ROW, TokenType.ROWS)) 2727 2728 only = self._match_text_seq("ONLY") 2729 with_ties = self._match_text_seq("WITH", "TIES") 2730 2731 if only and with_ties: 2732 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2733 2734 return self.expression( 2735 exp.Fetch, 2736 direction=direction, 2737 count=count, 2738 percent=percent, 2739 with_ties=with_ties, 2740 ) 2741 2742 return this 2743 2744 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2745 if not self._match(TokenType.OFFSET): 2746 return this 2747 2748 count = self._parse_number() 2749 self._match_set((TokenType.ROW, TokenType.ROWS)) 2750 return self.expression(exp.Offset, this=this, expression=count) 2751 2752 def _parse_locks(self) -> t.List[exp.Lock]: 2753 locks = [] 2754 while True: 2755 if self._match_text_seq("FOR", "UPDATE"): 2756 update = True 2757 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2758 "LOCK", "IN", "SHARE", "MODE" 2759 ): 2760 update = False 2761 else: 2762 break 2763 2764 expressions = None 2765 if self._match_text_seq("OF"): 2766 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2767 2768 wait: t.Optional[bool | exp.Expression] = None 2769 if self._match_text_seq("NOWAIT"): 2770 wait = True 2771 elif self._match_text_seq("WAIT"): 2772 wait = self._parse_primary() 2773 elif self._match_text_seq("SKIP", "LOCKED"): 2774 wait = False 2775 2776 locks.append( 2777 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2778 ) 2779 2780 return locks 2781 2782 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2783 if not self._match_set(self.SET_OPERATIONS): 2784 return this 2785 2786 token_type = self._prev.token_type 2787 2788 if token_type == TokenType.UNION: 2789 expression = exp.Union 2790 elif token_type == TokenType.EXCEPT: 2791 expression = exp.Except 2792 else: 2793 expression = exp.Intersect 2794 2795 return self.expression( 2796 expression, 2797 this=this, 2798 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2799 expression=self._parse_set_operations(self._parse_select(nested=True)), 2800 ) 2801 2802 def _parse_expression(self) -> t.Optional[exp.Expression]: 2803 return self._parse_alias(self._parse_conjunction()) 2804 2805 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2806 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2807 2808 def _parse_equality(self) -> t.Optional[exp.Expression]: 2809 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2810 2811 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2812 return self._parse_tokens(self._parse_range, self.COMPARISON) 2813 2814 def _parse_range(self) -> t.Optional[exp.Expression]: 2815 this = self._parse_bitwise() 2816 negate = self._match(TokenType.NOT) 2817 2818 if self._match_set(self.RANGE_PARSERS): 2819 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2820 if not expression: 2821 return this 2822 2823 this = expression 2824 elif self._match(TokenType.ISNULL): 2825 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2826 2827 # Postgres supports ISNULL and NOTNULL for conditions. 2828 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2829 if self._match(TokenType.NOTNULL): 2830 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2831 this = self.expression(exp.Not, this=this) 2832 2833 if negate: 2834 this = self.expression(exp.Not, this=this) 2835 2836 if self._match(TokenType.IS): 2837 this = self._parse_is(this) 2838 2839 return this 2840 2841 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2842 index = self._index - 1 2843 negate = self._match(TokenType.NOT) 2844 2845 if self._match_text_seq("DISTINCT", "FROM"): 2846 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2847 return self.expression(klass, this=this, expression=self._parse_expression()) 2848 2849 expression = self._parse_null() or self._parse_boolean() 2850 if not expression: 2851 self._retreat(index) 2852 return None 2853 2854 this = self.expression(exp.Is, this=this, expression=expression) 2855 return self.expression(exp.Not, this=this) if negate else this 2856 2857 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2858 unnest = self._parse_unnest(with_alias=False) 2859 if unnest: 2860 this = self.expression(exp.In, this=this, unnest=unnest) 2861 elif self._match(TokenType.L_PAREN): 2862 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2863 2864 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2865 this = self.expression(exp.In, this=this, query=expressions[0]) 2866 else: 2867 this = self.expression(exp.In, this=this, expressions=expressions) 2868 2869 self._match_r_paren(this) 2870 else: 2871 this = self.expression(exp.In, this=this, field=self._parse_field()) 2872 2873 return this 2874 2875 def _parse_between(self, this: exp.Expression) -> exp.Between: 2876 low = self._parse_bitwise() 2877 self._match(TokenType.AND) 2878 high = self._parse_bitwise() 2879 return self.expression(exp.Between, this=this, low=low, high=high) 2880 2881 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2882 if not self._match(TokenType.ESCAPE): 2883 return this 2884 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2885 2886 def _parse_interval(self) -> t.Optional[exp.Interval]: 2887 if not self._match(TokenType.INTERVAL): 2888 return None 2889 2890 if self._match(TokenType.STRING, advance=False): 2891 this = self._parse_primary() 2892 else: 2893 this = self._parse_term() 2894 2895 unit = self._parse_function() or self._parse_var() 2896 2897 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2898 # each INTERVAL expression into this canonical form so it's easy to transpile 2899 if this and this.is_number: 2900 this = exp.Literal.string(this.name) 2901 elif this and this.is_string: 2902 parts = this.name.split() 2903 2904 if len(parts) == 2: 2905 if unit: 2906 # this is not actually a unit, it's something else 2907 unit = None 2908 self._retreat(self._index - 1) 2909 else: 2910 this = exp.Literal.string(parts[0]) 2911 unit = self.expression(exp.Var, this=parts[1]) 2912 2913 return self.expression(exp.Interval, this=this, unit=unit) 2914 2915 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2916 this = self._parse_term() 2917 2918 while True: 2919 if self._match_set(self.BITWISE): 2920 this = self.expression( 2921 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2922 ) 2923 elif self._match_pair(TokenType.LT, TokenType.LT): 2924 this = self.expression( 2925 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2926 ) 2927 elif self._match_pair(TokenType.GT, TokenType.GT): 2928 this = self.expression( 2929 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2930 ) 2931 else: 2932 break 2933 2934 return this 2935 2936 def _parse_term(self) -> t.Optional[exp.Expression]: 2937 return self._parse_tokens(self._parse_factor, self.TERM) 2938 2939 def _parse_factor(self) -> t.Optional[exp.Expression]: 2940 return self._parse_tokens(self._parse_unary, self.FACTOR) 2941 2942 def _parse_unary(self) -> t.Optional[exp.Expression]: 2943 if self._match_set(self.UNARY_PARSERS): 2944 return self.UNARY_PARSERS[self._prev.token_type](self) 2945 return self._parse_at_time_zone(self._parse_type()) 2946 2947 def _parse_type(self) -> t.Optional[exp.Expression]: 2948 interval = self._parse_interval() 2949 if interval: 2950 return interval 2951 2952 index = self._index 2953 data_type = self._parse_types(check_func=True) 2954 this = self._parse_column() 2955 2956 if data_type: 2957 if isinstance(this, exp.Literal): 2958 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2959 if parser: 2960 return parser(self, this, data_type) 2961 return self.expression(exp.Cast, this=this, to=data_type) 2962 if not data_type.expressions: 2963 self._retreat(index) 2964 return self._parse_column() 2965 return self._parse_column_ops(data_type) 2966 2967 return this 2968 2969 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2970 this = self._parse_type() 2971 if not this: 2972 return None 2973 2974 return self.expression( 2975 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2976 ) 2977 2978 def _parse_types( 2979 self, check_func: bool = False, schema: bool = False 2980 ) -> t.Optional[exp.Expression]: 2981 index = self._index 2982 2983 prefix = self._match_text_seq("SYSUDTLIB", ".") 2984 2985 if not self._match_set(self.TYPE_TOKENS): 2986 return None 2987 2988 type_token = self._prev.token_type 2989 2990 if type_token == TokenType.PSEUDO_TYPE: 2991 return self.expression(exp.PseudoType, this=self._prev.text) 2992 2993 nested = type_token in self.NESTED_TYPE_TOKENS 2994 is_struct = type_token == TokenType.STRUCT 2995 expressions = None 2996 maybe_func = False 2997 2998 if self._match(TokenType.L_PAREN): 2999 if is_struct: 3000 expressions = self._parse_csv(self._parse_struct_types) 3001 elif nested: 3002 expressions = self._parse_csv( 3003 lambda: self._parse_types(check_func=check_func, schema=schema) 3004 ) 3005 elif type_token in self.ENUM_TYPE_TOKENS: 3006 expressions = self._parse_csv(self._parse_primary) 3007 else: 3008 expressions = self._parse_csv(self._parse_type_size) 3009 3010 if not expressions or not self._match(TokenType.R_PAREN): 3011 self._retreat(index) 3012 return None 3013 3014 maybe_func = True 3015 3016 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3017 this = exp.DataType( 3018 this=exp.DataType.Type.ARRAY, 3019 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3020 nested=True, 3021 ) 3022 3023 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3024 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3025 3026 return this 3027 3028 if self._match(TokenType.L_BRACKET): 3029 self._retreat(index) 3030 return None 3031 3032 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3033 if nested and self._match(TokenType.LT): 3034 if is_struct: 3035 expressions = self._parse_csv(self._parse_struct_types) 3036 else: 3037 expressions = self._parse_csv( 3038 lambda: self._parse_types(check_func=check_func, schema=schema) 3039 ) 3040 3041 if not self._match(TokenType.GT): 3042 self.raise_error("Expecting >") 3043 3044 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3045 values = self._parse_csv(self._parse_conjunction) 3046 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3047 3048 value: t.Optional[exp.Expression] = None 3049 if type_token in self.TIMESTAMPS: 3050 if self._match_text_seq("WITH", "TIME", "ZONE"): 3051 maybe_func = False 3052 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3053 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3054 maybe_func = False 3055 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3056 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3057 maybe_func = False 3058 elif type_token == TokenType.INTERVAL: 3059 unit = self._parse_var() 3060 3061 if not unit: 3062 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3063 else: 3064 value = self.expression(exp.Interval, unit=unit) 3065 3066 if maybe_func and check_func: 3067 index2 = self._index 3068 peek = self._parse_string() 3069 3070 if not peek: 3071 self._retreat(index) 3072 return None 3073 3074 self._retreat(index2) 3075 3076 if value: 3077 return value 3078 3079 return exp.DataType( 3080 this=exp.DataType.Type[type_token.value.upper()], 3081 expressions=expressions, 3082 nested=nested, 3083 values=values, 3084 prefix=prefix, 3085 ) 3086 3087 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3088 this = self._parse_type() or self._parse_id_var() 3089 self._match(TokenType.COLON) 3090 return self._parse_column_def(this) 3091 3092 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3093 if not self._match_text_seq("AT", "TIME", "ZONE"): 3094 return this 3095 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3096 3097 def _parse_column(self) -> t.Optional[exp.Expression]: 3098 this = self._parse_field() 3099 if isinstance(this, exp.Identifier): 3100 this = self.expression(exp.Column, this=this) 3101 elif not this: 3102 return self._parse_bracket(this) 3103 return self._parse_column_ops(this) 3104 3105 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3106 this = self._parse_bracket(this) 3107 3108 while self._match_set(self.COLUMN_OPERATORS): 3109 op_token = self._prev.token_type 3110 op = self.COLUMN_OPERATORS.get(op_token) 3111 3112 if op_token == TokenType.DCOLON: 3113 field = self._parse_types() 3114 if not field: 3115 self.raise_error("Expected type") 3116 elif op and self._curr: 3117 self._advance() 3118 value = self._prev.text 3119 field = ( 3120 exp.Literal.number(value) 3121 if self._prev.token_type == TokenType.NUMBER 3122 else exp.Literal.string(value) 3123 ) 3124 else: 3125 field = self._parse_field(anonymous_func=True, any_token=True) 3126 3127 if isinstance(field, exp.Func): 3128 # bigquery allows function calls like x.y.count(...) 3129 # SAFE.SUBSTR(...) 3130 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3131 this = self._replace_columns_with_dots(this) 3132 3133 if op: 3134 this = op(self, this, field) 3135 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3136 this = self.expression( 3137 exp.Column, 3138 this=field, 3139 table=this.this, 3140 db=this.args.get("table"), 3141 catalog=this.args.get("db"), 3142 ) 3143 else: 3144 this = self.expression(exp.Dot, this=this, expression=field) 3145 this = self._parse_bracket(this) 3146 return this 3147 3148 def _parse_primary(self) -> t.Optional[exp.Expression]: 3149 if self._match_set(self.PRIMARY_PARSERS): 3150 token_type = self._prev.token_type 3151 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3152 3153 if token_type == TokenType.STRING: 3154 expressions = [primary] 3155 while self._match(TokenType.STRING): 3156 expressions.append(exp.Literal.string(self._prev.text)) 3157 3158 if len(expressions) > 1: 3159 return self.expression(exp.Concat, expressions=expressions) 3160 3161 return primary 3162 3163 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3164 return exp.Literal.number(f"0.{self._prev.text}") 3165 3166 if self._match(TokenType.L_PAREN): 3167 comments = self._prev_comments 3168 query = self._parse_select() 3169 3170 if query: 3171 expressions = [query] 3172 else: 3173 expressions = self._parse_csv(self._parse_expression) 3174 3175 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3176 3177 if isinstance(this, exp.Subqueryable): 3178 this = self._parse_set_operations( 3179 self._parse_subquery(this=this, parse_alias=False) 3180 ) 3181 elif len(expressions) > 1: 3182 this = self.expression(exp.Tuple, expressions=expressions) 3183 else: 3184 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3185 3186 if this: 3187 this.add_comments(comments) 3188 3189 self._match_r_paren(expression=this) 3190 return this 3191 3192 return None 3193 3194 def _parse_field( 3195 self, 3196 any_token: bool = False, 3197 tokens: t.Optional[t.Collection[TokenType]] = None, 3198 anonymous_func: bool = False, 3199 ) -> t.Optional[exp.Expression]: 3200 return ( 3201 self._parse_primary() 3202 or self._parse_function(anonymous=anonymous_func) 3203 or self._parse_id_var(any_token=any_token, tokens=tokens) 3204 ) 3205 3206 def _parse_function( 3207 self, 3208 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3209 anonymous: bool = False, 3210 optional_parens: bool = True, 3211 ) -> t.Optional[exp.Expression]: 3212 if not self._curr: 3213 return None 3214 3215 token_type = self._curr.token_type 3216 3217 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3218 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3219 3220 if not self._next or self._next.token_type != TokenType.L_PAREN: 3221 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3222 self._advance() 3223 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3224 3225 return None 3226 3227 if token_type not in self.FUNC_TOKENS: 3228 return None 3229 3230 this = self._curr.text 3231 upper = this.upper() 3232 self._advance(2) 3233 3234 parser = self.FUNCTION_PARSERS.get(upper) 3235 3236 if parser and not anonymous: 3237 this = parser(self) 3238 else: 3239 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3240 3241 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3242 this = self.expression(subquery_predicate, this=self._parse_select()) 3243 self._match_r_paren() 3244 return this 3245 3246 if functions is None: 3247 functions = self.FUNCTIONS 3248 3249 function = functions.get(upper) 3250 3251 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3252 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3253 3254 if function and not anonymous: 3255 this = self.validate_expression(function(args), args) 3256 else: 3257 this = self.expression(exp.Anonymous, this=this, expressions=args) 3258 3259 self._match_r_paren(this) 3260 return self._parse_window(this) 3261 3262 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3263 return self._parse_column_def(self._parse_id_var()) 3264 3265 def _parse_user_defined_function( 3266 self, kind: t.Optional[TokenType] = None 3267 ) -> t.Optional[exp.Expression]: 3268 this = self._parse_id_var() 3269 3270 while self._match(TokenType.DOT): 3271 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3272 3273 if not self._match(TokenType.L_PAREN): 3274 return this 3275 3276 expressions = self._parse_csv(self._parse_function_parameter) 3277 self._match_r_paren() 3278 return self.expression( 3279 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3280 ) 3281 3282 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3283 literal = self._parse_primary() 3284 if literal: 3285 return self.expression(exp.Introducer, this=token.text, expression=literal) 3286 3287 return self.expression(exp.Identifier, this=token.text) 3288 3289 def _parse_session_parameter(self) -> exp.SessionParameter: 3290 kind = None 3291 this = self._parse_id_var() or self._parse_primary() 3292 3293 if this and self._match(TokenType.DOT): 3294 kind = this.name 3295 this = self._parse_var() or self._parse_primary() 3296 3297 return self.expression(exp.SessionParameter, this=this, kind=kind) 3298 3299 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3300 index = self._index 3301 3302 if self._match(TokenType.L_PAREN): 3303 expressions = self._parse_csv(self._parse_id_var) 3304 3305 if not self._match(TokenType.R_PAREN): 3306 self._retreat(index) 3307 else: 3308 expressions = [self._parse_id_var()] 3309 3310 if self._match_set(self.LAMBDAS): 3311 return self.LAMBDAS[self._prev.token_type](self, expressions) 3312 3313 self._retreat(index) 3314 3315 this: t.Optional[exp.Expression] 3316 3317 if self._match(TokenType.DISTINCT): 3318 this = self.expression( 3319 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3320 ) 3321 else: 3322 this = self._parse_select_or_expression(alias=alias) 3323 3324 if isinstance(this, exp.EQ): 3325 left = this.this 3326 if isinstance(left, exp.Column): 3327 left.replace(exp.var(left.text("this"))) 3328 3329 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3330 3331 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3332 index = self._index 3333 3334 if not self.errors: 3335 try: 3336 if self._parse_select(nested=True): 3337 return this 3338 except ParseError: 3339 pass 3340 finally: 3341 self.errors.clear() 3342 self._retreat(index) 3343 3344 if not self._match(TokenType.L_PAREN): 3345 return this 3346 3347 args = self._parse_csv( 3348 lambda: self._parse_constraint() 3349 or self._parse_column_def(self._parse_field(any_token=True)) 3350 ) 3351 3352 self._match_r_paren() 3353 return self.expression(exp.Schema, this=this, expressions=args) 3354 3355 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3356 # column defs are not really columns, they're identifiers 3357 if isinstance(this, exp.Column): 3358 this = this.this 3359 3360 kind = self._parse_types(schema=True) 3361 3362 if self._match_text_seq("FOR", "ORDINALITY"): 3363 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3364 3365 constraints = [] 3366 while True: 3367 constraint = self._parse_column_constraint() 3368 if not constraint: 3369 break 3370 constraints.append(constraint) 3371 3372 if not kind and not constraints: 3373 return this 3374 3375 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3376 3377 def _parse_auto_increment( 3378 self, 3379 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3380 start = None 3381 increment = None 3382 3383 if self._match(TokenType.L_PAREN, advance=False): 3384 args = self._parse_wrapped_csv(self._parse_bitwise) 3385 start = seq_get(args, 0) 3386 increment = seq_get(args, 1) 3387 elif self._match_text_seq("START"): 3388 start = self._parse_bitwise() 3389 self._match_text_seq("INCREMENT") 3390 increment = self._parse_bitwise() 3391 3392 if start and increment: 3393 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3394 3395 return exp.AutoIncrementColumnConstraint() 3396 3397 def _parse_compress(self) -> exp.CompressColumnConstraint: 3398 if self._match(TokenType.L_PAREN, advance=False): 3399 return self.expression( 3400 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3401 ) 3402 3403 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3404 3405 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3406 if self._match_text_seq("BY", "DEFAULT"): 3407 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3408 this = self.expression( 3409 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3410 ) 3411 else: 3412 self._match_text_seq("ALWAYS") 3413 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3414 3415 self._match(TokenType.ALIAS) 3416 identity = self._match_text_seq("IDENTITY") 3417 3418 if self._match(TokenType.L_PAREN): 3419 if self._match_text_seq("START", "WITH"): 3420 this.set("start", self._parse_bitwise()) 3421 if self._match_text_seq("INCREMENT", "BY"): 3422 this.set("increment", self._parse_bitwise()) 3423 if self._match_text_seq("MINVALUE"): 3424 this.set("minvalue", self._parse_bitwise()) 3425 if self._match_text_seq("MAXVALUE"): 3426 this.set("maxvalue", self._parse_bitwise()) 3427 3428 if self._match_text_seq("CYCLE"): 3429 this.set("cycle", True) 3430 elif self._match_text_seq("NO", "CYCLE"): 3431 this.set("cycle", False) 3432 3433 if not identity: 3434 this.set("expression", self._parse_bitwise()) 3435 3436 self._match_r_paren() 3437 3438 return this 3439 3440 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3441 self._match_text_seq("LENGTH") 3442 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3443 3444 def _parse_not_constraint( 3445 self, 3446 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3447 if self._match_text_seq("NULL"): 3448 return self.expression(exp.NotNullColumnConstraint) 3449 if self._match_text_seq("CASESPECIFIC"): 3450 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3451 return None 3452 3453 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3454 if self._match(TokenType.CONSTRAINT): 3455 this = self._parse_id_var() 3456 else: 3457 this = None 3458 3459 if self._match_texts(self.CONSTRAINT_PARSERS): 3460 return self.expression( 3461 exp.ColumnConstraint, 3462 this=this, 3463 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3464 ) 3465 3466 return this 3467 3468 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3469 if not self._match(TokenType.CONSTRAINT): 3470 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3471 3472 this = self._parse_id_var() 3473 expressions = [] 3474 3475 while True: 3476 constraint = self._parse_unnamed_constraint() or self._parse_function() 3477 if not constraint: 3478 break 3479 expressions.append(constraint) 3480 3481 return self.expression(exp.Constraint, this=this, expressions=expressions) 3482 3483 def _parse_unnamed_constraint( 3484 self, constraints: t.Optional[t.Collection[str]] = None 3485 ) -> t.Optional[exp.Expression]: 3486 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3487 return None 3488 3489 constraint = self._prev.text.upper() 3490 if constraint not in self.CONSTRAINT_PARSERS: 3491 self.raise_error(f"No parser found for schema constraint {constraint}.") 3492 3493 return self.CONSTRAINT_PARSERS[constraint](self) 3494 3495 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3496 self._match_text_seq("KEY") 3497 return self.expression( 3498 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3499 ) 3500 3501 def _parse_key_constraint_options(self) -> t.List[str]: 3502 options = [] 3503 while True: 3504 if not self._curr: 3505 break 3506 3507 if self._match(TokenType.ON): 3508 action = None 3509 on = self._advance_any() and self._prev.text 3510 3511 if self._match_text_seq("NO", "ACTION"): 3512 action = "NO ACTION" 3513 elif self._match_text_seq("CASCADE"): 3514 action = "CASCADE" 3515 elif self._match_pair(TokenType.SET, TokenType.NULL): 3516 action = "SET NULL" 3517 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3518 action = "SET DEFAULT" 3519 else: 3520 self.raise_error("Invalid key constraint") 3521 3522 options.append(f"ON {on} {action}") 3523 elif self._match_text_seq("NOT", "ENFORCED"): 3524 options.append("NOT ENFORCED") 3525 elif self._match_text_seq("DEFERRABLE"): 3526 options.append("DEFERRABLE") 3527 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3528 options.append("INITIALLY DEFERRED") 3529 elif self._match_text_seq("NORELY"): 3530 options.append("NORELY") 3531 elif self._match_text_seq("MATCH", "FULL"): 3532 options.append("MATCH FULL") 3533 else: 3534 break 3535 3536 return options 3537 3538 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3539 if match and not self._match(TokenType.REFERENCES): 3540 return None 3541 3542 expressions = None 3543 this = self._parse_id_var() 3544 3545 if self._match(TokenType.L_PAREN, advance=False): 3546 expressions = self._parse_wrapped_id_vars() 3547 3548 options = self._parse_key_constraint_options() 3549 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3550 3551 def _parse_foreign_key(self) -> exp.ForeignKey: 3552 expressions = self._parse_wrapped_id_vars() 3553 reference = self._parse_references() 3554 options = {} 3555 3556 while self._match(TokenType.ON): 3557 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3558 self.raise_error("Expected DELETE or UPDATE") 3559 3560 kind = self._prev.text.lower() 3561 3562 if self._match_text_seq("NO", "ACTION"): 3563 action = "NO ACTION" 3564 elif self._match(TokenType.SET): 3565 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3566 action = "SET " + self._prev.text.upper() 3567 else: 3568 self._advance() 3569 action = self._prev.text.upper() 3570 3571 options[kind] = action 3572 3573 return self.expression( 3574 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3575 ) 3576 3577 def _parse_primary_key( 3578 self, wrapped_optional: bool = False, in_props: bool = False 3579 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3580 desc = ( 3581 self._match_set((TokenType.ASC, TokenType.DESC)) 3582 and self._prev.token_type == TokenType.DESC 3583 ) 3584 3585 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3586 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3587 3588 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3589 options = self._parse_key_constraint_options() 3590 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3591 3592 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3593 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3594 return this 3595 3596 bracket_kind = self._prev.token_type 3597 3598 if self._match(TokenType.COLON): 3599 expressions: t.List[t.Optional[exp.Expression]] = [ 3600 self.expression(exp.Slice, expression=self._parse_conjunction()) 3601 ] 3602 else: 3603 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3604 3605 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3606 if bracket_kind == TokenType.L_BRACE: 3607 this = self.expression(exp.Struct, expressions=expressions) 3608 elif not this or this.name.upper() == "ARRAY": 3609 this = self.expression(exp.Array, expressions=expressions) 3610 else: 3611 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3612 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3613 3614 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3615 self.raise_error("Expected ]") 3616 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3617 self.raise_error("Expected }") 3618 3619 self._add_comments(this) 3620 return self._parse_bracket(this) 3621 3622 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3623 if self._match(TokenType.COLON): 3624 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3625 return this 3626 3627 def _parse_case(self) -> t.Optional[exp.Expression]: 3628 ifs = [] 3629 default = None 3630 3631 expression = self._parse_conjunction() 3632 3633 while self._match(TokenType.WHEN): 3634 this = self._parse_conjunction() 3635 self._match(TokenType.THEN) 3636 then = self._parse_conjunction() 3637 ifs.append(self.expression(exp.If, this=this, true=then)) 3638 3639 if self._match(TokenType.ELSE): 3640 default = self._parse_conjunction() 3641 3642 if not self._match(TokenType.END): 3643 self.raise_error("Expected END after CASE", self._prev) 3644 3645 return self._parse_window( 3646 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3647 ) 3648 3649 def _parse_if(self) -> t.Optional[exp.Expression]: 3650 if self._match(TokenType.L_PAREN): 3651 args = self._parse_csv(self._parse_conjunction) 3652 this = self.validate_expression(exp.If.from_arg_list(args), args) 3653 self._match_r_paren() 3654 else: 3655 index = self._index - 1 3656 condition = self._parse_conjunction() 3657 3658 if not condition: 3659 self._retreat(index) 3660 return None 3661 3662 self._match(TokenType.THEN) 3663 true = self._parse_conjunction() 3664 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3665 self._match(TokenType.END) 3666 this = self.expression(exp.If, this=condition, true=true, false=false) 3667 3668 return self._parse_window(this) 3669 3670 def _parse_extract(self) -> exp.Extract: 3671 this = self._parse_function() or self._parse_var() or self._parse_type() 3672 3673 if self._match(TokenType.FROM): 3674 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3675 3676 if not self._match(TokenType.COMMA): 3677 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3678 3679 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3680 3681 def _parse_cast(self, strict: bool) -> exp.Expression: 3682 this = self._parse_conjunction() 3683 3684 if not self._match(TokenType.ALIAS): 3685 if self._match(TokenType.COMMA): 3686 return self.expression( 3687 exp.CastToStrType, this=this, expression=self._parse_string() 3688 ) 3689 else: 3690 self.raise_error("Expected AS after CAST") 3691 3692 fmt = None 3693 to = self._parse_types() 3694 3695 if not to: 3696 self.raise_error("Expected TYPE after CAST") 3697 elif to.this == exp.DataType.Type.CHAR: 3698 if self._match(TokenType.CHARACTER_SET): 3699 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3700 elif self._match(TokenType.FORMAT): 3701 fmt = self._parse_at_time_zone(self._parse_string()) 3702 3703 if to.this in exp.DataType.TEMPORAL_TYPES: 3704 return self.expression( 3705 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3706 this=this, 3707 format=exp.Literal.string( 3708 format_time( 3709 fmt.this if fmt else "", 3710 self.FORMAT_MAPPING or self.TIME_MAPPING, 3711 self.FORMAT_TRIE or self.TIME_TRIE, 3712 ) 3713 ), 3714 ) 3715 3716 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3717 3718 def _parse_concat(self) -> t.Optional[exp.Expression]: 3719 args = self._parse_csv(self._parse_conjunction) 3720 if self.CONCAT_NULL_OUTPUTS_STRING: 3721 args = [ 3722 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3723 for arg in args 3724 if arg 3725 ] 3726 3727 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3728 # we find such a call we replace it with its argument. 3729 if len(args) == 1: 3730 return args[0] 3731 3732 return self.expression( 3733 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3734 ) 3735 3736 def _parse_string_agg(self) -> exp.Expression: 3737 if self._match(TokenType.DISTINCT): 3738 args: t.List[t.Optional[exp.Expression]] = [ 3739 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3740 ] 3741 if self._match(TokenType.COMMA): 3742 args.extend(self._parse_csv(self._parse_conjunction)) 3743 else: 3744 args = self._parse_csv(self._parse_conjunction) 3745 3746 index = self._index 3747 if not self._match(TokenType.R_PAREN): 3748 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3749 return self.expression( 3750 exp.GroupConcat, 3751 this=seq_get(args, 0), 3752 separator=self._parse_order(this=seq_get(args, 1)), 3753 ) 3754 3755 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3756 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3757 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3758 if not self._match_text_seq("WITHIN", "GROUP"): 3759 self._retreat(index) 3760 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3761 3762 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3763 order = self._parse_order(this=seq_get(args, 0)) 3764 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3765 3766 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3767 this = self._parse_bitwise() 3768 3769 if self._match(TokenType.USING): 3770 to: t.Optional[exp.Expression] = self.expression( 3771 exp.CharacterSet, this=self._parse_var() 3772 ) 3773 elif self._match(TokenType.COMMA): 3774 to = self._parse_types() 3775 else: 3776 to = None 3777 3778 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3779 3780 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3781 """ 3782 There are generally two variants of the DECODE function: 3783 3784 - DECODE(bin, charset) 3785 - DECODE(expression, search, result [, search, result] ... [, default]) 3786 3787 The second variant will always be parsed into a CASE expression. Note that NULL 3788 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3789 instead of relying on pattern matching. 3790 """ 3791 args = self._parse_csv(self._parse_conjunction) 3792 3793 if len(args) < 3: 3794 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3795 3796 expression, *expressions = args 3797 if not expression: 3798 return None 3799 3800 ifs = [] 3801 for search, result in zip(expressions[::2], expressions[1::2]): 3802 if not search or not result: 3803 return None 3804 3805 if isinstance(search, exp.Literal): 3806 ifs.append( 3807 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3808 ) 3809 elif isinstance(search, exp.Null): 3810 ifs.append( 3811 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3812 ) 3813 else: 3814 cond = exp.or_( 3815 exp.EQ(this=expression.copy(), expression=search), 3816 exp.and_( 3817 exp.Is(this=expression.copy(), expression=exp.Null()), 3818 exp.Is(this=search.copy(), expression=exp.Null()), 3819 copy=False, 3820 ), 3821 copy=False, 3822 ) 3823 ifs.append(exp.If(this=cond, true=result)) 3824 3825 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3826 3827 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3828 self._match_text_seq("KEY") 3829 key = self._parse_field() 3830 self._match(TokenType.COLON) 3831 self._match_text_seq("VALUE") 3832 value = self._parse_field() 3833 3834 if not key and not value: 3835 return None 3836 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3837 3838 def _parse_json_object(self) -> exp.JSONObject: 3839 star = self._parse_star() 3840 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3841 3842 null_handling = None 3843 if self._match_text_seq("NULL", "ON", "NULL"): 3844 null_handling = "NULL ON NULL" 3845 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3846 null_handling = "ABSENT ON NULL" 3847 3848 unique_keys = None 3849 if self._match_text_seq("WITH", "UNIQUE"): 3850 unique_keys = True 3851 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3852 unique_keys = False 3853 3854 self._match_text_seq("KEYS") 3855 3856 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3857 format_json = self._match_text_seq("FORMAT", "JSON") 3858 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3859 3860 return self.expression( 3861 exp.JSONObject, 3862 expressions=expressions, 3863 null_handling=null_handling, 3864 unique_keys=unique_keys, 3865 return_type=return_type, 3866 format_json=format_json, 3867 encoding=encoding, 3868 ) 3869 3870 def _parse_logarithm(self) -> exp.Func: 3871 # Default argument order is base, expression 3872 args = self._parse_csv(self._parse_range) 3873 3874 if len(args) > 1: 3875 if not self.LOG_BASE_FIRST: 3876 args.reverse() 3877 return exp.Log.from_arg_list(args) 3878 3879 return self.expression( 3880 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3881 ) 3882 3883 def _parse_match_against(self) -> exp.MatchAgainst: 3884 expressions = self._parse_csv(self._parse_column) 3885 3886 self._match_text_seq(")", "AGAINST", "(") 3887 3888 this = self._parse_string() 3889 3890 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3891 modifier = "IN NATURAL LANGUAGE MODE" 3892 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3893 modifier = f"{modifier} WITH QUERY EXPANSION" 3894 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3895 modifier = "IN BOOLEAN MODE" 3896 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3897 modifier = "WITH QUERY EXPANSION" 3898 else: 3899 modifier = None 3900 3901 return self.expression( 3902 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3903 ) 3904 3905 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3906 def _parse_open_json(self) -> exp.OpenJSON: 3907 this = self._parse_bitwise() 3908 path = self._match(TokenType.COMMA) and self._parse_string() 3909 3910 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3911 this = self._parse_field(any_token=True) 3912 kind = self._parse_types() 3913 path = self._parse_string() 3914 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3915 3916 return self.expression( 3917 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3918 ) 3919 3920 expressions = None 3921 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3922 self._match_l_paren() 3923 expressions = self._parse_csv(_parse_open_json_column_def) 3924 3925 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3926 3927 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3928 args = self._parse_csv(self._parse_bitwise) 3929 3930 if self._match(TokenType.IN): 3931 return self.expression( 3932 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3933 ) 3934 3935 if haystack_first: 3936 haystack = seq_get(args, 0) 3937 needle = seq_get(args, 1) 3938 else: 3939 needle = seq_get(args, 0) 3940 haystack = seq_get(args, 1) 3941 3942 return self.expression( 3943 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3944 ) 3945 3946 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3947 args = self._parse_csv(self._parse_table) 3948 return exp.JoinHint(this=func_name.upper(), expressions=args) 3949 3950 def _parse_substring(self) -> exp.Substring: 3951 # Postgres supports the form: substring(string [from int] [for int]) 3952 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3953 3954 args = self._parse_csv(self._parse_bitwise) 3955 3956 if self._match(TokenType.FROM): 3957 args.append(self._parse_bitwise()) 3958 if self._match(TokenType.FOR): 3959 args.append(self._parse_bitwise()) 3960 3961 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3962 3963 def _parse_trim(self) -> exp.Trim: 3964 # https://www.w3resource.com/sql/character-functions/trim.php 3965 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3966 3967 position = None 3968 collation = None 3969 3970 if self._match_texts(self.TRIM_TYPES): 3971 position = self._prev.text.upper() 3972 3973 expression = self._parse_bitwise() 3974 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3975 this = self._parse_bitwise() 3976 else: 3977 this = expression 3978 expression = None 3979 3980 if self._match(TokenType.COLLATE): 3981 collation = self._parse_bitwise() 3982 3983 return self.expression( 3984 exp.Trim, this=this, position=position, expression=expression, collation=collation 3985 ) 3986 3987 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3988 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3989 3990 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3991 return self._parse_window(self._parse_id_var(), alias=True) 3992 3993 def _parse_respect_or_ignore_nulls( 3994 self, this: t.Optional[exp.Expression] 3995 ) -> t.Optional[exp.Expression]: 3996 if self._match_text_seq("IGNORE", "NULLS"): 3997 return self.expression(exp.IgnoreNulls, this=this) 3998 if self._match_text_seq("RESPECT", "NULLS"): 3999 return self.expression(exp.RespectNulls, this=this) 4000 return this 4001 4002 def _parse_window( 4003 self, this: t.Optional[exp.Expression], alias: bool = False 4004 ) -> t.Optional[exp.Expression]: 4005 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4006 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4007 self._match_r_paren() 4008 4009 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4010 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4011 if self._match_text_seq("WITHIN", "GROUP"): 4012 order = self._parse_wrapped(self._parse_order) 4013 this = self.expression(exp.WithinGroup, this=this, expression=order) 4014 4015 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4016 # Some dialects choose to implement and some do not. 4017 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4018 4019 # There is some code above in _parse_lambda that handles 4020 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4021 4022 # The below changes handle 4023 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4024 4025 # Oracle allows both formats 4026 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4027 # and Snowflake chose to do the same for familiarity 4028 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4029 this = self._parse_respect_or_ignore_nulls(this) 4030 4031 # bigquery select from window x AS (partition by ...) 4032 if alias: 4033 over = None 4034 self._match(TokenType.ALIAS) 4035 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4036 return this 4037 else: 4038 over = self._prev.text.upper() 4039 4040 if not self._match(TokenType.L_PAREN): 4041 return self.expression( 4042 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4043 ) 4044 4045 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4046 4047 first = self._match(TokenType.FIRST) 4048 if self._match_text_seq("LAST"): 4049 first = False 4050 4051 partition = self._parse_partition_by() 4052 order = self._parse_order() 4053 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4054 4055 if kind: 4056 self._match(TokenType.BETWEEN) 4057 start = self._parse_window_spec() 4058 self._match(TokenType.AND) 4059 end = self._parse_window_spec() 4060 4061 spec = self.expression( 4062 exp.WindowSpec, 4063 kind=kind, 4064 start=start["value"], 4065 start_side=start["side"], 4066 end=end["value"], 4067 end_side=end["side"], 4068 ) 4069 else: 4070 spec = None 4071 4072 self._match_r_paren() 4073 4074 return self.expression( 4075 exp.Window, 4076 this=this, 4077 partition_by=partition, 4078 order=order, 4079 spec=spec, 4080 alias=window_alias, 4081 over=over, 4082 first=first, 4083 ) 4084 4085 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4086 self._match(TokenType.BETWEEN) 4087 4088 return { 4089 "value": ( 4090 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4091 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4092 or self._parse_bitwise() 4093 ), 4094 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4095 } 4096 4097 def _parse_alias( 4098 self, this: t.Optional[exp.Expression], explicit: bool = False 4099 ) -> t.Optional[exp.Expression]: 4100 any_token = self._match(TokenType.ALIAS) 4101 4102 if explicit and not any_token: 4103 return this 4104 4105 if self._match(TokenType.L_PAREN): 4106 aliases = self.expression( 4107 exp.Aliases, 4108 this=this, 4109 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4110 ) 4111 self._match_r_paren(aliases) 4112 return aliases 4113 4114 alias = self._parse_id_var(any_token) 4115 4116 if alias: 4117 return self.expression(exp.Alias, this=this, alias=alias) 4118 4119 return this 4120 4121 def _parse_id_var( 4122 self, 4123 any_token: bool = True, 4124 tokens: t.Optional[t.Collection[TokenType]] = None, 4125 ) -> t.Optional[exp.Expression]: 4126 identifier = self._parse_identifier() 4127 4128 if identifier: 4129 return identifier 4130 4131 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4132 quoted = self._prev.token_type == TokenType.STRING 4133 return exp.Identifier(this=self._prev.text, quoted=quoted) 4134 4135 return None 4136 4137 def _parse_string(self) -> t.Optional[exp.Expression]: 4138 if self._match(TokenType.STRING): 4139 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4140 return self._parse_placeholder() 4141 4142 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4143 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4144 4145 def _parse_number(self) -> t.Optional[exp.Expression]: 4146 if self._match(TokenType.NUMBER): 4147 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4148 return self._parse_placeholder() 4149 4150 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4151 if self._match(TokenType.IDENTIFIER): 4152 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4153 return self._parse_placeholder() 4154 4155 def _parse_var( 4156 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4157 ) -> t.Optional[exp.Expression]: 4158 if ( 4159 (any_token and self._advance_any()) 4160 or self._match(TokenType.VAR) 4161 or (self._match_set(tokens) if tokens else False) 4162 ): 4163 return self.expression(exp.Var, this=self._prev.text) 4164 return self._parse_placeholder() 4165 4166 def _advance_any(self) -> t.Optional[Token]: 4167 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4168 self._advance() 4169 return self._prev 4170 return None 4171 4172 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4173 return self._parse_var() or self._parse_string() 4174 4175 def _parse_null(self) -> t.Optional[exp.Expression]: 4176 if self._match(TokenType.NULL): 4177 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4178 return None 4179 4180 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4181 if self._match(TokenType.TRUE): 4182 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4183 if self._match(TokenType.FALSE): 4184 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4185 return None 4186 4187 def _parse_star(self) -> t.Optional[exp.Expression]: 4188 if self._match(TokenType.STAR): 4189 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4190 return None 4191 4192 def _parse_parameter(self) -> exp.Parameter: 4193 wrapped = self._match(TokenType.L_BRACE) 4194 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4195 self._match(TokenType.R_BRACE) 4196 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4197 4198 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4199 if self._match_set(self.PLACEHOLDER_PARSERS): 4200 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4201 if placeholder: 4202 return placeholder 4203 self._advance(-1) 4204 return None 4205 4206 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4207 if not self._match(TokenType.EXCEPT): 4208 return None 4209 if self._match(TokenType.L_PAREN, advance=False): 4210 return self._parse_wrapped_csv(self._parse_column) 4211 return self._parse_csv(self._parse_column) 4212 4213 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4214 if not self._match(TokenType.REPLACE): 4215 return None 4216 if self._match(TokenType.L_PAREN, advance=False): 4217 return self._parse_wrapped_csv(self._parse_expression) 4218 return self._parse_csv(self._parse_expression) 4219 4220 def _parse_csv( 4221 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4222 ) -> t.List[t.Optional[exp.Expression]]: 4223 parse_result = parse_method() 4224 items = [parse_result] if parse_result is not None else [] 4225 4226 while self._match(sep): 4227 self._add_comments(parse_result) 4228 parse_result = parse_method() 4229 if parse_result is not None: 4230 items.append(parse_result) 4231 4232 return items 4233 4234 def _parse_tokens( 4235 self, parse_method: t.Callable, expressions: t.Dict 4236 ) -> t.Optional[exp.Expression]: 4237 this = parse_method() 4238 4239 while self._match_set(expressions): 4240 this = self.expression( 4241 expressions[self._prev.token_type], 4242 this=this, 4243 comments=self._prev_comments, 4244 expression=parse_method(), 4245 ) 4246 4247 return this 4248 4249 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4250 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4251 4252 def _parse_wrapped_csv( 4253 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4254 ) -> t.List[t.Optional[exp.Expression]]: 4255 return self._parse_wrapped( 4256 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4257 ) 4258 4259 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4260 wrapped = self._match(TokenType.L_PAREN) 4261 if not wrapped and not optional: 4262 self.raise_error("Expecting (") 4263 parse_result = parse_method() 4264 if wrapped: 4265 self._match_r_paren() 4266 return parse_result 4267 4268 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4269 return self._parse_select() or self._parse_set_operations( 4270 self._parse_expression() if alias else self._parse_conjunction() 4271 ) 4272 4273 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4274 return self._parse_query_modifiers( 4275 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4276 ) 4277 4278 def _parse_transaction(self) -> exp.Transaction: 4279 this = None 4280 if self._match_texts(self.TRANSACTION_KIND): 4281 this = self._prev.text 4282 4283 self._match_texts({"TRANSACTION", "WORK"}) 4284 4285 modes = [] 4286 while True: 4287 mode = [] 4288 while self._match(TokenType.VAR): 4289 mode.append(self._prev.text) 4290 4291 if mode: 4292 modes.append(" ".join(mode)) 4293 if not self._match(TokenType.COMMA): 4294 break 4295 4296 return self.expression(exp.Transaction, this=this, modes=modes) 4297 4298 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4299 chain = None 4300 savepoint = None 4301 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4302 4303 self._match_texts({"TRANSACTION", "WORK"}) 4304 4305 if self._match_text_seq("TO"): 4306 self._match_text_seq("SAVEPOINT") 4307 savepoint = self._parse_id_var() 4308 4309 if self._match(TokenType.AND): 4310 chain = not self._match_text_seq("NO") 4311 self._match_text_seq("CHAIN") 4312 4313 if is_rollback: 4314 return self.expression(exp.Rollback, savepoint=savepoint) 4315 4316 return self.expression(exp.Commit, chain=chain) 4317 4318 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4319 if not self._match_text_seq("ADD"): 4320 return None 4321 4322 self._match(TokenType.COLUMN) 4323 exists_column = self._parse_exists(not_=True) 4324 expression = self._parse_column_def(self._parse_field(any_token=True)) 4325 4326 if expression: 4327 expression.set("exists", exists_column) 4328 4329 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4330 if self._match_texts(("FIRST", "AFTER")): 4331 position = self._prev.text 4332 column_position = self.expression( 4333 exp.ColumnPosition, this=self._parse_column(), position=position 4334 ) 4335 expression.set("position", column_position) 4336 4337 return expression 4338 4339 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4340 drop = self._match(TokenType.DROP) and self._parse_drop() 4341 if drop and not isinstance(drop, exp.Command): 4342 drop.set("kind", drop.args.get("kind", "COLUMN")) 4343 return drop 4344 4345 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4346 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4347 return self.expression( 4348 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4349 ) 4350 4351 def _parse_add_constraint(self) -> exp.AddConstraint: 4352 this = None 4353 kind = self._prev.token_type 4354 4355 if kind == TokenType.CONSTRAINT: 4356 this = self._parse_id_var() 4357 4358 if self._match_text_seq("CHECK"): 4359 expression = self._parse_wrapped(self._parse_conjunction) 4360 enforced = self._match_text_seq("ENFORCED") 4361 4362 return self.expression( 4363 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4364 ) 4365 4366 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4367 expression = self._parse_foreign_key() 4368 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4369 expression = self._parse_primary_key() 4370 else: 4371 expression = None 4372 4373 return self.expression(exp.AddConstraint, this=this, expression=expression) 4374 4375 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4376 index = self._index - 1 4377 4378 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4379 return self._parse_csv(self._parse_add_constraint) 4380 4381 self._retreat(index) 4382 return self._parse_csv(self._parse_add_column) 4383 4384 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4385 self._match(TokenType.COLUMN) 4386 column = self._parse_field(any_token=True) 4387 4388 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4389 return self.expression(exp.AlterColumn, this=column, drop=True) 4390 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4391 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4392 4393 self._match_text_seq("SET", "DATA") 4394 return self.expression( 4395 exp.AlterColumn, 4396 this=column, 4397 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4398 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4399 using=self._match(TokenType.USING) and self._parse_conjunction(), 4400 ) 4401 4402 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4403 index = self._index - 1 4404 4405 partition_exists = self._parse_exists() 4406 if self._match(TokenType.PARTITION, advance=False): 4407 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4408 4409 self._retreat(index) 4410 return self._parse_csv(self._parse_drop_column) 4411 4412 def _parse_alter_table_rename(self) -> exp.RenameTable: 4413 self._match_text_seq("TO") 4414 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4415 4416 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4417 start = self._prev 4418 4419 if not self._match(TokenType.TABLE): 4420 return self._parse_as_command(start) 4421 4422 exists = self._parse_exists() 4423 this = self._parse_table(schema=True) 4424 4425 if self._next: 4426 self._advance() 4427 4428 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4429 if parser: 4430 actions = ensure_list(parser(self)) 4431 4432 if not self._curr: 4433 return self.expression( 4434 exp.AlterTable, 4435 this=this, 4436 exists=exists, 4437 actions=actions, 4438 ) 4439 return self._parse_as_command(start) 4440 4441 def _parse_merge(self) -> exp.Merge: 4442 self._match(TokenType.INTO) 4443 target = self._parse_table() 4444 4445 self._match(TokenType.USING) 4446 using = self._parse_table() 4447 4448 self._match(TokenType.ON) 4449 on = self._parse_conjunction() 4450 4451 whens = [] 4452 while self._match(TokenType.WHEN): 4453 matched = not self._match(TokenType.NOT) 4454 self._match_text_seq("MATCHED") 4455 source = ( 4456 False 4457 if self._match_text_seq("BY", "TARGET") 4458 else self._match_text_seq("BY", "SOURCE") 4459 ) 4460 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4461 4462 self._match(TokenType.THEN) 4463 4464 if self._match(TokenType.INSERT): 4465 _this = self._parse_star() 4466 if _this: 4467 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4468 else: 4469 then = self.expression( 4470 exp.Insert, 4471 this=self._parse_value(), 4472 expression=self._match(TokenType.VALUES) and self._parse_value(), 4473 ) 4474 elif self._match(TokenType.UPDATE): 4475 expressions = self._parse_star() 4476 if expressions: 4477 then = self.expression(exp.Update, expressions=expressions) 4478 else: 4479 then = self.expression( 4480 exp.Update, 4481 expressions=self._match(TokenType.SET) 4482 and self._parse_csv(self._parse_equality), 4483 ) 4484 elif self._match(TokenType.DELETE): 4485 then = self.expression(exp.Var, this=self._prev.text) 4486 else: 4487 then = None 4488 4489 whens.append( 4490 self.expression( 4491 exp.When, 4492 matched=matched, 4493 source=source, 4494 condition=condition, 4495 then=then, 4496 ) 4497 ) 4498 4499 return self.expression( 4500 exp.Merge, 4501 this=target, 4502 using=using, 4503 on=on, 4504 expressions=whens, 4505 ) 4506 4507 def _parse_show(self) -> t.Optional[exp.Expression]: 4508 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4509 if parser: 4510 return parser(self) 4511 self._advance() 4512 return self.expression(exp.Show, this=self._prev.text.upper()) 4513 4514 def _parse_set_item_assignment( 4515 self, kind: t.Optional[str] = None 4516 ) -> t.Optional[exp.Expression]: 4517 index = self._index 4518 4519 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4520 return self._parse_set_transaction(global_=kind == "GLOBAL") 4521 4522 left = self._parse_primary() or self._parse_id_var() 4523 4524 if not self._match_texts(("=", "TO")): 4525 self._retreat(index) 4526 return None 4527 4528 right = self._parse_statement() or self._parse_id_var() 4529 this = self.expression(exp.EQ, this=left, expression=right) 4530 4531 return self.expression(exp.SetItem, this=this, kind=kind) 4532 4533 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4534 self._match_text_seq("TRANSACTION") 4535 characteristics = self._parse_csv( 4536 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4537 ) 4538 return self.expression( 4539 exp.SetItem, 4540 expressions=characteristics, 4541 kind="TRANSACTION", 4542 **{"global": global_}, # type: ignore 4543 ) 4544 4545 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4546 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4547 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4548 4549 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4550 index = self._index 4551 set_ = self.expression( 4552 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4553 ) 4554 4555 if self._curr: 4556 self._retreat(index) 4557 return self._parse_as_command(self._prev) 4558 4559 return set_ 4560 4561 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4562 for option in options: 4563 if self._match_text_seq(*option.split(" ")): 4564 return exp.var(option) 4565 return None 4566 4567 def _parse_as_command(self, start: Token) -> exp.Command: 4568 while self._curr: 4569 self._advance() 4570 text = self._find_sql(start, self._prev) 4571 size = len(start.text) 4572 return exp.Command(this=text[:size], expression=text[size:]) 4573 4574 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4575 settings = [] 4576 4577 self._match_l_paren() 4578 kind = self._parse_id_var() 4579 4580 if self._match(TokenType.L_PAREN): 4581 while True: 4582 key = self._parse_id_var() 4583 value = self._parse_primary() 4584 4585 if not key and value is None: 4586 break 4587 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4588 self._match(TokenType.R_PAREN) 4589 4590 self._match_r_paren() 4591 4592 return self.expression( 4593 exp.DictProperty, 4594 this=this, 4595 kind=kind.this if kind else None, 4596 settings=settings, 4597 ) 4598 4599 def _parse_dict_range(self, this: str) -> exp.DictRange: 4600 self._match_l_paren() 4601 has_min = self._match_text_seq("MIN") 4602 if has_min: 4603 min = self._parse_var() or self._parse_primary() 4604 self._match_text_seq("MAX") 4605 max = self._parse_var() or self._parse_primary() 4606 else: 4607 max = self._parse_var() or self._parse_primary() 4608 min = exp.Literal.number(0) 4609 self._match_r_paren() 4610 return self.expression(exp.DictRange, this=this, min=min, max=max) 4611 4612 def _find_parser( 4613 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4614 ) -> t.Optional[t.Callable]: 4615 if not self._curr: 4616 return None 4617 4618 index = self._index 4619 this = [] 4620 while True: 4621 # The current token might be multiple words 4622 curr = self._curr.text.upper() 4623 key = curr.split(" ") 4624 this.append(curr) 4625 4626 self._advance() 4627 result, trie = in_trie(trie, key) 4628 if result == TrieResult.FAILED: 4629 break 4630 4631 if result == TrieResult.EXISTS: 4632 subparser = parsers[" ".join(this)] 4633 return subparser 4634 4635 self._retreat(index) 4636 return None 4637 4638 def _match(self, token_type, advance=True, expression=None): 4639 if not self._curr: 4640 return None 4641 4642 if self._curr.token_type == token_type: 4643 if advance: 4644 self._advance() 4645 self._add_comments(expression) 4646 return True 4647 4648 return None 4649 4650 def _match_set(self, types, advance=True): 4651 if not self._curr: 4652 return None 4653 4654 if self._curr.token_type in types: 4655 if advance: 4656 self._advance() 4657 return True 4658 4659 return None 4660 4661 def _match_pair(self, token_type_a, token_type_b, advance=True): 4662 if not self._curr or not self._next: 4663 return None 4664 4665 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4666 if advance: 4667 self._advance(2) 4668 return True 4669 4670 return None 4671 4672 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4673 if not self._match(TokenType.L_PAREN, expression=expression): 4674 self.raise_error("Expecting (") 4675 4676 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4677 if not self._match(TokenType.R_PAREN, expression=expression): 4678 self.raise_error("Expecting )") 4679 4680 def _match_texts(self, texts, advance=True): 4681 if self._curr and self._curr.text.upper() in texts: 4682 if advance: 4683 self._advance() 4684 return True 4685 return False 4686 4687 def _match_text_seq(self, *texts, advance=True): 4688 index = self._index 4689 for text in texts: 4690 if self._curr and self._curr.text.upper() == text: 4691 self._advance() 4692 else: 4693 self._retreat(index) 4694 return False 4695 4696 if not advance: 4697 self._retreat(index) 4698 4699 return True 4700 4701 @t.overload 4702 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4703 ... 4704 4705 @t.overload 4706 def _replace_columns_with_dots( 4707 self, this: t.Optional[exp.Expression] 4708 ) -> t.Optional[exp.Expression]: 4709 ... 4710 4711 def _replace_columns_with_dots(self, this): 4712 if isinstance(this, exp.Dot): 4713 exp.replace_children(this, self._replace_columns_with_dots) 4714 elif isinstance(this, exp.Column): 4715 exp.replace_children(this, self._replace_columns_with_dots) 4716 table = this.args.get("table") 4717 this = ( 4718 self.expression(exp.Dot, this=table, expression=this.this) 4719 if table 4720 else self.expression(exp.Var, this=this.name) 4721 ) 4722 elif isinstance(this, exp.Identifier): 4723 this = self.expression(exp.Var, this=this.name) 4724 4725 return this 4726 4727 def _replace_lambda( 4728 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4729 ) -> t.Optional[exp.Expression]: 4730 if not node: 4731 return node 4732 4733 for column in node.find_all(exp.Column): 4734 if column.parts[0].name in lambda_variables: 4735 dot_or_id = column.to_dot() if column.table else column.this 4736 parent = column.parent 4737 4738 while isinstance(parent, exp.Dot): 4739 if not isinstance(parent.parent, exp.Dot): 4740 parent.replace(dot_or_id) 4741 break 4742 parent = parent.parent 4743 else: 4744 if column is node: 4745 node = dot_or_id 4746 else: 4747 column.replace(dot_or_id) 4748 return node
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 "joins": lambda self: list(iter(self._parse_join, None)), 741 "laterals": lambda self: list(iter(self._parse_lateral, None)), 742 "match": lambda self: self._parse_match_recognize(), 743 "where": lambda self: self._parse_where(), 744 "group": lambda self: self._parse_group(), 745 "having": lambda self: self._parse_having(), 746 "qualify": lambda self: self._parse_qualify(), 747 "windows": lambda self: self._parse_window_clause(), 748 "order": lambda self: self._parse_order(), 749 "limit": lambda self: self._parse_limit(), 750 "offset": lambda self: self._parse_offset(), 751 "locks": lambda self: self._parse_locks(), 752 "sample": lambda self: self._parse_table_sample(as_modifier=True), 753 } 754 755 SET_PARSERS = { 756 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 757 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 758 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 759 "TRANSACTION": lambda self: self._parse_set_transaction(), 760 } 761 762 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 763 764 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 765 766 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 767 768 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 769 770 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 771 772 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 773 TRANSACTION_CHARACTERISTICS = { 774 "ISOLATION LEVEL REPEATABLE READ", 775 "ISOLATION LEVEL READ COMMITTED", 776 "ISOLATION LEVEL READ UNCOMMITTED", 777 "ISOLATION LEVEL SERIALIZABLE", 778 "READ WRITE", 779 "READ ONLY", 780 } 781 782 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 783 784 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 785 786 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 787 788 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 789 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 790 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 791 792 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 793 794 STRICT_CAST = True 795 796 # A NULL arg in CONCAT yields NULL by default 797 CONCAT_NULL_OUTPUTS_STRING = False 798 799 PREFIXED_PIVOT_COLUMNS = False 800 IDENTIFY_PIVOT_STRINGS = False 801 802 LOG_BASE_FIRST = True 803 LOG_DEFAULTS_TO_LN = False 804 805 __slots__ = ( 806 "error_level", 807 "error_message_context", 808 "max_errors", 809 "sql", 810 "errors", 811 "_tokens", 812 "_index", 813 "_curr", 814 "_next", 815 "_prev", 816 "_prev_comments", 817 ) 818 819 # Autofilled 820 INDEX_OFFSET: int = 0 821 UNNEST_COLUMN_ONLY: bool = False 822 ALIAS_POST_TABLESAMPLE: bool = False 823 STRICT_STRING_CONCAT = False 824 NULL_ORDERING: str = "nulls_are_small" 825 SHOW_TRIE: t.Dict = {} 826 SET_TRIE: t.Dict = {} 827 FORMAT_MAPPING: t.Dict[str, str] = {} 828 FORMAT_TRIE: t.Dict = {} 829 TIME_MAPPING: t.Dict[str, str] = {} 830 TIME_TRIE: t.Dict = {} 831 832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset() 842 843 def reset(self): 844 self.sql = "" 845 self.errors = [] 846 self._tokens = [] 847 self._index = 0 848 self._curr = None 849 self._next = None 850 self._prev = None 851 self._prev_comments = None 852 853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 ) 870 871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression 1020 1021 def _find_sql(self, start: Token, end: Token) -> str: 1022 return self.sql[start.start : end.end + 1] 1023 1024 def _advance(self, times: int = 1) -> None: 1025 self._index += times 1026 self._curr = seq_get(self._tokens, self._index) 1027 self._next = seq_get(self._tokens, self._index + 1) 1028 1029 if self._index > 0: 1030 self._prev = self._tokens[self._index - 1] 1031 self._prev_comments = self._prev.comments 1032 else: 1033 self._prev = None 1034 self._prev_comments = None 1035 1036 def _retreat(self, index: int) -> None: 1037 if index != self._index: 1038 self._advance(index - self._index) 1039 1040 def _parse_command(self) -> exp.Command: 1041 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1042 1043 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1044 start = self._prev 1045 exists = self._parse_exists() if allow_exists else None 1046 1047 self._match(TokenType.ON) 1048 1049 kind = self._match_set(self.CREATABLES) and self._prev 1050 if not kind: 1051 return self._parse_as_command(start) 1052 1053 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=kind.token_type) 1055 elif kind.token_type == TokenType.TABLE: 1056 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1057 elif kind.token_type == TokenType.COLUMN: 1058 this = self._parse_column() 1059 else: 1060 this = self._parse_id_var() 1061 1062 self._match(TokenType.IS) 1063 1064 return self.expression( 1065 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1066 ) 1067 1068 def _parse_to_table( 1069 self, 1070 ) -> exp.ToTableProperty: 1071 table = self._parse_table_parts(schema=True) 1072 return self.expression(exp.ToTableProperty, this=table) 1073 1074 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1075 def _parse_ttl(self) -> exp.Expression: 1076 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1077 this = self._parse_bitwise() 1078 1079 if self._match_text_seq("DELETE"): 1080 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1081 if self._match_text_seq("RECOMPRESS"): 1082 return self.expression( 1083 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1084 ) 1085 if self._match_text_seq("TO", "DISK"): 1086 return self.expression( 1087 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1088 ) 1089 if self._match_text_seq("TO", "VOLUME"): 1090 return self.expression( 1091 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1092 ) 1093 1094 return this 1095 1096 expressions = self._parse_csv(_parse_ttl_action) 1097 where = self._parse_where() 1098 group = self._parse_group() 1099 1100 aggregates = None 1101 if group and self._match(TokenType.SET): 1102 aggregates = self._parse_csv(self._parse_set_item) 1103 1104 return self.expression( 1105 exp.MergeTreeTTL, 1106 expressions=expressions, 1107 where=where, 1108 group=group, 1109 aggregates=aggregates, 1110 ) 1111 1112 def _parse_statement(self) -> t.Optional[exp.Expression]: 1113 if self._curr is None: 1114 return None 1115 1116 if self._match_set(self.STATEMENT_PARSERS): 1117 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1118 1119 if self._match_set(Tokenizer.COMMANDS): 1120 return self._parse_command() 1121 1122 expression = self._parse_expression() 1123 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1124 return self._parse_query_modifiers(expression) 1125 1126 def _parse_drop(self) -> exp.Drop | exp.Command: 1127 start = self._prev 1128 temporary = self._match(TokenType.TEMPORARY) 1129 materialized = self._match_text_seq("MATERIALIZED") 1130 1131 kind = self._match_set(self.CREATABLES) and self._prev.text 1132 if not kind: 1133 return self._parse_as_command(start) 1134 1135 return self.expression( 1136 exp.Drop, 1137 exists=self._parse_exists(), 1138 this=self._parse_table(schema=True), 1139 kind=kind, 1140 temporary=temporary, 1141 materialized=materialized, 1142 cascade=self._match_text_seq("CASCADE"), 1143 constraints=self._match_text_seq("CONSTRAINTS"), 1144 purge=self._match_text_seq("PURGE"), 1145 ) 1146 1147 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1148 return ( 1149 self._match(TokenType.IF) 1150 and (not not_ or self._match(TokenType.NOT)) 1151 and self._match(TokenType.EXISTS) 1152 ) 1153 1154 def _parse_create(self) -> exp.Create | exp.Command: 1155 # Note: this can't be None because we've matched a statement parser 1156 start = self._prev 1157 replace = start.text.upper() == "REPLACE" or self._match_pair( 1158 TokenType.OR, TokenType.REPLACE 1159 ) 1160 unique = self._match(TokenType.UNIQUE) 1161 1162 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1163 self._advance() 1164 1165 properties = None 1166 create_token = self._match_set(self.CREATABLES) and self._prev 1167 1168 if not create_token: 1169 # exp.Properties.Location.POST_CREATE 1170 properties = self._parse_properties() 1171 create_token = self._match_set(self.CREATABLES) and self._prev 1172 1173 if not properties or not create_token: 1174 return self._parse_as_command(start) 1175 1176 exists = self._parse_exists(not_=True) 1177 this = None 1178 expression = None 1179 indexes = None 1180 no_schema_binding = None 1181 begin = None 1182 clone = None 1183 1184 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1185 nonlocal properties 1186 if properties and temp_props: 1187 properties.expressions.extend(temp_props.expressions) 1188 elif temp_props: 1189 properties = temp_props 1190 1191 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1192 this = self._parse_user_defined_function(kind=create_token.token_type) 1193 1194 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1195 extend_props(self._parse_properties()) 1196 1197 self._match(TokenType.ALIAS) 1198 begin = self._match(TokenType.BEGIN) 1199 return_ = self._match_text_seq("RETURN") 1200 expression = self._parse_statement() 1201 1202 if return_: 1203 expression = self.expression(exp.Return, this=expression) 1204 elif create_token.token_type == TokenType.INDEX: 1205 this = self._parse_index(index=self._parse_id_var()) 1206 elif create_token.token_type in self.DB_CREATABLES: 1207 table_parts = self._parse_table_parts(schema=True) 1208 1209 # exp.Properties.Location.POST_NAME 1210 self._match(TokenType.COMMA) 1211 extend_props(self._parse_properties(before=True)) 1212 1213 this = self._parse_schema(this=table_parts) 1214 1215 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1216 extend_props(self._parse_properties()) 1217 1218 self._match(TokenType.ALIAS) 1219 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1220 # exp.Properties.Location.POST_ALIAS 1221 extend_props(self._parse_properties()) 1222 1223 expression = self._parse_ddl_select() 1224 1225 if create_token.token_type == TokenType.TABLE: 1226 indexes = [] 1227 while True: 1228 index = self._parse_index() 1229 1230 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1231 extend_props(self._parse_properties()) 1232 1233 if not index: 1234 break 1235 else: 1236 self._match(TokenType.COMMA) 1237 indexes.append(index) 1238 elif create_token.token_type == TokenType.VIEW: 1239 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1240 no_schema_binding = True 1241 1242 if self._match_text_seq("CLONE"): 1243 clone = self._parse_table(schema=True) 1244 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1245 clone_kind = ( 1246 self._match(TokenType.L_PAREN) 1247 and self._match_texts(self.CLONE_KINDS) 1248 and self._prev.text.upper() 1249 ) 1250 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1251 self._match(TokenType.R_PAREN) 1252 clone = self.expression( 1253 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1254 ) 1255 1256 return self.expression( 1257 exp.Create, 1258 this=this, 1259 kind=create_token.text, 1260 replace=replace, 1261 unique=unique, 1262 expression=expression, 1263 exists=exists, 1264 properties=properties, 1265 indexes=indexes, 1266 no_schema_binding=no_schema_binding, 1267 begin=begin, 1268 clone=clone, 1269 ) 1270 1271 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1272 # only used for teradata currently 1273 self._match(TokenType.COMMA) 1274 1275 kwargs = { 1276 "no": self._match_text_seq("NO"), 1277 "dual": self._match_text_seq("DUAL"), 1278 "before": self._match_text_seq("BEFORE"), 1279 "default": self._match_text_seq("DEFAULT"), 1280 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1281 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1282 "after": self._match_text_seq("AFTER"), 1283 "minimum": self._match_texts(("MIN", "MINIMUM")), 1284 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1285 } 1286 1287 if self._match_texts(self.PROPERTY_PARSERS): 1288 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1289 try: 1290 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1291 except TypeError: 1292 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1293 1294 return None 1295 1296 def _parse_property(self) -> t.Optional[exp.Expression]: 1297 if self._match_texts(self.PROPERTY_PARSERS): 1298 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1299 1300 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1301 return self._parse_character_set(default=True) 1302 1303 if self._match_text_seq("COMPOUND", "SORTKEY"): 1304 return self._parse_sortkey(compound=True) 1305 1306 if self._match_text_seq("SQL", "SECURITY"): 1307 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1308 1309 assignment = self._match_pair( 1310 TokenType.VAR, TokenType.EQ, advance=False 1311 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1312 1313 if assignment: 1314 key = self._parse_var_or_string() 1315 self._match(TokenType.EQ) 1316 return self.expression(exp.Property, this=key, value=self._parse_column()) 1317 1318 return None 1319 1320 def _parse_stored(self) -> exp.FileFormatProperty: 1321 self._match(TokenType.ALIAS) 1322 1323 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1324 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1325 1326 return self.expression( 1327 exp.FileFormatProperty, 1328 this=self.expression( 1329 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1330 ) 1331 if input_format or output_format 1332 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1333 ) 1334 1335 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1336 self._match(TokenType.EQ) 1337 self._match(TokenType.ALIAS) 1338 return self.expression(exp_class, this=self._parse_field()) 1339 1340 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1341 properties = [] 1342 while True: 1343 if before: 1344 prop = self._parse_property_before() 1345 else: 1346 prop = self._parse_property() 1347 1348 if not prop: 1349 break 1350 for p in ensure_list(prop): 1351 properties.append(p) 1352 1353 if properties: 1354 return self.expression(exp.Properties, expressions=properties) 1355 1356 return None 1357 1358 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1359 return self.expression( 1360 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1361 ) 1362 1363 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1364 if self._index >= 2: 1365 pre_volatile_token = self._tokens[self._index - 2] 1366 else: 1367 pre_volatile_token = None 1368 1369 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1370 return exp.VolatileProperty() 1371 1372 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1373 1374 def _parse_with_property( 1375 self, 1376 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1377 self._match(TokenType.WITH) 1378 if self._match(TokenType.L_PAREN, advance=False): 1379 return self._parse_wrapped_csv(self._parse_property) 1380 1381 if self._match_text_seq("JOURNAL"): 1382 return self._parse_withjournaltable() 1383 1384 if self._match_text_seq("DATA"): 1385 return self._parse_withdata(no=False) 1386 elif self._match_text_seq("NO", "DATA"): 1387 return self._parse_withdata(no=True) 1388 1389 if not self._next: 1390 return None 1391 1392 return self._parse_withisolatedloading() 1393 1394 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1395 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1396 self._match(TokenType.EQ) 1397 1398 user = self._parse_id_var() 1399 self._match(TokenType.PARAMETER) 1400 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1401 1402 if not user or not host: 1403 return None 1404 1405 return exp.DefinerProperty(this=f"{user}@{host}") 1406 1407 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1408 self._match(TokenType.TABLE) 1409 self._match(TokenType.EQ) 1410 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1411 1412 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1413 return self.expression(exp.LogProperty, no=no) 1414 1415 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1416 return self.expression(exp.JournalProperty, **kwargs) 1417 1418 def _parse_checksum(self) -> exp.ChecksumProperty: 1419 self._match(TokenType.EQ) 1420 1421 on = None 1422 if self._match(TokenType.ON): 1423 on = True 1424 elif self._match_text_seq("OFF"): 1425 on = False 1426 1427 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1428 1429 def _parse_cluster(self) -> exp.Cluster: 1430 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1431 1432 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1433 self._match_text_seq("BY") 1434 1435 self._match_l_paren() 1436 expressions = self._parse_csv(self._parse_column) 1437 self._match_r_paren() 1438 1439 if self._match_text_seq("SORTED", "BY"): 1440 self._match_l_paren() 1441 sorted_by = self._parse_csv(self._parse_ordered) 1442 self._match_r_paren() 1443 else: 1444 sorted_by = None 1445 1446 self._match(TokenType.INTO) 1447 buckets = self._parse_number() 1448 self._match_text_seq("BUCKETS") 1449 1450 return self.expression( 1451 exp.ClusteredByProperty, 1452 expressions=expressions, 1453 sorted_by=sorted_by, 1454 buckets=buckets, 1455 ) 1456 1457 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1458 if not self._match_text_seq("GRANTS"): 1459 self._retreat(self._index - 1) 1460 return None 1461 1462 return self.expression(exp.CopyGrantsProperty) 1463 1464 def _parse_freespace(self) -> exp.FreespaceProperty: 1465 self._match(TokenType.EQ) 1466 return self.expression( 1467 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1468 ) 1469 1470 def _parse_mergeblockratio( 1471 self, no: bool = False, default: bool = False 1472 ) -> exp.MergeBlockRatioProperty: 1473 if self._match(TokenType.EQ): 1474 return self.expression( 1475 exp.MergeBlockRatioProperty, 1476 this=self._parse_number(), 1477 percent=self._match(TokenType.PERCENT), 1478 ) 1479 1480 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1481 1482 def _parse_datablocksize( 1483 self, 1484 default: t.Optional[bool] = None, 1485 minimum: t.Optional[bool] = None, 1486 maximum: t.Optional[bool] = None, 1487 ) -> exp.DataBlocksizeProperty: 1488 self._match(TokenType.EQ) 1489 size = self._parse_number() 1490 1491 units = None 1492 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1493 units = self._prev.text 1494 1495 return self.expression( 1496 exp.DataBlocksizeProperty, 1497 size=size, 1498 units=units, 1499 default=default, 1500 minimum=minimum, 1501 maximum=maximum, 1502 ) 1503 1504 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1505 self._match(TokenType.EQ) 1506 always = self._match_text_seq("ALWAYS") 1507 manual = self._match_text_seq("MANUAL") 1508 never = self._match_text_seq("NEVER") 1509 default = self._match_text_seq("DEFAULT") 1510 1511 autotemp = None 1512 if self._match_text_seq("AUTOTEMP"): 1513 autotemp = self._parse_schema() 1514 1515 return self.expression( 1516 exp.BlockCompressionProperty, 1517 always=always, 1518 manual=manual, 1519 never=never, 1520 default=default, 1521 autotemp=autotemp, 1522 ) 1523 1524 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1525 no = self._match_text_seq("NO") 1526 concurrent = self._match_text_seq("CONCURRENT") 1527 self._match_text_seq("ISOLATED", "LOADING") 1528 for_all = self._match_text_seq("FOR", "ALL") 1529 for_insert = self._match_text_seq("FOR", "INSERT") 1530 for_none = self._match_text_seq("FOR", "NONE") 1531 return self.expression( 1532 exp.IsolatedLoadingProperty, 1533 no=no, 1534 concurrent=concurrent, 1535 for_all=for_all, 1536 for_insert=for_insert, 1537 for_none=for_none, 1538 ) 1539 1540 def _parse_locking(self) -> exp.LockingProperty: 1541 if self._match(TokenType.TABLE): 1542 kind = "TABLE" 1543 elif self._match(TokenType.VIEW): 1544 kind = "VIEW" 1545 elif self._match(TokenType.ROW): 1546 kind = "ROW" 1547 elif self._match_text_seq("DATABASE"): 1548 kind = "DATABASE" 1549 else: 1550 kind = None 1551 1552 if kind in ("DATABASE", "TABLE", "VIEW"): 1553 this = self._parse_table_parts() 1554 else: 1555 this = None 1556 1557 if self._match(TokenType.FOR): 1558 for_or_in = "FOR" 1559 elif self._match(TokenType.IN): 1560 for_or_in = "IN" 1561 else: 1562 for_or_in = None 1563 1564 if self._match_text_seq("ACCESS"): 1565 lock_type = "ACCESS" 1566 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1567 lock_type = "EXCLUSIVE" 1568 elif self._match_text_seq("SHARE"): 1569 lock_type = "SHARE" 1570 elif self._match_text_seq("READ"): 1571 lock_type = "READ" 1572 elif self._match_text_seq("WRITE"): 1573 lock_type = "WRITE" 1574 elif self._match_text_seq("CHECKSUM"): 1575 lock_type = "CHECKSUM" 1576 else: 1577 lock_type = None 1578 1579 override = self._match_text_seq("OVERRIDE") 1580 1581 return self.expression( 1582 exp.LockingProperty, 1583 this=this, 1584 kind=kind, 1585 for_or_in=for_or_in, 1586 lock_type=lock_type, 1587 override=override, 1588 ) 1589 1590 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1591 if self._match(TokenType.PARTITION_BY): 1592 return self._parse_csv(self._parse_conjunction) 1593 return [] 1594 1595 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1596 self._match(TokenType.EQ) 1597 return self.expression( 1598 exp.PartitionedByProperty, 1599 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1600 ) 1601 1602 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1603 if self._match_text_seq("AND", "STATISTICS"): 1604 statistics = True 1605 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1606 statistics = False 1607 else: 1608 statistics = None 1609 1610 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1611 1612 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1613 if self._match_text_seq("PRIMARY", "INDEX"): 1614 return exp.NoPrimaryIndexProperty() 1615 return None 1616 1617 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1618 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1619 return exp.OnCommitProperty() 1620 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1621 return exp.OnCommitProperty(delete=True) 1622 return None 1623 1624 def _parse_distkey(self) -> exp.DistKeyProperty: 1625 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1626 1627 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1628 table = self._parse_table(schema=True) 1629 1630 options = [] 1631 while self._match_texts(("INCLUDING", "EXCLUDING")): 1632 this = self._prev.text.upper() 1633 1634 id_var = self._parse_id_var() 1635 if not id_var: 1636 return None 1637 1638 options.append( 1639 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1640 ) 1641 1642 return self.expression(exp.LikeProperty, this=table, expressions=options) 1643 1644 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1645 return self.expression( 1646 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1647 ) 1648 1649 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1650 self._match(TokenType.EQ) 1651 return self.expression( 1652 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1653 ) 1654 1655 def _parse_returns(self) -> exp.ReturnsProperty: 1656 value: t.Optional[exp.Expression] 1657 is_table = self._match(TokenType.TABLE) 1658 1659 if is_table: 1660 if self._match(TokenType.LT): 1661 value = self.expression( 1662 exp.Schema, 1663 this="TABLE", 1664 expressions=self._parse_csv(self._parse_struct_types), 1665 ) 1666 if not self._match(TokenType.GT): 1667 self.raise_error("Expecting >") 1668 else: 1669 value = self._parse_schema(exp.var("TABLE")) 1670 else: 1671 value = self._parse_types() 1672 1673 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1674 1675 def _parse_describe(self) -> exp.Describe: 1676 kind = self._match_set(self.CREATABLES) and self._prev.text 1677 this = self._parse_table() 1678 return self.expression(exp.Describe, this=this, kind=kind) 1679 1680 def _parse_insert(self) -> exp.Insert: 1681 overwrite = self._match(TokenType.OVERWRITE) 1682 local = self._match_text_seq("LOCAL") 1683 alternative = None 1684 1685 if self._match_text_seq("DIRECTORY"): 1686 this: t.Optional[exp.Expression] = self.expression( 1687 exp.Directory, 1688 this=self._parse_var_or_string(), 1689 local=local, 1690 row_format=self._parse_row_format(match_row=True), 1691 ) 1692 else: 1693 if self._match(TokenType.OR): 1694 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1695 1696 self._match(TokenType.INTO) 1697 self._match(TokenType.TABLE) 1698 this = self._parse_table(schema=True) 1699 1700 return self.expression( 1701 exp.Insert, 1702 this=this, 1703 exists=self._parse_exists(), 1704 partition=self._parse_partition(), 1705 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1706 and self._parse_conjunction(), 1707 expression=self._parse_ddl_select(), 1708 conflict=self._parse_on_conflict(), 1709 returning=self._parse_returning(), 1710 overwrite=overwrite, 1711 alternative=alternative, 1712 ) 1713 1714 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1715 conflict = self._match_text_seq("ON", "CONFLICT") 1716 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1717 1718 if not conflict and not duplicate: 1719 return None 1720 1721 nothing = None 1722 expressions = None 1723 key = None 1724 constraint = None 1725 1726 if conflict: 1727 if self._match_text_seq("ON", "CONSTRAINT"): 1728 constraint = self._parse_id_var() 1729 else: 1730 key = self._parse_csv(self._parse_value) 1731 1732 self._match_text_seq("DO") 1733 if self._match_text_seq("NOTHING"): 1734 nothing = True 1735 else: 1736 self._match(TokenType.UPDATE) 1737 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1738 1739 return self.expression( 1740 exp.OnConflict, 1741 duplicate=duplicate, 1742 expressions=expressions, 1743 nothing=nothing, 1744 key=key, 1745 constraint=constraint, 1746 ) 1747 1748 def _parse_returning(self) -> t.Optional[exp.Returning]: 1749 if not self._match(TokenType.RETURNING): 1750 return None 1751 1752 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1753 1754 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1755 if not self._match(TokenType.FORMAT): 1756 return None 1757 return self._parse_row_format() 1758 1759 def _parse_row_format( 1760 self, match_row: bool = False 1761 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1762 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1763 return None 1764 1765 if self._match_text_seq("SERDE"): 1766 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1767 1768 self._match_text_seq("DELIMITED") 1769 1770 kwargs = {} 1771 1772 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1773 kwargs["fields"] = self._parse_string() 1774 if self._match_text_seq("ESCAPED", "BY"): 1775 kwargs["escaped"] = self._parse_string() 1776 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1777 kwargs["collection_items"] = self._parse_string() 1778 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1779 kwargs["map_keys"] = self._parse_string() 1780 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1781 kwargs["lines"] = self._parse_string() 1782 if self._match_text_seq("NULL", "DEFINED", "AS"): 1783 kwargs["null"] = self._parse_string() 1784 1785 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1786 1787 def _parse_load(self) -> exp.LoadData | exp.Command: 1788 if self._match_text_seq("DATA"): 1789 local = self._match_text_seq("LOCAL") 1790 self._match_text_seq("INPATH") 1791 inpath = self._parse_string() 1792 overwrite = self._match(TokenType.OVERWRITE) 1793 self._match_pair(TokenType.INTO, TokenType.TABLE) 1794 1795 return self.expression( 1796 exp.LoadData, 1797 this=self._parse_table(schema=True), 1798 local=local, 1799 overwrite=overwrite, 1800 inpath=inpath, 1801 partition=self._parse_partition(), 1802 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1803 serde=self._match_text_seq("SERDE") and self._parse_string(), 1804 ) 1805 return self._parse_as_command(self._prev) 1806 1807 def _parse_delete(self) -> exp.Delete: 1808 self._match(TokenType.FROM) 1809 1810 return self.expression( 1811 exp.Delete, 1812 this=self._parse_table(), 1813 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1814 where=self._parse_where(), 1815 returning=self._parse_returning(), 1816 limit=self._parse_limit(), 1817 ) 1818 1819 def _parse_update(self) -> exp.Update: 1820 return self.expression( 1821 exp.Update, 1822 **{ # type: ignore 1823 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1824 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1825 "from": self._parse_from(modifiers=True), 1826 "where": self._parse_where(), 1827 "returning": self._parse_returning(), 1828 "limit": self._parse_limit(), 1829 }, 1830 ) 1831 1832 def _parse_uncache(self) -> exp.Uncache: 1833 if not self._match(TokenType.TABLE): 1834 self.raise_error("Expecting TABLE after UNCACHE") 1835 1836 return self.expression( 1837 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1838 ) 1839 1840 def _parse_cache(self) -> exp.Cache: 1841 lazy = self._match_text_seq("LAZY") 1842 self._match(TokenType.TABLE) 1843 table = self._parse_table(schema=True) 1844 1845 options = [] 1846 if self._match_text_seq("OPTIONS"): 1847 self._match_l_paren() 1848 k = self._parse_string() 1849 self._match(TokenType.EQ) 1850 v = self._parse_string() 1851 options = [k, v] 1852 self._match_r_paren() 1853 1854 self._match(TokenType.ALIAS) 1855 return self.expression( 1856 exp.Cache, 1857 this=table, 1858 lazy=lazy, 1859 options=options, 1860 expression=self._parse_select(nested=True), 1861 ) 1862 1863 def _parse_partition(self) -> t.Optional[exp.Partition]: 1864 if not self._match(TokenType.PARTITION): 1865 return None 1866 1867 return self.expression( 1868 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1869 ) 1870 1871 def _parse_value(self) -> exp.Tuple: 1872 if self._match(TokenType.L_PAREN): 1873 expressions = self._parse_csv(self._parse_conjunction) 1874 self._match_r_paren() 1875 return self.expression(exp.Tuple, expressions=expressions) 1876 1877 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1878 # Source: https://prestodb.io/docs/current/sql/values.html 1879 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1880 1881 def _parse_select( 1882 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1883 ) -> t.Optional[exp.Expression]: 1884 cte = self._parse_with() 1885 if cte: 1886 this = self._parse_statement() 1887 1888 if not this: 1889 self.raise_error("Failed to parse any statement following CTE") 1890 return cte 1891 1892 if "with" in this.arg_types: 1893 this.set("with", cte) 1894 else: 1895 self.raise_error(f"{this.key} does not support CTE") 1896 this = cte 1897 elif self._match(TokenType.SELECT): 1898 comments = self._prev_comments 1899 1900 hint = self._parse_hint() 1901 all_ = self._match(TokenType.ALL) 1902 distinct = self._match(TokenType.DISTINCT) 1903 1904 kind = ( 1905 self._match(TokenType.ALIAS) 1906 and self._match_texts(("STRUCT", "VALUE")) 1907 and self._prev.text 1908 ) 1909 1910 if distinct: 1911 distinct = self.expression( 1912 exp.Distinct, 1913 on=self._parse_value() if self._match(TokenType.ON) else None, 1914 ) 1915 1916 if all_ and distinct: 1917 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1918 1919 limit = self._parse_limit(top=True) 1920 expressions = self._parse_csv(self._parse_expression) 1921 1922 this = self.expression( 1923 exp.Select, 1924 kind=kind, 1925 hint=hint, 1926 distinct=distinct, 1927 expressions=expressions, 1928 limit=limit, 1929 ) 1930 this.comments = comments 1931 1932 into = self._parse_into() 1933 if into: 1934 this.set("into", into) 1935 1936 from_ = self._parse_from() 1937 if from_: 1938 this.set("from", from_) 1939 1940 this = self._parse_query_modifiers(this) 1941 elif (table or nested) and self._match(TokenType.L_PAREN): 1942 if self._match(TokenType.PIVOT): 1943 this = self._parse_simplified_pivot() 1944 elif self._match(TokenType.FROM): 1945 this = exp.select("*").from_( 1946 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1947 ) 1948 else: 1949 this = self._parse_table() if table else self._parse_select(nested=True) 1950 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1951 1952 self._match_r_paren() 1953 1954 # early return so that subquery unions aren't parsed again 1955 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1956 # Union ALL should be a property of the top select node, not the subquery 1957 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1958 elif self._match(TokenType.VALUES): 1959 this = self.expression( 1960 exp.Values, 1961 expressions=self._parse_csv(self._parse_value), 1962 alias=self._parse_table_alias(), 1963 ) 1964 else: 1965 this = None 1966 1967 return self._parse_set_operations(this) 1968 1969 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1970 if not skip_with_token and not self._match(TokenType.WITH): 1971 return None 1972 1973 comments = self._prev_comments 1974 recursive = self._match(TokenType.RECURSIVE) 1975 1976 expressions = [] 1977 while True: 1978 expressions.append(self._parse_cte()) 1979 1980 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1981 break 1982 else: 1983 self._match(TokenType.WITH) 1984 1985 return self.expression( 1986 exp.With, comments=comments, expressions=expressions, recursive=recursive 1987 ) 1988 1989 def _parse_cte(self) -> exp.CTE: 1990 alias = self._parse_table_alias() 1991 if not alias or not alias.this: 1992 self.raise_error("Expected CTE to have alias") 1993 1994 self._match(TokenType.ALIAS) 1995 return self.expression( 1996 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1997 ) 1998 1999 def _parse_table_alias( 2000 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2001 ) -> t.Optional[exp.TableAlias]: 2002 any_token = self._match(TokenType.ALIAS) 2003 alias = ( 2004 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2005 or self._parse_string_as_identifier() 2006 ) 2007 2008 index = self._index 2009 if self._match(TokenType.L_PAREN): 2010 columns = self._parse_csv(self._parse_function_parameter) 2011 self._match_r_paren() if columns else self._retreat(index) 2012 else: 2013 columns = None 2014 2015 if not alias and not columns: 2016 return None 2017 2018 return self.expression(exp.TableAlias, this=alias, columns=columns) 2019 2020 def _parse_subquery( 2021 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2022 ) -> t.Optional[exp.Subquery]: 2023 if not this: 2024 return None 2025 2026 return self.expression( 2027 exp.Subquery, 2028 this=this, 2029 pivots=self._parse_pivots(), 2030 alias=self._parse_table_alias() if parse_alias else None, 2031 ) 2032 2033 def _parse_query_modifiers( 2034 self, this: t.Optional[exp.Expression] 2035 ) -> t.Optional[exp.Expression]: 2036 if isinstance(this, self.MODIFIABLES): 2037 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2038 expression = parser(self) 2039 2040 if expression: 2041 if key == "limit": 2042 offset = expression.args.pop("offset", None) 2043 if offset: 2044 this.set("offset", exp.Offset(expression=offset)) 2045 this.set(key, expression) 2046 return this 2047 2048 def _parse_hint(self) -> t.Optional[exp.Hint]: 2049 if self._match(TokenType.HINT): 2050 hints = self._parse_csv(self._parse_function) 2051 2052 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2053 self.raise_error("Expected */ after HINT") 2054 2055 return self.expression(exp.Hint, expressions=hints) 2056 2057 return None 2058 2059 def _parse_into(self) -> t.Optional[exp.Into]: 2060 if not self._match(TokenType.INTO): 2061 return None 2062 2063 temp = self._match(TokenType.TEMPORARY) 2064 unlogged = self._match_text_seq("UNLOGGED") 2065 self._match(TokenType.TABLE) 2066 2067 return self.expression( 2068 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2069 ) 2070 2071 def _parse_from( 2072 self, modifiers: bool = False, skip_from_token: bool = False 2073 ) -> t.Optional[exp.From]: 2074 if not skip_from_token and not self._match(TokenType.FROM): 2075 return None 2076 2077 comments = self._prev_comments 2078 this = self._parse_table() 2079 2080 return self.expression( 2081 exp.From, 2082 comments=comments, 2083 this=self._parse_query_modifiers(this) if modifiers else this, 2084 ) 2085 2086 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2087 if not self._match(TokenType.MATCH_RECOGNIZE): 2088 return None 2089 2090 self._match_l_paren() 2091 2092 partition = self._parse_partition_by() 2093 order = self._parse_order() 2094 measures = ( 2095 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2096 ) 2097 2098 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2099 rows = exp.var("ONE ROW PER MATCH") 2100 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2101 text = "ALL ROWS PER MATCH" 2102 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2103 text += f" SHOW EMPTY MATCHES" 2104 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2105 text += f" OMIT EMPTY MATCHES" 2106 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2107 text += f" WITH UNMATCHED ROWS" 2108 rows = exp.var(text) 2109 else: 2110 rows = None 2111 2112 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2113 text = "AFTER MATCH SKIP" 2114 if self._match_text_seq("PAST", "LAST", "ROW"): 2115 text += f" PAST LAST ROW" 2116 elif self._match_text_seq("TO", "NEXT", "ROW"): 2117 text += f" TO NEXT ROW" 2118 elif self._match_text_seq("TO", "FIRST"): 2119 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2120 elif self._match_text_seq("TO", "LAST"): 2121 text += f" TO LAST {self._advance_any().text}" # type: ignore 2122 after = exp.var(text) 2123 else: 2124 after = None 2125 2126 if self._match_text_seq("PATTERN"): 2127 self._match_l_paren() 2128 2129 if not self._curr: 2130 self.raise_error("Expecting )", self._curr) 2131 2132 paren = 1 2133 start = self._curr 2134 2135 while self._curr and paren > 0: 2136 if self._curr.token_type == TokenType.L_PAREN: 2137 paren += 1 2138 if self._curr.token_type == TokenType.R_PAREN: 2139 paren -= 1 2140 2141 end = self._prev 2142 self._advance() 2143 2144 if paren > 0: 2145 self.raise_error("Expecting )", self._curr) 2146 2147 pattern = exp.var(self._find_sql(start, end)) 2148 else: 2149 pattern = None 2150 2151 define = ( 2152 self._parse_csv( 2153 lambda: self.expression( 2154 exp.Alias, 2155 alias=self._parse_id_var(any_token=True), 2156 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2157 ) 2158 ) 2159 if self._match_text_seq("DEFINE") 2160 else None 2161 ) 2162 2163 self._match_r_paren() 2164 2165 return self.expression( 2166 exp.MatchRecognize, 2167 partition_by=partition, 2168 order=order, 2169 measures=measures, 2170 rows=rows, 2171 after=after, 2172 pattern=pattern, 2173 define=define, 2174 alias=self._parse_table_alias(), 2175 ) 2176 2177 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2178 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2179 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2180 2181 if outer_apply or cross_apply: 2182 this = self._parse_select(table=True) 2183 view = None 2184 outer = not cross_apply 2185 elif self._match(TokenType.LATERAL): 2186 this = self._parse_select(table=True) 2187 view = self._match(TokenType.VIEW) 2188 outer = self._match(TokenType.OUTER) 2189 else: 2190 return None 2191 2192 if not this: 2193 this = self._parse_function() or self._parse_id_var(any_token=False) 2194 while self._match(TokenType.DOT): 2195 this = exp.Dot( 2196 this=this, 2197 expression=self._parse_function() or self._parse_id_var(any_token=False), 2198 ) 2199 2200 if view: 2201 table = self._parse_id_var(any_token=False) 2202 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2203 table_alias: t.Optional[exp.TableAlias] = self.expression( 2204 exp.TableAlias, this=table, columns=columns 2205 ) 2206 elif isinstance(this, exp.Subquery) and this.alias: 2207 # Ensures parity between the Subquery's and the Lateral's "alias" args 2208 table_alias = this.args["alias"].copy() 2209 else: 2210 table_alias = self._parse_table_alias() 2211 2212 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2213 2214 def _parse_join_parts( 2215 self, 2216 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2217 return ( 2218 self._match_set(self.JOIN_METHODS) and self._prev, 2219 self._match_set(self.JOIN_SIDES) and self._prev, 2220 self._match_set(self.JOIN_KINDS) and self._prev, 2221 ) 2222 2223 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2224 if self._match(TokenType.COMMA): 2225 return self.expression(exp.Join, this=self._parse_table()) 2226 2227 index = self._index 2228 method, side, kind = self._parse_join_parts() 2229 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2230 join = self._match(TokenType.JOIN) 2231 2232 if not skip_join_token and not join: 2233 self._retreat(index) 2234 kind = None 2235 method = None 2236 side = None 2237 2238 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2239 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2240 2241 if not skip_join_token and not join and not outer_apply and not cross_apply: 2242 return None 2243 2244 if outer_apply: 2245 side = Token(TokenType.LEFT, "LEFT") 2246 2247 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2248 2249 if method: 2250 kwargs["method"] = method.text 2251 if side: 2252 kwargs["side"] = side.text 2253 if kind: 2254 kwargs["kind"] = kind.text 2255 if hint: 2256 kwargs["hint"] = hint 2257 2258 if self._match(TokenType.ON): 2259 kwargs["on"] = self._parse_conjunction() 2260 elif self._match(TokenType.USING): 2261 kwargs["using"] = self._parse_wrapped_id_vars() 2262 2263 return self.expression(exp.Join, **kwargs) 2264 2265 def _parse_index( 2266 self, 2267 index: t.Optional[exp.Expression] = None, 2268 ) -> t.Optional[exp.Index]: 2269 if index: 2270 unique = None 2271 primary = None 2272 amp = None 2273 2274 self._match(TokenType.ON) 2275 self._match(TokenType.TABLE) # hive 2276 table = self._parse_table_parts(schema=True) 2277 else: 2278 unique = self._match(TokenType.UNIQUE) 2279 primary = self._match_text_seq("PRIMARY") 2280 amp = self._match_text_seq("AMP") 2281 2282 if not self._match(TokenType.INDEX): 2283 return None 2284 2285 index = self._parse_id_var() 2286 table = None 2287 2288 using = self._parse_field() if self._match(TokenType.USING) else None 2289 2290 if self._match(TokenType.L_PAREN, advance=False): 2291 columns = self._parse_wrapped_csv(self._parse_ordered) 2292 else: 2293 columns = None 2294 2295 return self.expression( 2296 exp.Index, 2297 this=index, 2298 table=table, 2299 using=using, 2300 columns=columns, 2301 unique=unique, 2302 primary=primary, 2303 amp=amp, 2304 partition_by=self._parse_partition_by(), 2305 ) 2306 2307 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2308 hints: t.List[exp.Expression] = [] 2309 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2310 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2311 hints.append( 2312 self.expression( 2313 exp.WithTableHint, 2314 expressions=self._parse_csv( 2315 lambda: self._parse_function() or self._parse_var(any_token=True) 2316 ), 2317 ) 2318 ) 2319 self._match_r_paren() 2320 else: 2321 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2322 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2323 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2324 2325 self._match_texts({"INDEX", "KEY"}) 2326 if self._match(TokenType.FOR): 2327 hint.set("target", self._advance_any() and self._prev.text.upper()) 2328 2329 hint.set("expressions", self._parse_wrapped_id_vars()) 2330 hints.append(hint) 2331 2332 return hints or None 2333 2334 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2335 return ( 2336 (not schema and self._parse_function(optional_parens=False)) 2337 or self._parse_id_var(any_token=False) 2338 or self._parse_string_as_identifier() 2339 or self._parse_placeholder() 2340 ) 2341 2342 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2343 catalog = None 2344 db = None 2345 table = self._parse_table_part(schema=schema) 2346 2347 while self._match(TokenType.DOT): 2348 if catalog: 2349 # This allows nesting the table in arbitrarily many dot expressions if needed 2350 table = self.expression( 2351 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2352 ) 2353 else: 2354 catalog = db 2355 db = table 2356 table = self._parse_table_part(schema=schema) 2357 2358 if not table: 2359 self.raise_error(f"Expected table name but got {self._curr}") 2360 2361 return self.expression( 2362 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2363 ) 2364 2365 def _parse_table( 2366 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2367 ) -> t.Optional[exp.Expression]: 2368 lateral = self._parse_lateral() 2369 if lateral: 2370 return lateral 2371 2372 unnest = self._parse_unnest() 2373 if unnest: 2374 return unnest 2375 2376 values = self._parse_derived_table_values() 2377 if values: 2378 return values 2379 2380 subquery = self._parse_select(table=True) 2381 if subquery: 2382 if not subquery.args.get("pivots"): 2383 subquery.set("pivots", self._parse_pivots()) 2384 return subquery 2385 2386 this: exp.Expression = self._parse_table_parts(schema=schema) 2387 2388 if schema: 2389 return self._parse_schema(this=this) 2390 2391 if self.ALIAS_POST_TABLESAMPLE: 2392 table_sample = self._parse_table_sample() 2393 2394 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2395 if alias: 2396 this.set("alias", alias) 2397 2398 if not this.args.get("pivots"): 2399 this.set("pivots", self._parse_pivots()) 2400 2401 this.set("hints", self._parse_table_hints()) 2402 2403 if not self.ALIAS_POST_TABLESAMPLE: 2404 table_sample = self._parse_table_sample() 2405 2406 if table_sample: 2407 table_sample.set("this", this) 2408 this = table_sample 2409 2410 return this 2411 2412 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2413 if not self._match(TokenType.UNNEST): 2414 return None 2415 2416 expressions = self._parse_wrapped_csv(self._parse_type) 2417 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2418 2419 alias = self._parse_table_alias() if with_alias else None 2420 2421 if alias and self.UNNEST_COLUMN_ONLY: 2422 if alias.args.get("columns"): 2423 self.raise_error("Unexpected extra column alias in unnest.") 2424 2425 alias.set("columns", [alias.this]) 2426 alias.set("this", None) 2427 2428 offset = None 2429 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2430 self._match(TokenType.ALIAS) 2431 offset = self._parse_id_var() or exp.to_identifier("offset") 2432 2433 return self.expression( 2434 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2435 ) 2436 2437 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2438 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2439 if not is_derived and not self._match(TokenType.VALUES): 2440 return None 2441 2442 expressions = self._parse_csv(self._parse_value) 2443 alias = self._parse_table_alias() 2444 2445 if is_derived: 2446 self._match_r_paren() 2447 2448 return self.expression( 2449 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2450 ) 2451 2452 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2453 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2454 as_modifier and self._match_text_seq("USING", "SAMPLE") 2455 ): 2456 return None 2457 2458 bucket_numerator = None 2459 bucket_denominator = None 2460 bucket_field = None 2461 percent = None 2462 rows = None 2463 size = None 2464 seed = None 2465 2466 kind = ( 2467 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2468 ) 2469 method = self._parse_var(tokens=(TokenType.ROW,)) 2470 2471 self._match(TokenType.L_PAREN) 2472 2473 num = self._parse_number() 2474 2475 if self._match_text_seq("BUCKET"): 2476 bucket_numerator = self._parse_number() 2477 self._match_text_seq("OUT", "OF") 2478 bucket_denominator = bucket_denominator = self._parse_number() 2479 self._match(TokenType.ON) 2480 bucket_field = self._parse_field() 2481 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2482 percent = num 2483 elif self._match(TokenType.ROWS): 2484 rows = num 2485 else: 2486 size = num 2487 2488 self._match(TokenType.R_PAREN) 2489 2490 if self._match(TokenType.L_PAREN): 2491 method = self._parse_var() 2492 seed = self._match(TokenType.COMMA) and self._parse_number() 2493 self._match_r_paren() 2494 elif self._match_texts(("SEED", "REPEATABLE")): 2495 seed = self._parse_wrapped(self._parse_number) 2496 2497 return self.expression( 2498 exp.TableSample, 2499 method=method, 2500 bucket_numerator=bucket_numerator, 2501 bucket_denominator=bucket_denominator, 2502 bucket_field=bucket_field, 2503 percent=percent, 2504 rows=rows, 2505 size=size, 2506 seed=seed, 2507 kind=kind, 2508 ) 2509 2510 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2511 return list(iter(self._parse_pivot, None)) 2512 2513 # https://duckdb.org/docs/sql/statements/pivot 2514 def _parse_simplified_pivot(self) -> exp.Pivot: 2515 def _parse_on() -> t.Optional[exp.Expression]: 2516 this = self._parse_bitwise() 2517 return self._parse_in(this) if self._match(TokenType.IN) else this 2518 2519 this = self._parse_table() 2520 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2521 using = self._match(TokenType.USING) and self._parse_csv( 2522 lambda: self._parse_alias(self._parse_function()) 2523 ) 2524 group = self._parse_group() 2525 return self.expression( 2526 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2527 ) 2528 2529 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2530 index = self._index 2531 2532 if self._match(TokenType.PIVOT): 2533 unpivot = False 2534 elif self._match(TokenType.UNPIVOT): 2535 unpivot = True 2536 else: 2537 return None 2538 2539 expressions = [] 2540 field = None 2541 2542 if not self._match(TokenType.L_PAREN): 2543 self._retreat(index) 2544 return None 2545 2546 if unpivot: 2547 expressions = self._parse_csv(self._parse_column) 2548 else: 2549 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2550 2551 if not expressions: 2552 self.raise_error("Failed to parse PIVOT's aggregation list") 2553 2554 if not self._match(TokenType.FOR): 2555 self.raise_error("Expecting FOR") 2556 2557 value = self._parse_column() 2558 2559 if not self._match(TokenType.IN): 2560 self.raise_error("Expecting IN") 2561 2562 field = self._parse_in(value, alias=True) 2563 2564 self._match_r_paren() 2565 2566 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2567 2568 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2569 pivot.set("alias", self._parse_table_alias()) 2570 2571 if not unpivot: 2572 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2573 2574 columns: t.List[exp.Expression] = [] 2575 for fld in pivot.args["field"].expressions: 2576 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2577 for name in names: 2578 if self.PREFIXED_PIVOT_COLUMNS: 2579 name = f"{name}_{field_name}" if name else field_name 2580 else: 2581 name = f"{field_name}_{name}" if name else field_name 2582 2583 columns.append(exp.to_identifier(name)) 2584 2585 pivot.set("columns", columns) 2586 2587 return pivot 2588 2589 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2590 return [agg.alias for agg in aggregations] 2591 2592 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2593 if not skip_where_token and not self._match(TokenType.WHERE): 2594 return None 2595 2596 return self.expression( 2597 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2598 ) 2599 2600 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2601 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2602 return None 2603 2604 elements = defaultdict(list) 2605 2606 while True: 2607 expressions = self._parse_csv(self._parse_conjunction) 2608 if expressions: 2609 elements["expressions"].extend(expressions) 2610 2611 grouping_sets = self._parse_grouping_sets() 2612 if grouping_sets: 2613 elements["grouping_sets"].extend(grouping_sets) 2614 2615 rollup = None 2616 cube = None 2617 totals = None 2618 2619 with_ = self._match(TokenType.WITH) 2620 if self._match(TokenType.ROLLUP): 2621 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2622 elements["rollup"].extend(ensure_list(rollup)) 2623 2624 if self._match(TokenType.CUBE): 2625 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2626 elements["cube"].extend(ensure_list(cube)) 2627 2628 if self._match_text_seq("TOTALS"): 2629 totals = True 2630 elements["totals"] = True # type: ignore 2631 2632 if not (grouping_sets or rollup or cube or totals): 2633 break 2634 2635 return self.expression(exp.Group, **elements) # type: ignore 2636 2637 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2638 if not self._match(TokenType.GROUPING_SETS): 2639 return None 2640 2641 return self._parse_wrapped_csv(self._parse_grouping_set) 2642 2643 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2644 if self._match(TokenType.L_PAREN): 2645 grouping_set = self._parse_csv(self._parse_column) 2646 self._match_r_paren() 2647 return self.expression(exp.Tuple, expressions=grouping_set) 2648 2649 return self._parse_column() 2650 2651 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2652 if not skip_having_token and not self._match(TokenType.HAVING): 2653 return None 2654 return self.expression(exp.Having, this=self._parse_conjunction()) 2655 2656 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2657 if not self._match(TokenType.QUALIFY): 2658 return None 2659 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2660 2661 def _parse_order( 2662 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2663 ) -> t.Optional[exp.Expression]: 2664 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2665 return this 2666 2667 return self.expression( 2668 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2669 ) 2670 2671 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2672 if not self._match(token): 2673 return None 2674 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2675 2676 def _parse_ordered(self) -> exp.Ordered: 2677 this = self._parse_conjunction() 2678 self._match(TokenType.ASC) 2679 2680 is_desc = self._match(TokenType.DESC) 2681 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2682 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2683 desc = is_desc or False 2684 asc = not desc 2685 nulls_first = is_nulls_first or False 2686 explicitly_null_ordered = is_nulls_first or is_nulls_last 2687 2688 if ( 2689 not explicitly_null_ordered 2690 and ( 2691 (asc and self.NULL_ORDERING == "nulls_are_small") 2692 or (desc and self.NULL_ORDERING != "nulls_are_small") 2693 ) 2694 and self.NULL_ORDERING != "nulls_are_last" 2695 ): 2696 nulls_first = True 2697 2698 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2699 2700 def _parse_limit( 2701 self, this: t.Optional[exp.Expression] = None, top: bool = False 2702 ) -> t.Optional[exp.Expression]: 2703 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2704 limit_paren = self._match(TokenType.L_PAREN) 2705 expression = self._parse_number() if top else self._parse_term() 2706 2707 if self._match(TokenType.COMMA): 2708 offset = expression 2709 expression = self._parse_term() 2710 else: 2711 offset = None 2712 2713 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2714 2715 if limit_paren: 2716 self._match_r_paren() 2717 2718 return limit_exp 2719 2720 if self._match(TokenType.FETCH): 2721 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2722 direction = self._prev.text if direction else "FIRST" 2723 2724 count = self._parse_number() 2725 percent = self._match(TokenType.PERCENT) 2726 2727 self._match_set((TokenType.ROW, TokenType.ROWS)) 2728 2729 only = self._match_text_seq("ONLY") 2730 with_ties = self._match_text_seq("WITH", "TIES") 2731 2732 if only and with_ties: 2733 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2734 2735 return self.expression( 2736 exp.Fetch, 2737 direction=direction, 2738 count=count, 2739 percent=percent, 2740 with_ties=with_ties, 2741 ) 2742 2743 return this 2744 2745 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2746 if not self._match(TokenType.OFFSET): 2747 return this 2748 2749 count = self._parse_number() 2750 self._match_set((TokenType.ROW, TokenType.ROWS)) 2751 return self.expression(exp.Offset, this=this, expression=count) 2752 2753 def _parse_locks(self) -> t.List[exp.Lock]: 2754 locks = [] 2755 while True: 2756 if self._match_text_seq("FOR", "UPDATE"): 2757 update = True 2758 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2759 "LOCK", "IN", "SHARE", "MODE" 2760 ): 2761 update = False 2762 else: 2763 break 2764 2765 expressions = None 2766 if self._match_text_seq("OF"): 2767 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2768 2769 wait: t.Optional[bool | exp.Expression] = None 2770 if self._match_text_seq("NOWAIT"): 2771 wait = True 2772 elif self._match_text_seq("WAIT"): 2773 wait = self._parse_primary() 2774 elif self._match_text_seq("SKIP", "LOCKED"): 2775 wait = False 2776 2777 locks.append( 2778 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2779 ) 2780 2781 return locks 2782 2783 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2784 if not self._match_set(self.SET_OPERATIONS): 2785 return this 2786 2787 token_type = self._prev.token_type 2788 2789 if token_type == TokenType.UNION: 2790 expression = exp.Union 2791 elif token_type == TokenType.EXCEPT: 2792 expression = exp.Except 2793 else: 2794 expression = exp.Intersect 2795 2796 return self.expression( 2797 expression, 2798 this=this, 2799 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2800 expression=self._parse_set_operations(self._parse_select(nested=True)), 2801 ) 2802 2803 def _parse_expression(self) -> t.Optional[exp.Expression]: 2804 return self._parse_alias(self._parse_conjunction()) 2805 2806 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2807 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2808 2809 def _parse_equality(self) -> t.Optional[exp.Expression]: 2810 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2811 2812 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2813 return self._parse_tokens(self._parse_range, self.COMPARISON) 2814 2815 def _parse_range(self) -> t.Optional[exp.Expression]: 2816 this = self._parse_bitwise() 2817 negate = self._match(TokenType.NOT) 2818 2819 if self._match_set(self.RANGE_PARSERS): 2820 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2821 if not expression: 2822 return this 2823 2824 this = expression 2825 elif self._match(TokenType.ISNULL): 2826 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2827 2828 # Postgres supports ISNULL and NOTNULL for conditions. 2829 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2830 if self._match(TokenType.NOTNULL): 2831 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2832 this = self.expression(exp.Not, this=this) 2833 2834 if negate: 2835 this = self.expression(exp.Not, this=this) 2836 2837 if self._match(TokenType.IS): 2838 this = self._parse_is(this) 2839 2840 return this 2841 2842 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2843 index = self._index - 1 2844 negate = self._match(TokenType.NOT) 2845 2846 if self._match_text_seq("DISTINCT", "FROM"): 2847 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2848 return self.expression(klass, this=this, expression=self._parse_expression()) 2849 2850 expression = self._parse_null() or self._parse_boolean() 2851 if not expression: 2852 self._retreat(index) 2853 return None 2854 2855 this = self.expression(exp.Is, this=this, expression=expression) 2856 return self.expression(exp.Not, this=this) if negate else this 2857 2858 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2859 unnest = self._parse_unnest(with_alias=False) 2860 if unnest: 2861 this = self.expression(exp.In, this=this, unnest=unnest) 2862 elif self._match(TokenType.L_PAREN): 2863 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2864 2865 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2866 this = self.expression(exp.In, this=this, query=expressions[0]) 2867 else: 2868 this = self.expression(exp.In, this=this, expressions=expressions) 2869 2870 self._match_r_paren(this) 2871 else: 2872 this = self.expression(exp.In, this=this, field=self._parse_field()) 2873 2874 return this 2875 2876 def _parse_between(self, this: exp.Expression) -> exp.Between: 2877 low = self._parse_bitwise() 2878 self._match(TokenType.AND) 2879 high = self._parse_bitwise() 2880 return self.expression(exp.Between, this=this, low=low, high=high) 2881 2882 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2883 if not self._match(TokenType.ESCAPE): 2884 return this 2885 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2886 2887 def _parse_interval(self) -> t.Optional[exp.Interval]: 2888 if not self._match(TokenType.INTERVAL): 2889 return None 2890 2891 if self._match(TokenType.STRING, advance=False): 2892 this = self._parse_primary() 2893 else: 2894 this = self._parse_term() 2895 2896 unit = self._parse_function() or self._parse_var() 2897 2898 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2899 # each INTERVAL expression into this canonical form so it's easy to transpile 2900 if this and this.is_number: 2901 this = exp.Literal.string(this.name) 2902 elif this and this.is_string: 2903 parts = this.name.split() 2904 2905 if len(parts) == 2: 2906 if unit: 2907 # this is not actually a unit, it's something else 2908 unit = None 2909 self._retreat(self._index - 1) 2910 else: 2911 this = exp.Literal.string(parts[0]) 2912 unit = self.expression(exp.Var, this=parts[1]) 2913 2914 return self.expression(exp.Interval, this=this, unit=unit) 2915 2916 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2917 this = self._parse_term() 2918 2919 while True: 2920 if self._match_set(self.BITWISE): 2921 this = self.expression( 2922 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2923 ) 2924 elif self._match_pair(TokenType.LT, TokenType.LT): 2925 this = self.expression( 2926 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2927 ) 2928 elif self._match_pair(TokenType.GT, TokenType.GT): 2929 this = self.expression( 2930 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2931 ) 2932 else: 2933 break 2934 2935 return this 2936 2937 def _parse_term(self) -> t.Optional[exp.Expression]: 2938 return self._parse_tokens(self._parse_factor, self.TERM) 2939 2940 def _parse_factor(self) -> t.Optional[exp.Expression]: 2941 return self._parse_tokens(self._parse_unary, self.FACTOR) 2942 2943 def _parse_unary(self) -> t.Optional[exp.Expression]: 2944 if self._match_set(self.UNARY_PARSERS): 2945 return self.UNARY_PARSERS[self._prev.token_type](self) 2946 return self._parse_at_time_zone(self._parse_type()) 2947 2948 def _parse_type(self) -> t.Optional[exp.Expression]: 2949 interval = self._parse_interval() 2950 if interval: 2951 return interval 2952 2953 index = self._index 2954 data_type = self._parse_types(check_func=True) 2955 this = self._parse_column() 2956 2957 if data_type: 2958 if isinstance(this, exp.Literal): 2959 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2960 if parser: 2961 return parser(self, this, data_type) 2962 return self.expression(exp.Cast, this=this, to=data_type) 2963 if not data_type.expressions: 2964 self._retreat(index) 2965 return self._parse_column() 2966 return self._parse_column_ops(data_type) 2967 2968 return this 2969 2970 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2971 this = self._parse_type() 2972 if not this: 2973 return None 2974 2975 return self.expression( 2976 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2977 ) 2978 2979 def _parse_types( 2980 self, check_func: bool = False, schema: bool = False 2981 ) -> t.Optional[exp.Expression]: 2982 index = self._index 2983 2984 prefix = self._match_text_seq("SYSUDTLIB", ".") 2985 2986 if not self._match_set(self.TYPE_TOKENS): 2987 return None 2988 2989 type_token = self._prev.token_type 2990 2991 if type_token == TokenType.PSEUDO_TYPE: 2992 return self.expression(exp.PseudoType, this=self._prev.text) 2993 2994 nested = type_token in self.NESTED_TYPE_TOKENS 2995 is_struct = type_token == TokenType.STRUCT 2996 expressions = None 2997 maybe_func = False 2998 2999 if self._match(TokenType.L_PAREN): 3000 if is_struct: 3001 expressions = self._parse_csv(self._parse_struct_types) 3002 elif nested: 3003 expressions = self._parse_csv( 3004 lambda: self._parse_types(check_func=check_func, schema=schema) 3005 ) 3006 elif type_token in self.ENUM_TYPE_TOKENS: 3007 expressions = self._parse_csv(self._parse_primary) 3008 else: 3009 expressions = self._parse_csv(self._parse_type_size) 3010 3011 if not expressions or not self._match(TokenType.R_PAREN): 3012 self._retreat(index) 3013 return None 3014 3015 maybe_func = True 3016 3017 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3018 this = exp.DataType( 3019 this=exp.DataType.Type.ARRAY, 3020 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3021 nested=True, 3022 ) 3023 3024 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3025 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3026 3027 return this 3028 3029 if self._match(TokenType.L_BRACKET): 3030 self._retreat(index) 3031 return None 3032 3033 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3034 if nested and self._match(TokenType.LT): 3035 if is_struct: 3036 expressions = self._parse_csv(self._parse_struct_types) 3037 else: 3038 expressions = self._parse_csv( 3039 lambda: self._parse_types(check_func=check_func, schema=schema) 3040 ) 3041 3042 if not self._match(TokenType.GT): 3043 self.raise_error("Expecting >") 3044 3045 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3046 values = self._parse_csv(self._parse_conjunction) 3047 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3048 3049 value: t.Optional[exp.Expression] = None 3050 if type_token in self.TIMESTAMPS: 3051 if self._match_text_seq("WITH", "TIME", "ZONE"): 3052 maybe_func = False 3053 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3054 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3055 maybe_func = False 3056 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3057 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3058 maybe_func = False 3059 elif type_token == TokenType.INTERVAL: 3060 unit = self._parse_var() 3061 3062 if not unit: 3063 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3064 else: 3065 value = self.expression(exp.Interval, unit=unit) 3066 3067 if maybe_func and check_func: 3068 index2 = self._index 3069 peek = self._parse_string() 3070 3071 if not peek: 3072 self._retreat(index) 3073 return None 3074 3075 self._retreat(index2) 3076 3077 if value: 3078 return value 3079 3080 return exp.DataType( 3081 this=exp.DataType.Type[type_token.value.upper()], 3082 expressions=expressions, 3083 nested=nested, 3084 values=values, 3085 prefix=prefix, 3086 ) 3087 3088 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3089 this = self._parse_type() or self._parse_id_var() 3090 self._match(TokenType.COLON) 3091 return self._parse_column_def(this) 3092 3093 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3094 if not self._match_text_seq("AT", "TIME", "ZONE"): 3095 return this 3096 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3097 3098 def _parse_column(self) -> t.Optional[exp.Expression]: 3099 this = self._parse_field() 3100 if isinstance(this, exp.Identifier): 3101 this = self.expression(exp.Column, this=this) 3102 elif not this: 3103 return self._parse_bracket(this) 3104 return self._parse_column_ops(this) 3105 3106 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3107 this = self._parse_bracket(this) 3108 3109 while self._match_set(self.COLUMN_OPERATORS): 3110 op_token = self._prev.token_type 3111 op = self.COLUMN_OPERATORS.get(op_token) 3112 3113 if op_token == TokenType.DCOLON: 3114 field = self._parse_types() 3115 if not field: 3116 self.raise_error("Expected type") 3117 elif op and self._curr: 3118 self._advance() 3119 value = self._prev.text 3120 field = ( 3121 exp.Literal.number(value) 3122 if self._prev.token_type == TokenType.NUMBER 3123 else exp.Literal.string(value) 3124 ) 3125 else: 3126 field = self._parse_field(anonymous_func=True, any_token=True) 3127 3128 if isinstance(field, exp.Func): 3129 # bigquery allows function calls like x.y.count(...) 3130 # SAFE.SUBSTR(...) 3131 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3132 this = self._replace_columns_with_dots(this) 3133 3134 if op: 3135 this = op(self, this, field) 3136 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3137 this = self.expression( 3138 exp.Column, 3139 this=field, 3140 table=this.this, 3141 db=this.args.get("table"), 3142 catalog=this.args.get("db"), 3143 ) 3144 else: 3145 this = self.expression(exp.Dot, this=this, expression=field) 3146 this = self._parse_bracket(this) 3147 return this 3148 3149 def _parse_primary(self) -> t.Optional[exp.Expression]: 3150 if self._match_set(self.PRIMARY_PARSERS): 3151 token_type = self._prev.token_type 3152 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3153 3154 if token_type == TokenType.STRING: 3155 expressions = [primary] 3156 while self._match(TokenType.STRING): 3157 expressions.append(exp.Literal.string(self._prev.text)) 3158 3159 if len(expressions) > 1: 3160 return self.expression(exp.Concat, expressions=expressions) 3161 3162 return primary 3163 3164 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3165 return exp.Literal.number(f"0.{self._prev.text}") 3166 3167 if self._match(TokenType.L_PAREN): 3168 comments = self._prev_comments 3169 query = self._parse_select() 3170 3171 if query: 3172 expressions = [query] 3173 else: 3174 expressions = self._parse_csv(self._parse_expression) 3175 3176 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3177 3178 if isinstance(this, exp.Subqueryable): 3179 this = self._parse_set_operations( 3180 self._parse_subquery(this=this, parse_alias=False) 3181 ) 3182 elif len(expressions) > 1: 3183 this = self.expression(exp.Tuple, expressions=expressions) 3184 else: 3185 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3186 3187 if this: 3188 this.add_comments(comments) 3189 3190 self._match_r_paren(expression=this) 3191 return this 3192 3193 return None 3194 3195 def _parse_field( 3196 self, 3197 any_token: bool = False, 3198 tokens: t.Optional[t.Collection[TokenType]] = None, 3199 anonymous_func: bool = False, 3200 ) -> t.Optional[exp.Expression]: 3201 return ( 3202 self._parse_primary() 3203 or self._parse_function(anonymous=anonymous_func) 3204 or self._parse_id_var(any_token=any_token, tokens=tokens) 3205 ) 3206 3207 def _parse_function( 3208 self, 3209 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3210 anonymous: bool = False, 3211 optional_parens: bool = True, 3212 ) -> t.Optional[exp.Expression]: 3213 if not self._curr: 3214 return None 3215 3216 token_type = self._curr.token_type 3217 3218 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3219 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3220 3221 if not self._next or self._next.token_type != TokenType.L_PAREN: 3222 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3223 self._advance() 3224 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3225 3226 return None 3227 3228 if token_type not in self.FUNC_TOKENS: 3229 return None 3230 3231 this = self._curr.text 3232 upper = this.upper() 3233 self._advance(2) 3234 3235 parser = self.FUNCTION_PARSERS.get(upper) 3236 3237 if parser and not anonymous: 3238 this = parser(self) 3239 else: 3240 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3241 3242 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3243 this = self.expression(subquery_predicate, this=self._parse_select()) 3244 self._match_r_paren() 3245 return this 3246 3247 if functions is None: 3248 functions = self.FUNCTIONS 3249 3250 function = functions.get(upper) 3251 3252 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3253 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3254 3255 if function and not anonymous: 3256 this = self.validate_expression(function(args), args) 3257 else: 3258 this = self.expression(exp.Anonymous, this=this, expressions=args) 3259 3260 self._match_r_paren(this) 3261 return self._parse_window(this) 3262 3263 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3264 return self._parse_column_def(self._parse_id_var()) 3265 3266 def _parse_user_defined_function( 3267 self, kind: t.Optional[TokenType] = None 3268 ) -> t.Optional[exp.Expression]: 3269 this = self._parse_id_var() 3270 3271 while self._match(TokenType.DOT): 3272 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3273 3274 if not self._match(TokenType.L_PAREN): 3275 return this 3276 3277 expressions = self._parse_csv(self._parse_function_parameter) 3278 self._match_r_paren() 3279 return self.expression( 3280 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3281 ) 3282 3283 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3284 literal = self._parse_primary() 3285 if literal: 3286 return self.expression(exp.Introducer, this=token.text, expression=literal) 3287 3288 return self.expression(exp.Identifier, this=token.text) 3289 3290 def _parse_session_parameter(self) -> exp.SessionParameter: 3291 kind = None 3292 this = self._parse_id_var() or self._parse_primary() 3293 3294 if this and self._match(TokenType.DOT): 3295 kind = this.name 3296 this = self._parse_var() or self._parse_primary() 3297 3298 return self.expression(exp.SessionParameter, this=this, kind=kind) 3299 3300 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3301 index = self._index 3302 3303 if self._match(TokenType.L_PAREN): 3304 expressions = self._parse_csv(self._parse_id_var) 3305 3306 if not self._match(TokenType.R_PAREN): 3307 self._retreat(index) 3308 else: 3309 expressions = [self._parse_id_var()] 3310 3311 if self._match_set(self.LAMBDAS): 3312 return self.LAMBDAS[self._prev.token_type](self, expressions) 3313 3314 self._retreat(index) 3315 3316 this: t.Optional[exp.Expression] 3317 3318 if self._match(TokenType.DISTINCT): 3319 this = self.expression( 3320 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3321 ) 3322 else: 3323 this = self._parse_select_or_expression(alias=alias) 3324 3325 if isinstance(this, exp.EQ): 3326 left = this.this 3327 if isinstance(left, exp.Column): 3328 left.replace(exp.var(left.text("this"))) 3329 3330 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3331 3332 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3333 index = self._index 3334 3335 if not self.errors: 3336 try: 3337 if self._parse_select(nested=True): 3338 return this 3339 except ParseError: 3340 pass 3341 finally: 3342 self.errors.clear() 3343 self._retreat(index) 3344 3345 if not self._match(TokenType.L_PAREN): 3346 return this 3347 3348 args = self._parse_csv( 3349 lambda: self._parse_constraint() 3350 or self._parse_column_def(self._parse_field(any_token=True)) 3351 ) 3352 3353 self._match_r_paren() 3354 return self.expression(exp.Schema, this=this, expressions=args) 3355 3356 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3357 # column defs are not really columns, they're identifiers 3358 if isinstance(this, exp.Column): 3359 this = this.this 3360 3361 kind = self._parse_types(schema=True) 3362 3363 if self._match_text_seq("FOR", "ORDINALITY"): 3364 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3365 3366 constraints = [] 3367 while True: 3368 constraint = self._parse_column_constraint() 3369 if not constraint: 3370 break 3371 constraints.append(constraint) 3372 3373 if not kind and not constraints: 3374 return this 3375 3376 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3377 3378 def _parse_auto_increment( 3379 self, 3380 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3381 start = None 3382 increment = None 3383 3384 if self._match(TokenType.L_PAREN, advance=False): 3385 args = self._parse_wrapped_csv(self._parse_bitwise) 3386 start = seq_get(args, 0) 3387 increment = seq_get(args, 1) 3388 elif self._match_text_seq("START"): 3389 start = self._parse_bitwise() 3390 self._match_text_seq("INCREMENT") 3391 increment = self._parse_bitwise() 3392 3393 if start and increment: 3394 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3395 3396 return exp.AutoIncrementColumnConstraint() 3397 3398 def _parse_compress(self) -> exp.CompressColumnConstraint: 3399 if self._match(TokenType.L_PAREN, advance=False): 3400 return self.expression( 3401 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3402 ) 3403 3404 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3405 3406 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3407 if self._match_text_seq("BY", "DEFAULT"): 3408 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3409 this = self.expression( 3410 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3411 ) 3412 else: 3413 self._match_text_seq("ALWAYS") 3414 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3415 3416 self._match(TokenType.ALIAS) 3417 identity = self._match_text_seq("IDENTITY") 3418 3419 if self._match(TokenType.L_PAREN): 3420 if self._match_text_seq("START", "WITH"): 3421 this.set("start", self._parse_bitwise()) 3422 if self._match_text_seq("INCREMENT", "BY"): 3423 this.set("increment", self._parse_bitwise()) 3424 if self._match_text_seq("MINVALUE"): 3425 this.set("minvalue", self._parse_bitwise()) 3426 if self._match_text_seq("MAXVALUE"): 3427 this.set("maxvalue", self._parse_bitwise()) 3428 3429 if self._match_text_seq("CYCLE"): 3430 this.set("cycle", True) 3431 elif self._match_text_seq("NO", "CYCLE"): 3432 this.set("cycle", False) 3433 3434 if not identity: 3435 this.set("expression", self._parse_bitwise()) 3436 3437 self._match_r_paren() 3438 3439 return this 3440 3441 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3442 self._match_text_seq("LENGTH") 3443 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3444 3445 def _parse_not_constraint( 3446 self, 3447 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3448 if self._match_text_seq("NULL"): 3449 return self.expression(exp.NotNullColumnConstraint) 3450 if self._match_text_seq("CASESPECIFIC"): 3451 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3452 return None 3453 3454 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3455 if self._match(TokenType.CONSTRAINT): 3456 this = self._parse_id_var() 3457 else: 3458 this = None 3459 3460 if self._match_texts(self.CONSTRAINT_PARSERS): 3461 return self.expression( 3462 exp.ColumnConstraint, 3463 this=this, 3464 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3465 ) 3466 3467 return this 3468 3469 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3470 if not self._match(TokenType.CONSTRAINT): 3471 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3472 3473 this = self._parse_id_var() 3474 expressions = [] 3475 3476 while True: 3477 constraint = self._parse_unnamed_constraint() or self._parse_function() 3478 if not constraint: 3479 break 3480 expressions.append(constraint) 3481 3482 return self.expression(exp.Constraint, this=this, expressions=expressions) 3483 3484 def _parse_unnamed_constraint( 3485 self, constraints: t.Optional[t.Collection[str]] = None 3486 ) -> t.Optional[exp.Expression]: 3487 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3488 return None 3489 3490 constraint = self._prev.text.upper() 3491 if constraint not in self.CONSTRAINT_PARSERS: 3492 self.raise_error(f"No parser found for schema constraint {constraint}.") 3493 3494 return self.CONSTRAINT_PARSERS[constraint](self) 3495 3496 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3497 self._match_text_seq("KEY") 3498 return self.expression( 3499 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3500 ) 3501 3502 def _parse_key_constraint_options(self) -> t.List[str]: 3503 options = [] 3504 while True: 3505 if not self._curr: 3506 break 3507 3508 if self._match(TokenType.ON): 3509 action = None 3510 on = self._advance_any() and self._prev.text 3511 3512 if self._match_text_seq("NO", "ACTION"): 3513 action = "NO ACTION" 3514 elif self._match_text_seq("CASCADE"): 3515 action = "CASCADE" 3516 elif self._match_pair(TokenType.SET, TokenType.NULL): 3517 action = "SET NULL" 3518 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3519 action = "SET DEFAULT" 3520 else: 3521 self.raise_error("Invalid key constraint") 3522 3523 options.append(f"ON {on} {action}") 3524 elif self._match_text_seq("NOT", "ENFORCED"): 3525 options.append("NOT ENFORCED") 3526 elif self._match_text_seq("DEFERRABLE"): 3527 options.append("DEFERRABLE") 3528 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3529 options.append("INITIALLY DEFERRED") 3530 elif self._match_text_seq("NORELY"): 3531 options.append("NORELY") 3532 elif self._match_text_seq("MATCH", "FULL"): 3533 options.append("MATCH FULL") 3534 else: 3535 break 3536 3537 return options 3538 3539 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3540 if match and not self._match(TokenType.REFERENCES): 3541 return None 3542 3543 expressions = None 3544 this = self._parse_id_var() 3545 3546 if self._match(TokenType.L_PAREN, advance=False): 3547 expressions = self._parse_wrapped_id_vars() 3548 3549 options = self._parse_key_constraint_options() 3550 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3551 3552 def _parse_foreign_key(self) -> exp.ForeignKey: 3553 expressions = self._parse_wrapped_id_vars() 3554 reference = self._parse_references() 3555 options = {} 3556 3557 while self._match(TokenType.ON): 3558 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3559 self.raise_error("Expected DELETE or UPDATE") 3560 3561 kind = self._prev.text.lower() 3562 3563 if self._match_text_seq("NO", "ACTION"): 3564 action = "NO ACTION" 3565 elif self._match(TokenType.SET): 3566 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3567 action = "SET " + self._prev.text.upper() 3568 else: 3569 self._advance() 3570 action = self._prev.text.upper() 3571 3572 options[kind] = action 3573 3574 return self.expression( 3575 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3576 ) 3577 3578 def _parse_primary_key( 3579 self, wrapped_optional: bool = False, in_props: bool = False 3580 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3581 desc = ( 3582 self._match_set((TokenType.ASC, TokenType.DESC)) 3583 and self._prev.token_type == TokenType.DESC 3584 ) 3585 3586 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3587 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3588 3589 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3590 options = self._parse_key_constraint_options() 3591 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3592 3593 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3594 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3595 return this 3596 3597 bracket_kind = self._prev.token_type 3598 3599 if self._match(TokenType.COLON): 3600 expressions: t.List[t.Optional[exp.Expression]] = [ 3601 self.expression(exp.Slice, expression=self._parse_conjunction()) 3602 ] 3603 else: 3604 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3605 3606 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3607 if bracket_kind == TokenType.L_BRACE: 3608 this = self.expression(exp.Struct, expressions=expressions) 3609 elif not this or this.name.upper() == "ARRAY": 3610 this = self.expression(exp.Array, expressions=expressions) 3611 else: 3612 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3613 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3614 3615 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3616 self.raise_error("Expected ]") 3617 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3618 self.raise_error("Expected }") 3619 3620 self._add_comments(this) 3621 return self._parse_bracket(this) 3622 3623 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3624 if self._match(TokenType.COLON): 3625 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3626 return this 3627 3628 def _parse_case(self) -> t.Optional[exp.Expression]: 3629 ifs = [] 3630 default = None 3631 3632 expression = self._parse_conjunction() 3633 3634 while self._match(TokenType.WHEN): 3635 this = self._parse_conjunction() 3636 self._match(TokenType.THEN) 3637 then = self._parse_conjunction() 3638 ifs.append(self.expression(exp.If, this=this, true=then)) 3639 3640 if self._match(TokenType.ELSE): 3641 default = self._parse_conjunction() 3642 3643 if not self._match(TokenType.END): 3644 self.raise_error("Expected END after CASE", self._prev) 3645 3646 return self._parse_window( 3647 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3648 ) 3649 3650 def _parse_if(self) -> t.Optional[exp.Expression]: 3651 if self._match(TokenType.L_PAREN): 3652 args = self._parse_csv(self._parse_conjunction) 3653 this = self.validate_expression(exp.If.from_arg_list(args), args) 3654 self._match_r_paren() 3655 else: 3656 index = self._index - 1 3657 condition = self._parse_conjunction() 3658 3659 if not condition: 3660 self._retreat(index) 3661 return None 3662 3663 self._match(TokenType.THEN) 3664 true = self._parse_conjunction() 3665 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3666 self._match(TokenType.END) 3667 this = self.expression(exp.If, this=condition, true=true, false=false) 3668 3669 return self._parse_window(this) 3670 3671 def _parse_extract(self) -> exp.Extract: 3672 this = self._parse_function() or self._parse_var() or self._parse_type() 3673 3674 if self._match(TokenType.FROM): 3675 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3676 3677 if not self._match(TokenType.COMMA): 3678 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3679 3680 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3681 3682 def _parse_cast(self, strict: bool) -> exp.Expression: 3683 this = self._parse_conjunction() 3684 3685 if not self._match(TokenType.ALIAS): 3686 if self._match(TokenType.COMMA): 3687 return self.expression( 3688 exp.CastToStrType, this=this, expression=self._parse_string() 3689 ) 3690 else: 3691 self.raise_error("Expected AS after CAST") 3692 3693 fmt = None 3694 to = self._parse_types() 3695 3696 if not to: 3697 self.raise_error("Expected TYPE after CAST") 3698 elif to.this == exp.DataType.Type.CHAR: 3699 if self._match(TokenType.CHARACTER_SET): 3700 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3701 elif self._match(TokenType.FORMAT): 3702 fmt = self._parse_at_time_zone(self._parse_string()) 3703 3704 if to.this in exp.DataType.TEMPORAL_TYPES: 3705 return self.expression( 3706 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3707 this=this, 3708 format=exp.Literal.string( 3709 format_time( 3710 fmt.this if fmt else "", 3711 self.FORMAT_MAPPING or self.TIME_MAPPING, 3712 self.FORMAT_TRIE or self.TIME_TRIE, 3713 ) 3714 ), 3715 ) 3716 3717 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3718 3719 def _parse_concat(self) -> t.Optional[exp.Expression]: 3720 args = self._parse_csv(self._parse_conjunction) 3721 if self.CONCAT_NULL_OUTPUTS_STRING: 3722 args = [ 3723 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3724 for arg in args 3725 if arg 3726 ] 3727 3728 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3729 # we find such a call we replace it with its argument. 3730 if len(args) == 1: 3731 return args[0] 3732 3733 return self.expression( 3734 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3735 ) 3736 3737 def _parse_string_agg(self) -> exp.Expression: 3738 if self._match(TokenType.DISTINCT): 3739 args: t.List[t.Optional[exp.Expression]] = [ 3740 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3741 ] 3742 if self._match(TokenType.COMMA): 3743 args.extend(self._parse_csv(self._parse_conjunction)) 3744 else: 3745 args = self._parse_csv(self._parse_conjunction) 3746 3747 index = self._index 3748 if not self._match(TokenType.R_PAREN): 3749 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3750 return self.expression( 3751 exp.GroupConcat, 3752 this=seq_get(args, 0), 3753 separator=self._parse_order(this=seq_get(args, 1)), 3754 ) 3755 3756 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3757 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3758 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3759 if not self._match_text_seq("WITHIN", "GROUP"): 3760 self._retreat(index) 3761 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3762 3763 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3764 order = self._parse_order(this=seq_get(args, 0)) 3765 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3766 3767 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3768 this = self._parse_bitwise() 3769 3770 if self._match(TokenType.USING): 3771 to: t.Optional[exp.Expression] = self.expression( 3772 exp.CharacterSet, this=self._parse_var() 3773 ) 3774 elif self._match(TokenType.COMMA): 3775 to = self._parse_types() 3776 else: 3777 to = None 3778 3779 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3780 3781 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3782 """ 3783 There are generally two variants of the DECODE function: 3784 3785 - DECODE(bin, charset) 3786 - DECODE(expression, search, result [, search, result] ... [, default]) 3787 3788 The second variant will always be parsed into a CASE expression. Note that NULL 3789 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3790 instead of relying on pattern matching. 3791 """ 3792 args = self._parse_csv(self._parse_conjunction) 3793 3794 if len(args) < 3: 3795 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3796 3797 expression, *expressions = args 3798 if not expression: 3799 return None 3800 3801 ifs = [] 3802 for search, result in zip(expressions[::2], expressions[1::2]): 3803 if not search or not result: 3804 return None 3805 3806 if isinstance(search, exp.Literal): 3807 ifs.append( 3808 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3809 ) 3810 elif isinstance(search, exp.Null): 3811 ifs.append( 3812 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3813 ) 3814 else: 3815 cond = exp.or_( 3816 exp.EQ(this=expression.copy(), expression=search), 3817 exp.and_( 3818 exp.Is(this=expression.copy(), expression=exp.Null()), 3819 exp.Is(this=search.copy(), expression=exp.Null()), 3820 copy=False, 3821 ), 3822 copy=False, 3823 ) 3824 ifs.append(exp.If(this=cond, true=result)) 3825 3826 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3827 3828 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3829 self._match_text_seq("KEY") 3830 key = self._parse_field() 3831 self._match(TokenType.COLON) 3832 self._match_text_seq("VALUE") 3833 value = self._parse_field() 3834 3835 if not key and not value: 3836 return None 3837 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3838 3839 def _parse_json_object(self) -> exp.JSONObject: 3840 star = self._parse_star() 3841 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3842 3843 null_handling = None 3844 if self._match_text_seq("NULL", "ON", "NULL"): 3845 null_handling = "NULL ON NULL" 3846 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3847 null_handling = "ABSENT ON NULL" 3848 3849 unique_keys = None 3850 if self._match_text_seq("WITH", "UNIQUE"): 3851 unique_keys = True 3852 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3853 unique_keys = False 3854 3855 self._match_text_seq("KEYS") 3856 3857 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3858 format_json = self._match_text_seq("FORMAT", "JSON") 3859 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3860 3861 return self.expression( 3862 exp.JSONObject, 3863 expressions=expressions, 3864 null_handling=null_handling, 3865 unique_keys=unique_keys, 3866 return_type=return_type, 3867 format_json=format_json, 3868 encoding=encoding, 3869 ) 3870 3871 def _parse_logarithm(self) -> exp.Func: 3872 # Default argument order is base, expression 3873 args = self._parse_csv(self._parse_range) 3874 3875 if len(args) > 1: 3876 if not self.LOG_BASE_FIRST: 3877 args.reverse() 3878 return exp.Log.from_arg_list(args) 3879 3880 return self.expression( 3881 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3882 ) 3883 3884 def _parse_match_against(self) -> exp.MatchAgainst: 3885 expressions = self._parse_csv(self._parse_column) 3886 3887 self._match_text_seq(")", "AGAINST", "(") 3888 3889 this = self._parse_string() 3890 3891 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3892 modifier = "IN NATURAL LANGUAGE MODE" 3893 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3894 modifier = f"{modifier} WITH QUERY EXPANSION" 3895 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3896 modifier = "IN BOOLEAN MODE" 3897 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3898 modifier = "WITH QUERY EXPANSION" 3899 else: 3900 modifier = None 3901 3902 return self.expression( 3903 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3904 ) 3905 3906 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3907 def _parse_open_json(self) -> exp.OpenJSON: 3908 this = self._parse_bitwise() 3909 path = self._match(TokenType.COMMA) and self._parse_string() 3910 3911 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3912 this = self._parse_field(any_token=True) 3913 kind = self._parse_types() 3914 path = self._parse_string() 3915 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3916 3917 return self.expression( 3918 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3919 ) 3920 3921 expressions = None 3922 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3923 self._match_l_paren() 3924 expressions = self._parse_csv(_parse_open_json_column_def) 3925 3926 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3927 3928 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3929 args = self._parse_csv(self._parse_bitwise) 3930 3931 if self._match(TokenType.IN): 3932 return self.expression( 3933 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3934 ) 3935 3936 if haystack_first: 3937 haystack = seq_get(args, 0) 3938 needle = seq_get(args, 1) 3939 else: 3940 needle = seq_get(args, 0) 3941 haystack = seq_get(args, 1) 3942 3943 return self.expression( 3944 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3945 ) 3946 3947 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3948 args = self._parse_csv(self._parse_table) 3949 return exp.JoinHint(this=func_name.upper(), expressions=args) 3950 3951 def _parse_substring(self) -> exp.Substring: 3952 # Postgres supports the form: substring(string [from int] [for int]) 3953 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3954 3955 args = self._parse_csv(self._parse_bitwise) 3956 3957 if self._match(TokenType.FROM): 3958 args.append(self._parse_bitwise()) 3959 if self._match(TokenType.FOR): 3960 args.append(self._parse_bitwise()) 3961 3962 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3963 3964 def _parse_trim(self) -> exp.Trim: 3965 # https://www.w3resource.com/sql/character-functions/trim.php 3966 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3967 3968 position = None 3969 collation = None 3970 3971 if self._match_texts(self.TRIM_TYPES): 3972 position = self._prev.text.upper() 3973 3974 expression = self._parse_bitwise() 3975 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3976 this = self._parse_bitwise() 3977 else: 3978 this = expression 3979 expression = None 3980 3981 if self._match(TokenType.COLLATE): 3982 collation = self._parse_bitwise() 3983 3984 return self.expression( 3985 exp.Trim, this=this, position=position, expression=expression, collation=collation 3986 ) 3987 3988 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3989 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3990 3991 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3992 return self._parse_window(self._parse_id_var(), alias=True) 3993 3994 def _parse_respect_or_ignore_nulls( 3995 self, this: t.Optional[exp.Expression] 3996 ) -> t.Optional[exp.Expression]: 3997 if self._match_text_seq("IGNORE", "NULLS"): 3998 return self.expression(exp.IgnoreNulls, this=this) 3999 if self._match_text_seq("RESPECT", "NULLS"): 4000 return self.expression(exp.RespectNulls, this=this) 4001 return this 4002 4003 def _parse_window( 4004 self, this: t.Optional[exp.Expression], alias: bool = False 4005 ) -> t.Optional[exp.Expression]: 4006 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4007 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4008 self._match_r_paren() 4009 4010 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4011 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4012 if self._match_text_seq("WITHIN", "GROUP"): 4013 order = self._parse_wrapped(self._parse_order) 4014 this = self.expression(exp.WithinGroup, this=this, expression=order) 4015 4016 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4017 # Some dialects choose to implement and some do not. 4018 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4019 4020 # There is some code above in _parse_lambda that handles 4021 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4022 4023 # The below changes handle 4024 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4025 4026 # Oracle allows both formats 4027 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4028 # and Snowflake chose to do the same for familiarity 4029 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4030 this = self._parse_respect_or_ignore_nulls(this) 4031 4032 # bigquery select from window x AS (partition by ...) 4033 if alias: 4034 over = None 4035 self._match(TokenType.ALIAS) 4036 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4037 return this 4038 else: 4039 over = self._prev.text.upper() 4040 4041 if not self._match(TokenType.L_PAREN): 4042 return self.expression( 4043 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4044 ) 4045 4046 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4047 4048 first = self._match(TokenType.FIRST) 4049 if self._match_text_seq("LAST"): 4050 first = False 4051 4052 partition = self._parse_partition_by() 4053 order = self._parse_order() 4054 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4055 4056 if kind: 4057 self._match(TokenType.BETWEEN) 4058 start = self._parse_window_spec() 4059 self._match(TokenType.AND) 4060 end = self._parse_window_spec() 4061 4062 spec = self.expression( 4063 exp.WindowSpec, 4064 kind=kind, 4065 start=start["value"], 4066 start_side=start["side"], 4067 end=end["value"], 4068 end_side=end["side"], 4069 ) 4070 else: 4071 spec = None 4072 4073 self._match_r_paren() 4074 4075 return self.expression( 4076 exp.Window, 4077 this=this, 4078 partition_by=partition, 4079 order=order, 4080 spec=spec, 4081 alias=window_alias, 4082 over=over, 4083 first=first, 4084 ) 4085 4086 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4087 self._match(TokenType.BETWEEN) 4088 4089 return { 4090 "value": ( 4091 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4092 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4093 or self._parse_bitwise() 4094 ), 4095 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4096 } 4097 4098 def _parse_alias( 4099 self, this: t.Optional[exp.Expression], explicit: bool = False 4100 ) -> t.Optional[exp.Expression]: 4101 any_token = self._match(TokenType.ALIAS) 4102 4103 if explicit and not any_token: 4104 return this 4105 4106 if self._match(TokenType.L_PAREN): 4107 aliases = self.expression( 4108 exp.Aliases, 4109 this=this, 4110 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4111 ) 4112 self._match_r_paren(aliases) 4113 return aliases 4114 4115 alias = self._parse_id_var(any_token) 4116 4117 if alias: 4118 return self.expression(exp.Alias, this=this, alias=alias) 4119 4120 return this 4121 4122 def _parse_id_var( 4123 self, 4124 any_token: bool = True, 4125 tokens: t.Optional[t.Collection[TokenType]] = None, 4126 ) -> t.Optional[exp.Expression]: 4127 identifier = self._parse_identifier() 4128 4129 if identifier: 4130 return identifier 4131 4132 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4133 quoted = self._prev.token_type == TokenType.STRING 4134 return exp.Identifier(this=self._prev.text, quoted=quoted) 4135 4136 return None 4137 4138 def _parse_string(self) -> t.Optional[exp.Expression]: 4139 if self._match(TokenType.STRING): 4140 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4141 return self._parse_placeholder() 4142 4143 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4144 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4145 4146 def _parse_number(self) -> t.Optional[exp.Expression]: 4147 if self._match(TokenType.NUMBER): 4148 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4149 return self._parse_placeholder() 4150 4151 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4152 if self._match(TokenType.IDENTIFIER): 4153 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4154 return self._parse_placeholder() 4155 4156 def _parse_var( 4157 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4158 ) -> t.Optional[exp.Expression]: 4159 if ( 4160 (any_token and self._advance_any()) 4161 or self._match(TokenType.VAR) 4162 or (self._match_set(tokens) if tokens else False) 4163 ): 4164 return self.expression(exp.Var, this=self._prev.text) 4165 return self._parse_placeholder() 4166 4167 def _advance_any(self) -> t.Optional[Token]: 4168 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4169 self._advance() 4170 return self._prev 4171 return None 4172 4173 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4174 return self._parse_var() or self._parse_string() 4175 4176 def _parse_null(self) -> t.Optional[exp.Expression]: 4177 if self._match(TokenType.NULL): 4178 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4179 return None 4180 4181 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4182 if self._match(TokenType.TRUE): 4183 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4184 if self._match(TokenType.FALSE): 4185 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4186 return None 4187 4188 def _parse_star(self) -> t.Optional[exp.Expression]: 4189 if self._match(TokenType.STAR): 4190 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4191 return None 4192 4193 def _parse_parameter(self) -> exp.Parameter: 4194 wrapped = self._match(TokenType.L_BRACE) 4195 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4196 self._match(TokenType.R_BRACE) 4197 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4198 4199 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4200 if self._match_set(self.PLACEHOLDER_PARSERS): 4201 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4202 if placeholder: 4203 return placeholder 4204 self._advance(-1) 4205 return None 4206 4207 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4208 if not self._match(TokenType.EXCEPT): 4209 return None 4210 if self._match(TokenType.L_PAREN, advance=False): 4211 return self._parse_wrapped_csv(self._parse_column) 4212 return self._parse_csv(self._parse_column) 4213 4214 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4215 if not self._match(TokenType.REPLACE): 4216 return None 4217 if self._match(TokenType.L_PAREN, advance=False): 4218 return self._parse_wrapped_csv(self._parse_expression) 4219 return self._parse_csv(self._parse_expression) 4220 4221 def _parse_csv( 4222 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4223 ) -> t.List[t.Optional[exp.Expression]]: 4224 parse_result = parse_method() 4225 items = [parse_result] if parse_result is not None else [] 4226 4227 while self._match(sep): 4228 self._add_comments(parse_result) 4229 parse_result = parse_method() 4230 if parse_result is not None: 4231 items.append(parse_result) 4232 4233 return items 4234 4235 def _parse_tokens( 4236 self, parse_method: t.Callable, expressions: t.Dict 4237 ) -> t.Optional[exp.Expression]: 4238 this = parse_method() 4239 4240 while self._match_set(expressions): 4241 this = self.expression( 4242 expressions[self._prev.token_type], 4243 this=this, 4244 comments=self._prev_comments, 4245 expression=parse_method(), 4246 ) 4247 4248 return this 4249 4250 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4251 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4252 4253 def _parse_wrapped_csv( 4254 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4255 ) -> t.List[t.Optional[exp.Expression]]: 4256 return self._parse_wrapped( 4257 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4258 ) 4259 4260 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4261 wrapped = self._match(TokenType.L_PAREN) 4262 if not wrapped and not optional: 4263 self.raise_error("Expecting (") 4264 parse_result = parse_method() 4265 if wrapped: 4266 self._match_r_paren() 4267 return parse_result 4268 4269 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4270 return self._parse_select() or self._parse_set_operations( 4271 self._parse_expression() if alias else self._parse_conjunction() 4272 ) 4273 4274 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4275 return self._parse_query_modifiers( 4276 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4277 ) 4278 4279 def _parse_transaction(self) -> exp.Transaction: 4280 this = None 4281 if self._match_texts(self.TRANSACTION_KIND): 4282 this = self._prev.text 4283 4284 self._match_texts({"TRANSACTION", "WORK"}) 4285 4286 modes = [] 4287 while True: 4288 mode = [] 4289 while self._match(TokenType.VAR): 4290 mode.append(self._prev.text) 4291 4292 if mode: 4293 modes.append(" ".join(mode)) 4294 if not self._match(TokenType.COMMA): 4295 break 4296 4297 return self.expression(exp.Transaction, this=this, modes=modes) 4298 4299 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4300 chain = None 4301 savepoint = None 4302 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4303 4304 self._match_texts({"TRANSACTION", "WORK"}) 4305 4306 if self._match_text_seq("TO"): 4307 self._match_text_seq("SAVEPOINT") 4308 savepoint = self._parse_id_var() 4309 4310 if self._match(TokenType.AND): 4311 chain = not self._match_text_seq("NO") 4312 self._match_text_seq("CHAIN") 4313 4314 if is_rollback: 4315 return self.expression(exp.Rollback, savepoint=savepoint) 4316 4317 return self.expression(exp.Commit, chain=chain) 4318 4319 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4320 if not self._match_text_seq("ADD"): 4321 return None 4322 4323 self._match(TokenType.COLUMN) 4324 exists_column = self._parse_exists(not_=True) 4325 expression = self._parse_column_def(self._parse_field(any_token=True)) 4326 4327 if expression: 4328 expression.set("exists", exists_column) 4329 4330 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4331 if self._match_texts(("FIRST", "AFTER")): 4332 position = self._prev.text 4333 column_position = self.expression( 4334 exp.ColumnPosition, this=self._parse_column(), position=position 4335 ) 4336 expression.set("position", column_position) 4337 4338 return expression 4339 4340 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4341 drop = self._match(TokenType.DROP) and self._parse_drop() 4342 if drop and not isinstance(drop, exp.Command): 4343 drop.set("kind", drop.args.get("kind", "COLUMN")) 4344 return drop 4345 4346 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4347 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4348 return self.expression( 4349 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4350 ) 4351 4352 def _parse_add_constraint(self) -> exp.AddConstraint: 4353 this = None 4354 kind = self._prev.token_type 4355 4356 if kind == TokenType.CONSTRAINT: 4357 this = self._parse_id_var() 4358 4359 if self._match_text_seq("CHECK"): 4360 expression = self._parse_wrapped(self._parse_conjunction) 4361 enforced = self._match_text_seq("ENFORCED") 4362 4363 return self.expression( 4364 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4365 ) 4366 4367 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4368 expression = self._parse_foreign_key() 4369 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4370 expression = self._parse_primary_key() 4371 else: 4372 expression = None 4373 4374 return self.expression(exp.AddConstraint, this=this, expression=expression) 4375 4376 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4377 index = self._index - 1 4378 4379 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4380 return self._parse_csv(self._parse_add_constraint) 4381 4382 self._retreat(index) 4383 return self._parse_csv(self._parse_add_column) 4384 4385 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4386 self._match(TokenType.COLUMN) 4387 column = self._parse_field(any_token=True) 4388 4389 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4390 return self.expression(exp.AlterColumn, this=column, drop=True) 4391 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4392 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4393 4394 self._match_text_seq("SET", "DATA") 4395 return self.expression( 4396 exp.AlterColumn, 4397 this=column, 4398 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4399 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4400 using=self._match(TokenType.USING) and self._parse_conjunction(), 4401 ) 4402 4403 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4404 index = self._index - 1 4405 4406 partition_exists = self._parse_exists() 4407 if self._match(TokenType.PARTITION, advance=False): 4408 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4409 4410 self._retreat(index) 4411 return self._parse_csv(self._parse_drop_column) 4412 4413 def _parse_alter_table_rename(self) -> exp.RenameTable: 4414 self._match_text_seq("TO") 4415 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4416 4417 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4418 start = self._prev 4419 4420 if not self._match(TokenType.TABLE): 4421 return self._parse_as_command(start) 4422 4423 exists = self._parse_exists() 4424 this = self._parse_table(schema=True) 4425 4426 if self._next: 4427 self._advance() 4428 4429 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4430 if parser: 4431 actions = ensure_list(parser(self)) 4432 4433 if not self._curr: 4434 return self.expression( 4435 exp.AlterTable, 4436 this=this, 4437 exists=exists, 4438 actions=actions, 4439 ) 4440 return self._parse_as_command(start) 4441 4442 def _parse_merge(self) -> exp.Merge: 4443 self._match(TokenType.INTO) 4444 target = self._parse_table() 4445 4446 self._match(TokenType.USING) 4447 using = self._parse_table() 4448 4449 self._match(TokenType.ON) 4450 on = self._parse_conjunction() 4451 4452 whens = [] 4453 while self._match(TokenType.WHEN): 4454 matched = not self._match(TokenType.NOT) 4455 self._match_text_seq("MATCHED") 4456 source = ( 4457 False 4458 if self._match_text_seq("BY", "TARGET") 4459 else self._match_text_seq("BY", "SOURCE") 4460 ) 4461 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4462 4463 self._match(TokenType.THEN) 4464 4465 if self._match(TokenType.INSERT): 4466 _this = self._parse_star() 4467 if _this: 4468 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4469 else: 4470 then = self.expression( 4471 exp.Insert, 4472 this=self._parse_value(), 4473 expression=self._match(TokenType.VALUES) and self._parse_value(), 4474 ) 4475 elif self._match(TokenType.UPDATE): 4476 expressions = self._parse_star() 4477 if expressions: 4478 then = self.expression(exp.Update, expressions=expressions) 4479 else: 4480 then = self.expression( 4481 exp.Update, 4482 expressions=self._match(TokenType.SET) 4483 and self._parse_csv(self._parse_equality), 4484 ) 4485 elif self._match(TokenType.DELETE): 4486 then = self.expression(exp.Var, this=self._prev.text) 4487 else: 4488 then = None 4489 4490 whens.append( 4491 self.expression( 4492 exp.When, 4493 matched=matched, 4494 source=source, 4495 condition=condition, 4496 then=then, 4497 ) 4498 ) 4499 4500 return self.expression( 4501 exp.Merge, 4502 this=target, 4503 using=using, 4504 on=on, 4505 expressions=whens, 4506 ) 4507 4508 def _parse_show(self) -> t.Optional[exp.Expression]: 4509 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4510 if parser: 4511 return parser(self) 4512 self._advance() 4513 return self.expression(exp.Show, this=self._prev.text.upper()) 4514 4515 def _parse_set_item_assignment( 4516 self, kind: t.Optional[str] = None 4517 ) -> t.Optional[exp.Expression]: 4518 index = self._index 4519 4520 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4521 return self._parse_set_transaction(global_=kind == "GLOBAL") 4522 4523 left = self._parse_primary() or self._parse_id_var() 4524 4525 if not self._match_texts(("=", "TO")): 4526 self._retreat(index) 4527 return None 4528 4529 right = self._parse_statement() or self._parse_id_var() 4530 this = self.expression(exp.EQ, this=left, expression=right) 4531 4532 return self.expression(exp.SetItem, this=this, kind=kind) 4533 4534 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4535 self._match_text_seq("TRANSACTION") 4536 characteristics = self._parse_csv( 4537 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4538 ) 4539 return self.expression( 4540 exp.SetItem, 4541 expressions=characteristics, 4542 kind="TRANSACTION", 4543 **{"global": global_}, # type: ignore 4544 ) 4545 4546 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4547 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4548 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4549 4550 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4551 index = self._index 4552 set_ = self.expression( 4553 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4554 ) 4555 4556 if self._curr: 4557 self._retreat(index) 4558 return self._parse_as_command(self._prev) 4559 4560 return set_ 4561 4562 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4563 for option in options: 4564 if self._match_text_seq(*option.split(" ")): 4565 return exp.var(option) 4566 return None 4567 4568 def _parse_as_command(self, start: Token) -> exp.Command: 4569 while self._curr: 4570 self._advance() 4571 text = self._find_sql(start, self._prev) 4572 size = len(start.text) 4573 return exp.Command(this=text[:size], expression=text[size:]) 4574 4575 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4576 settings = [] 4577 4578 self._match_l_paren() 4579 kind = self._parse_id_var() 4580 4581 if self._match(TokenType.L_PAREN): 4582 while True: 4583 key = self._parse_id_var() 4584 value = self._parse_primary() 4585 4586 if not key and value is None: 4587 break 4588 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4589 self._match(TokenType.R_PAREN) 4590 4591 self._match_r_paren() 4592 4593 return self.expression( 4594 exp.DictProperty, 4595 this=this, 4596 kind=kind.this if kind else None, 4597 settings=settings, 4598 ) 4599 4600 def _parse_dict_range(self, this: str) -> exp.DictRange: 4601 self._match_l_paren() 4602 has_min = self._match_text_seq("MIN") 4603 if has_min: 4604 min = self._parse_var() or self._parse_primary() 4605 self._match_text_seq("MAX") 4606 max = self._parse_var() or self._parse_primary() 4607 else: 4608 max = self._parse_var() or self._parse_primary() 4609 min = exp.Literal.number(0) 4610 self._match_r_paren() 4611 return self.expression(exp.DictRange, this=this, min=min, max=max) 4612 4613 def _find_parser( 4614 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4615 ) -> t.Optional[t.Callable]: 4616 if not self._curr: 4617 return None 4618 4619 index = self._index 4620 this = [] 4621 while True: 4622 # The current token might be multiple words 4623 curr = self._curr.text.upper() 4624 key = curr.split(" ") 4625 this.append(curr) 4626 4627 self._advance() 4628 result, trie = in_trie(trie, key) 4629 if result == TrieResult.FAILED: 4630 break 4631 4632 if result == TrieResult.EXISTS: 4633 subparser = parsers[" ".join(this)] 4634 return subparser 4635 4636 self._retreat(index) 4637 return None 4638 4639 def _match(self, token_type, advance=True, expression=None): 4640 if not self._curr: 4641 return None 4642 4643 if self._curr.token_type == token_type: 4644 if advance: 4645 self._advance() 4646 self._add_comments(expression) 4647 return True 4648 4649 return None 4650 4651 def _match_set(self, types, advance=True): 4652 if not self._curr: 4653 return None 4654 4655 if self._curr.token_type in types: 4656 if advance: 4657 self._advance() 4658 return True 4659 4660 return None 4661 4662 def _match_pair(self, token_type_a, token_type_b, advance=True): 4663 if not self._curr or not self._next: 4664 return None 4665 4666 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4667 if advance: 4668 self._advance(2) 4669 return True 4670 4671 return None 4672 4673 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4674 if not self._match(TokenType.L_PAREN, expression=expression): 4675 self.raise_error("Expecting (") 4676 4677 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4678 if not self._match(TokenType.R_PAREN, expression=expression): 4679 self.raise_error("Expecting )") 4680 4681 def _match_texts(self, texts, advance=True): 4682 if self._curr and self._curr.text.upper() in texts: 4683 if advance: 4684 self._advance() 4685 return True 4686 return False 4687 4688 def _match_text_seq(self, *texts, advance=True): 4689 index = self._index 4690 for text in texts: 4691 if self._curr and self._curr.text.upper() == text: 4692 self._advance() 4693 else: 4694 self._retreat(index) 4695 return False 4696 4697 if not advance: 4698 self._retreat(index) 4699 4700 return True 4701 4702 @t.overload 4703 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4704 ... 4705 4706 @t.overload 4707 def _replace_columns_with_dots( 4708 self, this: t.Optional[exp.Expression] 4709 ) -> t.Optional[exp.Expression]: 4710 ... 4711 4712 def _replace_columns_with_dots(self, this): 4713 if isinstance(this, exp.Dot): 4714 exp.replace_children(this, self._replace_columns_with_dots) 4715 elif isinstance(this, exp.Column): 4716 exp.replace_children(this, self._replace_columns_with_dots) 4717 table = this.args.get("table") 4718 this = ( 4719 self.expression(exp.Dot, this=table, expression=this.this) 4720 if table 4721 else self.expression(exp.Var, this=this.name) 4722 ) 4723 elif isinstance(this, exp.Identifier): 4724 this = self.expression(exp.Var, this=this.name) 4725 4726 return this 4727 4728 def _replace_lambda( 4729 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4730 ) -> t.Optional[exp.Expression]: 4731 if not node: 4732 return node 4733 4734 for column in node.find_all(exp.Column): 4735 if column.parts[0].name in lambda_variables: 4736 dot_or_id = column.to_dot() if column.table else column.this 4737 parent = column.parent 4738 4739 while isinstance(parent, exp.Dot): 4740 if not isinstance(parent.parent, exp.Dot): 4741 parent.replace(dot_or_id) 4742 break 4743 parent = parent.parent 4744 else: 4745 if column is node: 4746 node = dot_or_id 4747 else: 4748 column.replace(dot_or_id) 4749 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset()
853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 )
Logs or raises any found errors, depending on the chosen error level setting.
953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.