sqlglot.parser
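
The module below is easiest to read with its entry points in mind: a Tokenizer turns SQL text into tokens, and Parser.parse turns those tokens into one expression tree per statement. A minimal sketch of that flow (the .sql() round-trip on the result comes from sqlglot.expressions, not this module):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t WHERE b > 1"
    tokens = Tokenizer().tokenize(sql)   # list of Token objects
    trees = Parser().parse(tokens, sql)  # one tree per statement
    print(trees[0].sql())                # round-trips back to SQL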

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
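
    # Dialects customize the tables below by subclassing Parser. A hypothetical
    # sketch (MY_UDF is illustrative, not a real sqlglot function) of registering
    # an extra function parser:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_UDF": lambda args: exp.Anonymous(this="MY_UDF", expressions=args),
    #         }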

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
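
    # The tables above drive precedence climbing: conjunction parsing hands off to
    # equality, then comparison, bitwise, term and factor parsing, so lower tables
    # bind tighter. E.g. "1 + 2 * 3" nests as
    # exp.Add(this=1, expression=exp.Mul(this=2, expression=3)) because FACTOR
    # (TokenType.STAR -> exp.Mul) sits below TERM (TokenType.PLUS -> exp.Add).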

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
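
    # The table below drives _parse_statement (defined further down): the first
    # token of a statement is looked up in STATEMENT_PARSERS, so e.g.
    # "UPDATE t SET x = 1" starts with TokenType.UPDATE and routes to
    # _parse_update; unmatched tokens fall through to command or SELECT parsing.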

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
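
    # Several RANGE_PARSERS entries above are built with binary_range_parser (see
    # module top): they parse their right-hand side with _parse_bitwise and then
    # check for a trailing ESCAPE clause, so "x LIKE 'a!%' ESCAPE '!'" parses
    # roughly as exp.Escape(this=exp.Like(this=x, expression='a!%'), expression='!').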

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
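
    # The flags above tune behavior rather than syntax tables, and dialects
    # override them. For instance, COLUMN_OPERATORS consults STRICT_CAST: with
    # the default True, "x::int" parses to exp.Cast, while a dialect setting it
    # to False gets exp.TryCast (FUNCTION_PARSERS applies the same flag to CAST).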

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
885 """ 886 errors = [] 887 for expression_type in ensure_list(expression_types): 888 parser = self.EXPRESSION_PARSERS.get(expression_type) 889 if not parser: 890 raise TypeError(f"No parser registered for {expression_type}") 891 892 try: 893 return self._parse(parser, raw_tokens, sql) 894 except ParseError as e: 895 e.errors[0]["into_expression"] = expression_type 896 errors.append(e) 897 898 raise ParseError( 899 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 900 errors=merge_errors(errors), 901 ) from errors[-1] 902 903 def _parse( 904 self, 905 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 906 raw_tokens: t.List[Token], 907 sql: t.Optional[str] = None, 908 ) -> t.List[t.Optional[exp.Expression]]: 909 self.reset() 910 self.sql = sql or "" 911 912 total = len(raw_tokens) 913 chunks: t.List[t.List[Token]] = [[]] 914 915 for i, token in enumerate(raw_tokens): 916 if token.token_type == TokenType.SEMICOLON: 917 if i < total - 1: 918 chunks.append([]) 919 else: 920 chunks[-1].append(token) 921 922 expressions = [] 923 924 for tokens in chunks: 925 self._index = -1 926 self._tokens = tokens 927 self._advance() 928 929 expressions.append(parse_method(self)) 930 931 if self._index < len(self._tokens): 932 self.raise_error("Invalid expression / Unexpected token") 933 934 self.check_errors() 935 936 return expressions 937 938 def check_errors(self) -> None: 939 """Logs or raises any found errors, depending on the chosen error level setting.""" 940 if self.error_level == ErrorLevel.WARN: 941 for error in self.errors: 942 logger.error(str(error)) 943 elif self.error_level == ErrorLevel.RAISE and self.errors: 944 raise ParseError( 945 concat_messages(self.errors, self.max_errors), 946 errors=merge_errors(self.errors), 947 ) 948 949 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 950 """ 951 Appends an error in the list of recorded errors or raises it, depending on the chosen 952 error level setting. 953 """ 954 token = token or self._curr or self._prev or Token.string("") 955 start = token.start 956 end = token.end + 1 957 start_context = self.sql[max(start - self.error_message_context, 0) : start] 958 highlight = self.sql[start:end] 959 end_context = self.sql[end : end + self.error_message_context] 960 961 error = ParseError.new( 962 f"{message}. Line {token.line}, Col: {token.col}.\n" 963 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 964 description=message, 965 line=token.line, 966 col=token.col, 967 start_context=start_context, 968 highlight=highlight, 969 end_context=end_context, 970 ) 971 972 if self.error_level == ErrorLevel.IMMEDIATE: 973 raise error 974 975 self.errors.append(error) 976 977 def expression( 978 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 979 ) -> E: 980 """ 981 Creates a new, validated Expression. 982 983 Args: 984 exp_class: The expression class to instantiate. 985 comments: An optional list of comments to attach to the expression. 986 kwargs: The arguments to set for the expression along with their respective values. 987 988 Returns: 989 The target expression. 
990 """ 991 instance = exp_class(**kwargs) 992 instance.add_comments(comments) if comments else self._add_comments(instance) 993 return self.validate_expression(instance) 994 995 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 996 if expression and self._prev_comments: 997 expression.add_comments(self._prev_comments) 998 self._prev_comments = None 999 1000 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1001 """ 1002 Validates an Expression, making sure that all its mandatory arguments are set. 1003 1004 Args: 1005 expression: The expression to validate. 1006 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1007 1008 Returns: 1009 The validated expression. 1010 """ 1011 if self.error_level != ErrorLevel.IGNORE: 1012 for error_message in expression.error_messages(args): 1013 self.raise_error(error_message) 1014 1015 return expression 1016 1017 def _find_sql(self, start: Token, end: Token) -> str: 1018 return self.sql[start.start : end.end + 1] 1019 1020 def _advance(self, times: int = 1) -> None: 1021 self._index += times 1022 self._curr = seq_get(self._tokens, self._index) 1023 self._next = seq_get(self._tokens, self._index + 1) 1024 1025 if self._index > 0: 1026 self._prev = self._tokens[self._index - 1] 1027 self._prev_comments = self._prev.comments 1028 else: 1029 self._prev = None 1030 self._prev_comments = None 1031 1032 def _retreat(self, index: int) -> None: 1033 if index != self._index: 1034 self._advance(index - self._index) 1035 1036 def _parse_command(self) -> exp.Command: 1037 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1038 1039 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1040 start = self._prev 1041 exists = self._parse_exists() if allow_exists else None 1042 1043 self._match(TokenType.ON) 1044 1045 kind = self._match_set(self.CREATABLES) and self._prev 1046 if not kind: 1047 return self._parse_as_command(start) 1048 1049 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1050 this = self._parse_user_defined_function(kind=kind.token_type) 1051 elif kind.token_type == TokenType.TABLE: 1052 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1053 elif kind.token_type == TokenType.COLUMN: 1054 this = self._parse_column() 1055 else: 1056 this = self._parse_id_var() 1057 1058 self._match(TokenType.IS) 1059 1060 return self.expression( 1061 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1062 ) 1063 1064 def _parse_to_table( 1065 self, 1066 ) -> exp.ToTableProperty: 1067 table = self._parse_table_parts(schema=True) 1068 return self.expression(exp.ToTableProperty, this=table) 1069 1070 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1071 def _parse_ttl(self) -> exp.Expression: 1072 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1073 this = self._parse_bitwise() 1074 1075 if self._match_text_seq("DELETE"): 1076 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1077 if self._match_text_seq("RECOMPRESS"): 1078 return self.expression( 1079 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1080 ) 1081 if self._match_text_seq("TO", "DISK"): 1082 return self.expression( 1083 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1084 ) 1085 if self._match_text_seq("TO", "VOLUME"): 1086 return self.expression( 1087 

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
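
    # Parse methods build nodes through self.expression rather than calling the
    # exp classes directly, so that mandatory args are validated and any pending
    # comments attach to the node, e.g.
    # self.expression(exp.Not, this=self._parse_equality()) in UNARY_PARSERS.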

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
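
    # _advance/_retreat give the parser backtracking: speculative parse methods
    # save index = self._index, try an interpretation, and call
    # self._retreat(index) to rewind when it does not pan out (see _parse_cluster
    # below for the one-token case).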

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
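
    # Example flow: "CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1" matches no
    # CREATABLE right after CREATE OR REPLACE, so TEMPORARY is parsed as a
    # POST_CREATE property; VIEW then becomes create_token, the name comes from
    # _parse_table_parts, and the trailing SELECT is parsed by _parse_ddl_select.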
self._match_text_seq("DATABASE"): 1516 kind = "DATABASE" 1517 else: 1518 kind = None 1519 1520 if kind in ("DATABASE", "TABLE", "VIEW"): 1521 this = self._parse_table_parts() 1522 else: 1523 this = None 1524 1525 if self._match(TokenType.FOR): 1526 for_or_in = "FOR" 1527 elif self._match(TokenType.IN): 1528 for_or_in = "IN" 1529 else: 1530 for_or_in = None 1531 1532 if self._match_text_seq("ACCESS"): 1533 lock_type = "ACCESS" 1534 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1535 lock_type = "EXCLUSIVE" 1536 elif self._match_text_seq("SHARE"): 1537 lock_type = "SHARE" 1538 elif self._match_text_seq("READ"): 1539 lock_type = "READ" 1540 elif self._match_text_seq("WRITE"): 1541 lock_type = "WRITE" 1542 elif self._match_text_seq("CHECKSUM"): 1543 lock_type = "CHECKSUM" 1544 else: 1545 lock_type = None 1546 1547 override = self._match_text_seq("OVERRIDE") 1548 1549 return self.expression( 1550 exp.LockingProperty, 1551 this=this, 1552 kind=kind, 1553 for_or_in=for_or_in, 1554 lock_type=lock_type, 1555 override=override, 1556 ) 1557 1558 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1559 if self._match(TokenType.PARTITION_BY): 1560 return self._parse_csv(self._parse_conjunction) 1561 return [] 1562 1563 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1564 self._match(TokenType.EQ) 1565 return self.expression( 1566 exp.PartitionedByProperty, 1567 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1568 ) 1569 1570 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1571 if self._match_text_seq("AND", "STATISTICS"): 1572 statistics = True 1573 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1574 statistics = False 1575 else: 1576 statistics = None 1577 1578 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1579 1580 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1581 if self._match_text_seq("PRIMARY", "INDEX"): 1582 return exp.NoPrimaryIndexProperty() 1583 return None 1584 1585 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1586 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1587 return exp.OnCommitProperty() 1588 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1589 return exp.OnCommitProperty(delete=True) 1590 return None 1591 1592 def _parse_distkey(self) -> exp.DistKeyProperty: 1593 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1594 1595 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1596 table = self._parse_table(schema=True) 1597 1598 options = [] 1599 while self._match_texts(("INCLUDING", "EXCLUDING")): 1600 this = self._prev.text.upper() 1601 1602 id_var = self._parse_id_var() 1603 if not id_var: 1604 return None 1605 1606 options.append( 1607 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1608 ) 1609 1610 return self.expression(exp.LikeProperty, this=table, expressions=options) 1611 1612 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1613 return self.expression( 1614 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1615 ) 1616 1617 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1618 self._match(TokenType.EQ) 1619 return self.expression( 1620 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1621 ) 1622 1623 def _parse_returns(self) -> exp.ReturnsProperty: 1624 value: t.Optional[exp.Expression] 1625 is_table = 

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
kwargs["fields"] = self._parse_string() 1740 if self._match_text_seq("ESCAPED", "BY"): 1741 kwargs["escaped"] = self._parse_string() 1742 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1743 kwargs["collection_items"] = self._parse_string() 1744 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1745 kwargs["map_keys"] = self._parse_string() 1746 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1747 kwargs["lines"] = self._parse_string() 1748 if self._match_text_seq("NULL", "DEFINED", "AS"): 1749 kwargs["null"] = self._parse_string() 1750 1751 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1752 1753 def _parse_load(self) -> exp.LoadData | exp.Command: 1754 if self._match_text_seq("DATA"): 1755 local = self._match_text_seq("LOCAL") 1756 self._match_text_seq("INPATH") 1757 inpath = self._parse_string() 1758 overwrite = self._match(TokenType.OVERWRITE) 1759 self._match_pair(TokenType.INTO, TokenType.TABLE) 1760 1761 return self.expression( 1762 exp.LoadData, 1763 this=self._parse_table(schema=True), 1764 local=local, 1765 overwrite=overwrite, 1766 inpath=inpath, 1767 partition=self._parse_partition(), 1768 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1769 serde=self._match_text_seq("SERDE") and self._parse_string(), 1770 ) 1771 return self._parse_as_command(self._prev) 1772 1773 def _parse_delete(self) -> exp.Delete: 1774 self._match(TokenType.FROM) 1775 1776 return self.expression( 1777 exp.Delete, 1778 this=self._parse_table(), 1779 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1780 where=self._parse_where(), 1781 returning=self._parse_returning(), 1782 ) 1783 1784 def _parse_update(self) -> exp.Update: 1785 return self.expression( 1786 exp.Update, 1787 **{ # type: ignore 1788 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1789 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1790 "from": self._parse_from(modifiers=True), 1791 "where": self._parse_where(), 1792 "returning": self._parse_returning(), 1793 }, 1794 ) 1795 1796 def _parse_uncache(self) -> exp.Uncache: 1797 if not self._match(TokenType.TABLE): 1798 self.raise_error("Expecting TABLE after UNCACHE") 1799 1800 return self.expression( 1801 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1802 ) 1803 1804 def _parse_cache(self) -> exp.Cache: 1805 lazy = self._match_text_seq("LAZY") 1806 self._match(TokenType.TABLE) 1807 table = self._parse_table(schema=True) 1808 1809 options = [] 1810 if self._match_text_seq("OPTIONS"): 1811 self._match_l_paren() 1812 k = self._parse_string() 1813 self._match(TokenType.EQ) 1814 v = self._parse_string() 1815 options = [k, v] 1816 self._match_r_paren() 1817 1818 self._match(TokenType.ALIAS) 1819 return self.expression( 1820 exp.Cache, 1821 this=table, 1822 lazy=lazy, 1823 options=options, 1824 expression=self._parse_select(nested=True), 1825 ) 1826 1827 def _parse_partition(self) -> t.Optional[exp.Partition]: 1828 if not self._match(TokenType.PARTITION): 1829 return None 1830 1831 return self.expression( 1832 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1833 ) 1834 1835 def _parse_value(self) -> exp.Tuple: 1836 if self._match(TokenType.L_PAREN): 1837 expressions = self._parse_csv(self._parse_conjunction) 1838 self._match_r_paren() 1839 return self.expression(exp.Tuple, expressions=expressions) 1840 1841 # In presto we can have VALUES 1, 2 which results in 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that subquery unions aren't parsed again, e.g. in
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 the UNION ALL should be a
            # property of the top select node, not of the subquery.
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
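    # e.g. "WITH cte AS (SELECT 1 AS x) SELECT x FROM cte": each alias/body pair
    # parsed above becomes one exp.CTE inside the resulting exp.With node.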
    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))

                    this.set(key, expression)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )
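    # MATCH_RECOGNIZE clauses (PARTITION BY, ORDER BY, MEASURES, rows-per-match and
    # AFTER MATCH SKIP modes, PATTERN, DEFINE) are collected below; the PATTERN body
    # is kept verbatim as an exp.var rather than parsed into an AST of its own.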
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = self._parse_function() or self._parse_id_var(any_token=False)

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
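    # A join prefix is split into (method, side, kind) tokens, e.g. in
    # "NATURAL LEFT OUTER JOIN" the method is NATURAL, the side LEFT, the kind OUTER.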
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
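    # _parse_table tries, in order: LATERAL/APPLY, UNNEST, VALUES, a parenthesized
    # subquery, and finally a (possibly qualified) table name, then attaches the
    # alias, pivots, hints and TABLESAMPLE as the dialect dictates.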
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
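    # TABLESAMPLE arrives in several dialect flavors that are normalized below, e.g.
    # "TABLESAMPLE (10 PERCENT)", "TABLESAMPLE (5 ROWS)" or Hive's
    # "TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)".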
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
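    # Implicit PIVOT output columns are synthesized from the aggregation aliases and
    # the IN values above, e.g. alias "total" and value 'a' yield "a_total" (or
    # "total_a" when PREFIXED_PIVOT_COLUMNS is set).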
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
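    # When no NULLS FIRST/LAST is given explicitly, the dialect's NULL_ORDERING
    # setting decides below whether nulls_first must be set, so that ORDER BY keeps
    # the same semantics when transpiled across dialects.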
    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
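    # Operator precedence is implemented as a chain of recursive-descent helpers:
    # expression -> conjunction -> equality -> comparison -> range -> bitwise ->
    # term -> factor -> unary -> primary, each level binding tighter than the last.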
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)

            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
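    # e.g. INTERVAL 5 DAY and INTERVAL '5 day' are both canonicalized below into an
    # exp.Interval with a string literal this ('5') and a Var unit (day), which keeps
    # transpilation simple.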
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
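    # _parse_types covers parameterized and nested types alike, e.g. DECIMAL(10, 2),
    # ARRAY<INT>, STRUCT<x INT>, INT[] and TIMESTAMP WITH TIME ZONE, backtracking
    # with _retreat when the tokens turn out not to form a type.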
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
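    # Column suffix operators are folded in below, e.g. "x::INT" becomes an exp.Cast
    # and "a.b.c" is rebuilt into a Column with table/db/catalog parts (or exp.Dot
    # chains for deeper paths).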
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
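    # Dispatch order below: NO_PAREN_FUNCTION_PARSERS, paren-less builtins such as
    # CURRENT_DATE (NO_PAREN_FUNCTIONS), dialect FUNCTION_PARSERS, subquery
    # predicates like EXISTS (SELECT ...), known FUNCTIONS builders, and finally a
    # generic exp.Anonymous call.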
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()

        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
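    # _parse_schema first probes for a parenthesized SELECT, so "(SELECT ...)" is not
    # mistaken for a column list, and only then parses "(col TYPE ..., ...)" into an
    # exp.Schema.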
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
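    # e.g. "id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1)" fills
    # the start/increment/minvalue/maxvalue/cycle args handled above.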
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
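    # REFERENCES clauses reuse the key-constraint options above, e.g.
    # "REFERENCES t (id) ON DELETE CASCADE" yields options=["ON DELETE CASCADE"].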
    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this
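    # CASE parsing covers both the searched form (CASE WHEN cond THEN ... END) and
    # the simple form (CASE expr WHEN val THEN ... END); the optional leading operand
    # lands in exp.Case's `this` arg.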
self._match(TokenType.ELSE): 3579 default = self._parse_conjunction() 3580 3581 if not self._match(TokenType.END): 3582 self.raise_error("Expected END after CASE", self._prev) 3583 3584 return self._parse_window( 3585 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3586 ) 3587 3588 def _parse_if(self) -> t.Optional[exp.Expression]: 3589 if self._match(TokenType.L_PAREN): 3590 args = self._parse_csv(self._parse_conjunction) 3591 this = self.validate_expression(exp.If.from_arg_list(args), args) 3592 self._match_r_paren() 3593 else: 3594 index = self._index - 1 3595 condition = self._parse_conjunction() 3596 3597 if not condition: 3598 self._retreat(index) 3599 return None 3600 3601 self._match(TokenType.THEN) 3602 true = self._parse_conjunction() 3603 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3604 self._match(TokenType.END) 3605 this = self.expression(exp.If, this=condition, true=true, false=false) 3606 3607 return self._parse_window(this) 3608 3609 def _parse_extract(self) -> exp.Extract: 3610 this = self._parse_function() or self._parse_var() or self._parse_type() 3611 3612 if self._match(TokenType.FROM): 3613 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3614 3615 if not self._match(TokenType.COMMA): 3616 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3617 3618 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3619 3620 def _parse_cast(self, strict: bool) -> exp.Expression: 3621 this = self._parse_conjunction() 3622 3623 if not self._match(TokenType.ALIAS): 3624 if self._match(TokenType.COMMA): 3625 return self.expression( 3626 exp.CastToStrType, this=this, expression=self._parse_string() 3627 ) 3628 else: 3629 self.raise_error("Expected AS after CAST") 3630 3631 to = self._parse_types() 3632 3633 if not to: 3634 self.raise_error("Expected TYPE after CAST") 3635 elif to.this == exp.DataType.Type.CHAR: 3636 if self._match(TokenType.CHARACTER_SET): 3637 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3638 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3639 fmt = self._parse_string() 3640 3641 return self.expression( 3642 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3643 this=this, 3644 format=exp.Literal.string( 3645 format_time( 3646 fmt.this if fmt else "", 3647 self.FORMAT_MAPPING or self.TIME_MAPPING, 3648 self.FORMAT_TRIE or self.TIME_TRIE, 3649 ) 3650 ), 3651 ) 3652 3653 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3654 3655 def _parse_concat(self) -> t.Optional[exp.Expression]: 3656 args = self._parse_csv(self._parse_conjunction) 3657 if self.CONCAT_NULL_OUTPUTS_STRING: 3658 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3659 3660 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3661 # we find such a call we replace it with its argument. 
3662 if len(args) == 1: 3663 return args[0] 3664 3665 return self.expression( 3666 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3667 ) 3668 3669 def _parse_string_agg(self) -> exp.Expression: 3670 expression: t.Optional[exp.Expression] 3671 3672 if self._match(TokenType.DISTINCT): 3673 args = self._parse_csv(self._parse_conjunction) 3674 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3675 else: 3676 args = self._parse_csv(self._parse_conjunction) 3677 expression = seq_get(args, 0) 3678 3679 index = self._index 3680 if not self._match(TokenType.R_PAREN): 3681 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3682 order = self._parse_order(this=expression) 3683 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3684 3685 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3686 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3687 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3688 if not self._match_text_seq("WITHIN", "GROUP"): 3689 self._retreat(index) 3690 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3691 3692 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3693 order = self._parse_order(this=expression) 3694 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3695 3696 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3697 to: t.Optional[exp.Expression] 3698 this = self._parse_bitwise() 3699 3700 if self._match(TokenType.USING): 3701 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3702 elif self._match(TokenType.COMMA): 3703 to = self._parse_bitwise() 3704 else: 3705 to = None 3706 3707 # Swap the argument order if needed to produce the correct AST 3708 if self.CONVERT_TYPE_FIRST: 3709 this, to = to, this 3710 3711 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3712 3713 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3714 """ 3715 There are generally two variants of the DECODE function: 3716 3717 - DECODE(bin, charset) 3718 - DECODE(expression, search, result [, search, result] ... [, default]) 3719 3720 The second variant will always be parsed into a CASE expression. Note that NULL 3721 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3722 instead of relying on pattern matching. 
3723 """ 3724 args = self._parse_csv(self._parse_conjunction) 3725 3726 if len(args) < 3: 3727 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3728 3729 expression, *expressions = args 3730 if not expression: 3731 return None 3732 3733 ifs = [] 3734 for search, result in zip(expressions[::2], expressions[1::2]): 3735 if not search or not result: 3736 return None 3737 3738 if isinstance(search, exp.Literal): 3739 ifs.append( 3740 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3741 ) 3742 elif isinstance(search, exp.Null): 3743 ifs.append( 3744 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3745 ) 3746 else: 3747 cond = exp.or_( 3748 exp.EQ(this=expression.copy(), expression=search), 3749 exp.and_( 3750 exp.Is(this=expression.copy(), expression=exp.Null()), 3751 exp.Is(this=search.copy(), expression=exp.Null()), 3752 copy=False, 3753 ), 3754 copy=False, 3755 ) 3756 ifs.append(exp.If(this=cond, true=result)) 3757 3758 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3759 3760 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3761 self._match_text_seq("KEY") 3762 key = self._parse_field() 3763 self._match(TokenType.COLON) 3764 self._match_text_seq("VALUE") 3765 value = self._parse_field() 3766 3767 if not key and not value: 3768 return None 3769 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3770 3771 def _parse_json_object(self) -> exp.JSONObject: 3772 star = self._parse_star() 3773 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3774 3775 null_handling = None 3776 if self._match_text_seq("NULL", "ON", "NULL"): 3777 null_handling = "NULL ON NULL" 3778 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3779 null_handling = "ABSENT ON NULL" 3780 3781 unique_keys = None 3782 if self._match_text_seq("WITH", "UNIQUE"): 3783 unique_keys = True 3784 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3785 unique_keys = False 3786 3787 self._match_text_seq("KEYS") 3788 3789 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3790 format_json = self._match_text_seq("FORMAT", "JSON") 3791 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3792 3793 return self.expression( 3794 exp.JSONObject, 3795 expressions=expressions, 3796 null_handling=null_handling, 3797 unique_keys=unique_keys, 3798 return_type=return_type, 3799 format_json=format_json, 3800 encoding=encoding, 3801 ) 3802 3803 def _parse_logarithm(self) -> exp.Func: 3804 # Default argument order is base, expression 3805 args = self._parse_csv(self._parse_range) 3806 3807 if len(args) > 1: 3808 if not self.LOG_BASE_FIRST: 3809 args.reverse() 3810 return exp.Log.from_arg_list(args) 3811 3812 return self.expression( 3813 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3814 ) 3815 3816 def _parse_match_against(self) -> exp.MatchAgainst: 3817 expressions = self._parse_csv(self._parse_column) 3818 3819 self._match_text_seq(")", "AGAINST", "(") 3820 3821 this = self._parse_string() 3822 3823 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3824 modifier = "IN NATURAL LANGUAGE MODE" 3825 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3826 modifier = f"{modifier} WITH QUERY EXPANSION" 3827 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3828 modifier = "IN BOOLEAN MODE" 3829 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3830 modifier = "WITH QUERY EXPANSION" 3831 
else: 3832 modifier = None 3833 3834 return self.expression( 3835 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3836 ) 3837 3838 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3839 def _parse_open_json(self) -> exp.OpenJSON: 3840 this = self._parse_bitwise() 3841 path = self._match(TokenType.COMMA) and self._parse_string() 3842 3843 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3844 this = self._parse_field(any_token=True) 3845 kind = self._parse_types() 3846 path = self._parse_string() 3847 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3848 3849 return self.expression( 3850 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3851 ) 3852 3853 expressions = None 3854 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3855 self._match_l_paren() 3856 expressions = self._parse_csv(_parse_open_json_column_def) 3857 3858 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3859 3860 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3861 args = self._parse_csv(self._parse_bitwise) 3862 3863 if self._match(TokenType.IN): 3864 return self.expression( 3865 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3866 ) 3867 3868 if haystack_first: 3869 haystack = seq_get(args, 0) 3870 needle = seq_get(args, 1) 3871 else: 3872 needle = seq_get(args, 0) 3873 haystack = seq_get(args, 1) 3874 3875 return self.expression( 3876 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3877 ) 3878 3879 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3880 args = self._parse_csv(self._parse_table) 3881 return exp.JoinHint(this=func_name.upper(), expressions=args) 3882 3883 def _parse_substring(self) -> exp.Substring: 3884 # Postgres supports the form: substring(string [from int] [for int]) 3885 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3886 3887 args = self._parse_csv(self._parse_bitwise) 3888 3889 if self._match(TokenType.FROM): 3890 args.append(self._parse_bitwise()) 3891 if self._match(TokenType.FOR): 3892 args.append(self._parse_bitwise()) 3893 3894 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3895 3896 def _parse_trim(self) -> exp.Trim: 3897 # https://www.w3resource.com/sql/character-functions/trim.php 3898 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3899 3900 position = None 3901 collation = None 3902 3903 if self._match_texts(self.TRIM_TYPES): 3904 position = self._prev.text.upper() 3905 3906 expression = self._parse_bitwise() 3907 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3908 this = self._parse_bitwise() 3909 else: 3910 this = expression 3911 expression = None 3912 3913 if self._match(TokenType.COLLATE): 3914 collation = self._parse_bitwise() 3915 3916 return self.expression( 3917 exp.Trim, this=this, position=position, expression=expression, collation=collation 3918 ) 3919 3920 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3921 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3922 3923 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3924 return self._parse_window(self._parse_id_var(), alias=True) 3925 3926 def _parse_respect_or_ignore_nulls( 3927 self, this: t.Optional[exp.Expression] 3928 ) -> t.Optional[exp.Expression]: 3929 if self._match_text_seq("IGNORE", "NULLS"): 3930 return 
self.expression(exp.IgnoreNulls, this=this) 3931 if self._match_text_seq("RESPECT", "NULLS"): 3932 return self.expression(exp.RespectNulls, this=this) 3933 return this 3934 3935 def _parse_window( 3936 self, this: t.Optional[exp.Expression], alias: bool = False 3937 ) -> t.Optional[exp.Expression]: 3938 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3939 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3940 self._match_r_paren() 3941 3942 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3943 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3944 if self._match_text_seq("WITHIN", "GROUP"): 3945 order = self._parse_wrapped(self._parse_order) 3946 this = self.expression(exp.WithinGroup, this=this, expression=order) 3947 3948 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3949 # Some dialects choose to implement and some do not. 3950 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3951 3952 # There is some code above in _parse_lambda that handles 3953 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3954 3955 # The below changes handle 3956 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3957 3958 # Oracle allows both formats 3959 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3960 # and Snowflake chose to do the same for familiarity 3961 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3962 this = self._parse_respect_or_ignore_nulls(this) 3963 3964 # bigquery select from window x AS (partition by ...) 3965 if alias: 3966 over = None 3967 self._match(TokenType.ALIAS) 3968 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3969 return this 3970 else: 3971 over = self._prev.text.upper() 3972 3973 if not self._match(TokenType.L_PAREN): 3974 return self.expression( 3975 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3976 ) 3977 3978 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3979 3980 first = self._match(TokenType.FIRST) 3981 if self._match_text_seq("LAST"): 3982 first = False 3983 3984 partition = self._parse_partition_by() 3985 order = self._parse_order() 3986 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3987 3988 if kind: 3989 self._match(TokenType.BETWEEN) 3990 start = self._parse_window_spec() 3991 self._match(TokenType.AND) 3992 end = self._parse_window_spec() 3993 3994 spec = self.expression( 3995 exp.WindowSpec, 3996 kind=kind, 3997 start=start["value"], 3998 start_side=start["side"], 3999 end=end["value"], 4000 end_side=end["side"], 4001 ) 4002 else: 4003 spec = None 4004 4005 self._match_r_paren() 4006 4007 return self.expression( 4008 exp.Window, 4009 this=this, 4010 partition_by=partition, 4011 order=order, 4012 spec=spec, 4013 alias=window_alias, 4014 over=over, 4015 first=first, 4016 ) 4017 4018 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4019 self._match(TokenType.BETWEEN) 4020 4021 return { 4022 "value": ( 4023 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4024 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4025 or self._parse_bitwise() 4026 ), 4027 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4028 } 4029 4030 def _parse_alias( 4031 self, this: t.Optional[exp.Expression], explicit: bool = False 4032 ) -> t.Optional[exp.Expression]: 4033 
any_token = self._match(TokenType.ALIAS) 4034 4035 if explicit and not any_token: 4036 return this 4037 4038 if self._match(TokenType.L_PAREN): 4039 aliases = self.expression( 4040 exp.Aliases, 4041 this=this, 4042 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4043 ) 4044 self._match_r_paren(aliases) 4045 return aliases 4046 4047 alias = self._parse_id_var(any_token) 4048 4049 if alias: 4050 return self.expression(exp.Alias, this=this, alias=alias) 4051 4052 return this 4053 4054 def _parse_id_var( 4055 self, 4056 any_token: bool = True, 4057 tokens: t.Optional[t.Collection[TokenType]] = None, 4058 ) -> t.Optional[exp.Expression]: 4059 identifier = self._parse_identifier() 4060 4061 if identifier: 4062 return identifier 4063 4064 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4065 quoted = self._prev.token_type == TokenType.STRING 4066 return exp.Identifier(this=self._prev.text, quoted=quoted) 4067 4068 return None 4069 4070 def _parse_string(self) -> t.Optional[exp.Expression]: 4071 if self._match(TokenType.STRING): 4072 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4073 return self._parse_placeholder() 4074 4075 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4076 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4077 4078 def _parse_number(self) -> t.Optional[exp.Expression]: 4079 if self._match(TokenType.NUMBER): 4080 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4081 return self._parse_placeholder() 4082 4083 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4084 if self._match(TokenType.IDENTIFIER): 4085 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4086 return self._parse_placeholder() 4087 4088 def _parse_var( 4089 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4090 ) -> t.Optional[exp.Expression]: 4091 if ( 4092 (any_token and self._advance_any()) 4093 or self._match(TokenType.VAR) 4094 or (self._match_set(tokens) if tokens else False) 4095 ): 4096 return self.expression(exp.Var, this=self._prev.text) 4097 return self._parse_placeholder() 4098 4099 def _advance_any(self) -> t.Optional[Token]: 4100 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4101 self._advance() 4102 return self._prev 4103 return None 4104 4105 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4106 return self._parse_var() or self._parse_string() 4107 4108 def _parse_null(self) -> t.Optional[exp.Expression]: 4109 if self._match(TokenType.NULL): 4110 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4111 return None 4112 4113 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4114 if self._match(TokenType.TRUE): 4115 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4116 if self._match(TokenType.FALSE): 4117 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4118 return None 4119 4120 def _parse_star(self) -> t.Optional[exp.Expression]: 4121 if self._match(TokenType.STAR): 4122 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4123 return None 4124 4125 def _parse_parameter(self) -> exp.Parameter: 4126 wrapped = self._match(TokenType.L_BRACE) 4127 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4128 self._match(TokenType.R_BRACE) 4129 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4130 4131 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
4132 if self._match_set(self.PLACEHOLDER_PARSERS): 4133 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4134 if placeholder: 4135 return placeholder 4136 self._advance(-1) 4137 return None 4138 4139 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4140 if not self._match(TokenType.EXCEPT): 4141 return None 4142 if self._match(TokenType.L_PAREN, advance=False): 4143 return self._parse_wrapped_csv(self._parse_column) 4144 return self._parse_csv(self._parse_column) 4145 4146 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4147 if not self._match(TokenType.REPLACE): 4148 return None 4149 if self._match(TokenType.L_PAREN, advance=False): 4150 return self._parse_wrapped_csv(self._parse_expression) 4151 return self._parse_csv(self._parse_expression) 4152 4153 def _parse_csv( 4154 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4155 ) -> t.List[t.Optional[exp.Expression]]: 4156 parse_result = parse_method() 4157 items = [parse_result] if parse_result is not None else [] 4158 4159 while self._match(sep): 4160 self._add_comments(parse_result) 4161 parse_result = parse_method() 4162 if parse_result is not None: 4163 items.append(parse_result) 4164 4165 return items 4166 4167 def _parse_tokens( 4168 self, parse_method: t.Callable, expressions: t.Dict 4169 ) -> t.Optional[exp.Expression]: 4170 this = parse_method() 4171 4172 while self._match_set(expressions): 4173 this = self.expression( 4174 expressions[self._prev.token_type], 4175 this=this, 4176 comments=self._prev_comments, 4177 expression=parse_method(), 4178 ) 4179 4180 return this 4181 4182 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4183 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4184 4185 def _parse_wrapped_csv( 4186 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4187 ) -> t.List[t.Optional[exp.Expression]]: 4188 return self._parse_wrapped( 4189 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4190 ) 4191 4192 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4193 wrapped = self._match(TokenType.L_PAREN) 4194 if not wrapped and not optional: 4195 self.raise_error("Expecting (") 4196 parse_result = parse_method() 4197 if wrapped: 4198 self._match_r_paren() 4199 return parse_result 4200 4201 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4202 return self._parse_select() or self._parse_set_operations( 4203 self._parse_expression() if alias else self._parse_conjunction() 4204 ) 4205 4206 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4207 return self._parse_query_modifiers( 4208 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4209 ) 4210 4211 def _parse_transaction(self) -> exp.Transaction: 4212 this = None 4213 if self._match_texts(self.TRANSACTION_KIND): 4214 this = self._prev.text 4215 4216 self._match_texts({"TRANSACTION", "WORK"}) 4217 4218 modes = [] 4219 while True: 4220 mode = [] 4221 while self._match(TokenType.VAR): 4222 mode.append(self._prev.text) 4223 4224 if mode: 4225 modes.append(" ".join(mode)) 4226 if not self._match(TokenType.COMMA): 4227 break 4228 4229 return self.expression(exp.Transaction, this=this, modes=modes) 4230 4231 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4232 chain = None 4233 savepoint = None 4234 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4235 4236 self._match_texts({"TRANSACTION", "WORK"}) 4237 4238 if self._match_text_seq("TO"): 4239 self._match_text_seq("SAVEPOINT") 4240 savepoint = self._parse_id_var() 4241 4242 if self._match(TokenType.AND): 4243 chain = not self._match_text_seq("NO") 4244 self._match_text_seq("CHAIN") 4245 4246 if is_rollback: 4247 return self.expression(exp.Rollback, savepoint=savepoint) 4248 4249 return self.expression(exp.Commit, chain=chain) 4250 4251 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4252 if not self._match_text_seq("ADD"): 4253 return None 4254 4255 self._match(TokenType.COLUMN) 4256 exists_column = self._parse_exists(not_=True) 4257 expression = self._parse_column_def(self._parse_field(any_token=True)) 4258 4259 if expression: 4260 expression.set("exists", exists_column) 4261 4262 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4263 if self._match_texts(("FIRST", "AFTER")): 4264 position = self._prev.text 4265 column_position = self.expression( 4266 exp.ColumnPosition, this=self._parse_column(), position=position 4267 ) 4268 expression.set("position", column_position) 4269 4270 return expression 4271 4272 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4273 drop = self._match(TokenType.DROP) and self._parse_drop() 4274 if drop and not isinstance(drop, exp.Command): 4275 drop.set("kind", drop.args.get("kind", "COLUMN")) 4276 return drop 4277 4278 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4279 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4280 return self.expression( 4281 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4282 ) 4283 4284 def _parse_add_constraint(self) -> exp.AddConstraint: 4285 this = None 4286 kind = self._prev.token_type 4287 4288 if kind == TokenType.CONSTRAINT: 4289 this = self._parse_id_var() 4290 4291 if self._match_text_seq("CHECK"): 4292 expression = self._parse_wrapped(self._parse_conjunction) 4293 enforced = self._match_text_seq("ENFORCED") 4294 4295 return self.expression( 4296 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4297 ) 4298 4299 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4300 expression = self._parse_foreign_key() 4301 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4302 expression = self._parse_primary_key() 4303 else: 4304 expression = None 4305 4306 return self.expression(exp.AddConstraint, this=this, expression=expression) 4307 4308 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4309 index = self._index - 1 4310 4311 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4312 return self._parse_csv(self._parse_add_constraint) 4313 4314 self._retreat(index) 4315 return self._parse_csv(self._parse_add_column) 4316 4317 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4318 self._match(TokenType.COLUMN) 4319 column = self._parse_field(any_token=True) 4320 4321 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4322 return self.expression(exp.AlterColumn, this=column, drop=True) 4323 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4324 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4325 4326 self._match_text_seq("SET", "DATA") 4327 return self.expression( 4328 exp.AlterColumn, 4329 this=column, 4330 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4331 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4332 using=self._match(TokenType.USING) and self._parse_conjunction(), 4333 ) 4334 4335 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4336 index = self._index - 1 4337 4338 partition_exists = self._parse_exists() 4339 if self._match(TokenType.PARTITION, advance=False): 4340 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4341 4342 self._retreat(index) 4343 return self._parse_csv(self._parse_drop_column) 4344 4345 def _parse_alter_table_rename(self) -> exp.RenameTable: 4346 self._match_text_seq("TO") 4347 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4348 4349 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4350 start = self._prev 4351 4352 if not self._match(TokenType.TABLE): 4353 return self._parse_as_command(start) 4354 4355 exists = self._parse_exists() 4356 this = self._parse_table(schema=True) 4357 4358 if self._next: 4359 self._advance() 4360 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4361 4362 if parser: 4363 actions = ensure_list(parser(self)) 4364 4365 if not self._curr: 4366 return self.expression( 4367 exp.AlterTable, 4368 this=this, 4369 exists=exists, 4370 actions=actions, 4371 ) 4372 return self._parse_as_command(start) 4373 4374 def _parse_merge(self) -> exp.Merge: 4375 self._match(TokenType.INTO) 4376 target = self._parse_table() 4377 4378 self._match(TokenType.USING) 4379 using = self._parse_table() 4380 4381 self._match(TokenType.ON) 4382 on = self._parse_conjunction() 4383 4384 whens = [] 4385 while self._match(TokenType.WHEN): 4386 matched = not self._match(TokenType.NOT) 4387 self._match_text_seq("MATCHED") 4388 source = ( 4389 False 4390 if self._match_text_seq("BY", "TARGET") 4391 else self._match_text_seq("BY", "SOURCE") 4392 ) 4393 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4394 4395 self._match(TokenType.THEN) 4396 4397 if self._match(TokenType.INSERT): 4398 _this = self._parse_star() 4399 if _this: 4400 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4401 else: 4402 then = self.expression( 4403 exp.Insert, 4404 this=self._parse_value(), 4405 expression=self._match(TokenType.VALUES) and self._parse_value(), 4406 ) 4407 elif self._match(TokenType.UPDATE): 4408 expressions = self._parse_star() 4409 if expressions: 4410 then = self.expression(exp.Update, expressions=expressions) 4411 else: 4412 then = self.expression( 4413 exp.Update, 4414 expressions=self._match(TokenType.SET) 4415 and self._parse_csv(self._parse_equality), 4416 ) 4417 elif self._match(TokenType.DELETE): 4418 then = self.expression(exp.Var, this=self._prev.text) 4419 else: 4420 then = None 4421 4422 whens.append( 4423 self.expression( 4424 exp.When, 4425 matched=matched, 4426 source=source, 4427 condition=condition, 4428 then=then, 4429 ) 4430 ) 4431 4432 return self.expression( 4433 exp.Merge, 4434 this=target, 4435 using=using, 4436 on=on, 4437 expressions=whens, 4438 ) 4439 4440 def _parse_show(self) -> t.Optional[exp.Expression]: 4441 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4442 if parser: 4443 return parser(self) 4444 self._advance() 4445 return self.expression(exp.Show, this=self._prev.text.upper()) 4446 4447 def _parse_set_item_assignment( 4448 self, kind: t.Optional[str] = None 4449 ) -> t.Optional[exp.Expression]: 4450 index = self._index 4451 4452 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4453 return self._parse_set_transaction(global_=kind == "GLOBAL") 4454 4455 left = self._parse_primary() or self._parse_id_var() 4456 4457 if not self._match_texts(("=", "TO")): 4458 self._retreat(index) 4459 return None 4460 4461 right = self._parse_statement() or self._parse_id_var() 4462 this = self.expression(exp.EQ, this=left, expression=right) 4463 4464 return self.expression(exp.SetItem, this=this, kind=kind) 4465 4466 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4467 self._match_text_seq("TRANSACTION") 4468 characteristics = self._parse_csv( 4469 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4470 ) 4471 return self.expression( 4472 exp.SetItem, 4473 expressions=characteristics, 4474 kind="TRANSACTION", 4475 **{"global": global_}, # type: ignore 4476 ) 4477 4478 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4479 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4480 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4481 4482 def _parse_set(self) -> exp.Set | exp.Command: 4483 index = self._index 4484 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4485 4486 if self._curr: 4487 self._retreat(index) 4488 return self._parse_as_command(self._prev) 4489 4490 return set_ 4491 4492 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4493 for option in options: 4494 if self._match_text_seq(*option.split(" ")): 4495 return exp.var(option) 4496 return None 4497 4498 def _parse_as_command(self, start: Token) -> exp.Command: 4499 while self._curr: 4500 self._advance() 4501 text = self._find_sql(start, self._prev) 4502 size = len(start.text) 4503 return exp.Command(this=text[:size], expression=text[size:]) 4504 4505 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4506 settings = [] 4507 4508 self._match_l_paren() 4509 kind = self._parse_id_var() 4510 4511 if self._match(TokenType.L_PAREN): 4512 while True: 4513 key = self._parse_id_var() 4514 value = self._parse_primary() 4515 4516 if not key and value is None: 4517 break 4518 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4519 self._match(TokenType.R_PAREN) 4520 4521 self._match_r_paren() 4522 4523 return self.expression( 4524 exp.DictProperty, 4525 this=this, 4526 kind=kind.this if kind else None, 4527 settings=settings, 4528 ) 4529 4530 def _parse_dict_range(self, this: str) -> exp.DictRange: 4531 self._match_l_paren() 4532 has_min = self._match_text_seq("MIN") 4533 if has_min: 4534 min = self._parse_var() or self._parse_primary() 4535 self._match_text_seq("MAX") 4536 max = self._parse_var() or self._parse_primary() 4537 else: 4538 max = self._parse_var() or self._parse_primary() 4539 min = exp.Literal.number(0) 4540 self._match_r_paren() 4541 return self.expression(exp.DictRange, this=this, min=min, max=max) 4542 4543 def _find_parser( 4544 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4545 ) -> t.Optional[t.Callable]: 4546 if not self._curr: 4547 return None 4548 4549 index = self._index 4550 this = [] 4551 while True: 4552 # The current token might be multiple words 4553 curr = self._curr.text.upper() 4554 key = curr.split(" ") 4555 this.append(curr) 4556 self._advance() 4557 result, trie = in_trie(trie, key) 4558 if result == 0: 4559 break 4560 if result == 2: 4561 subparser = parsers[" ".join(this)] 4562 return subparser 4563 self._retreat(index) 4564 return None 4565 4566 def 
_match(self, token_type, advance=True, expression=None): 4567 if not self._curr: 4568 return None 4569 4570 if self._curr.token_type == token_type: 4571 if advance: 4572 self._advance() 4573 self._add_comments(expression) 4574 return True 4575 4576 return None 4577 4578 def _match_set(self, types, advance=True): 4579 if not self._curr: 4580 return None 4581 4582 if self._curr.token_type in types: 4583 if advance: 4584 self._advance() 4585 return True 4586 4587 return None 4588 4589 def _match_pair(self, token_type_a, token_type_b, advance=True): 4590 if not self._curr or not self._next: 4591 return None 4592 4593 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4594 if advance: 4595 self._advance(2) 4596 return True 4597 4598 return None 4599 4600 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4601 if not self._match(TokenType.L_PAREN, expression=expression): 4602 self.raise_error("Expecting (") 4603 4604 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4605 if not self._match(TokenType.R_PAREN, expression=expression): 4606 self.raise_error("Expecting )") 4607 4608 def _match_texts(self, texts, advance=True): 4609 if self._curr and self._curr.text.upper() in texts: 4610 if advance: 4611 self._advance() 4612 return True 4613 return False 4614 4615 def _match_text_seq(self, *texts, advance=True): 4616 index = self._index 4617 for text in texts: 4618 if self._curr and self._curr.text.upper() == text: 4619 self._advance() 4620 else: 4621 self._retreat(index) 4622 return False 4623 4624 if not advance: 4625 self._retreat(index) 4626 4627 return True 4628 4629 @t.overload 4630 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4631 ... 4632 4633 @t.overload 4634 def _replace_columns_with_dots( 4635 self, this: t.Optional[exp.Expression] 4636 ) -> t.Optional[exp.Expression]: 4637 ... 4638 4639 def _replace_columns_with_dots(self, this): 4640 if isinstance(this, exp.Dot): 4641 exp.replace_children(this, self._replace_columns_with_dots) 4642 elif isinstance(this, exp.Column): 4643 exp.replace_children(this, self._replace_columns_with_dots) 4644 table = this.args.get("table") 4645 this = ( 4646 self.expression(exp.Dot, this=table, expression=this.this) 4647 if table 4648 else self.expression(exp.Var, this=this.name) 4649 ) 4650 elif isinstance(this, exp.Identifier): 4651 this = self.expression(exp.Var, this=this.name) 4652 4653 return this 4654 4655 def _replace_lambda( 4656 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4657 ) -> t.Optional[exp.Expression]: 4658 if not node: 4659 return node 4660 4661 for column in node.find_all(exp.Column): 4662 if column.parts[0].name in lambda_variables: 4663 dot_or_id = column.to_dot() if column.table else column.this 4664 parent = column.parent 4665 4666 while isinstance(parent, exp.Dot): 4667 if not isinstance(parent.parent, exp.Dot): 4668 parent.replace(dot_or_id) 4669 break 4670 parent = parent.parent 4671 else: 4672 if column is node: 4673 node = dot_or_id 4674 else: 4675 column.replace(dot_or_id) 4676 return node
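The private methods in this listing are easiest to sanity-check through sqlglot's public entry points. The short sketches below are illustrative only: they assume a reasonably recent sqlglot installation, use hypothetical table and column names (t, s, x, y, a, b, id, v), and any printed output shown in comments is indicative rather than version-exact. First, _parse_bracket's handling of DuckDB-style {...} struct literals, per the duckdb.org link in the method body:

import sqlglot
from sqlglot import exp

# _parse_bracket treats { ... } as a DuckDB struct literal and [ ... ] as
# array construction or indexing (apply_index_offset normalizes index bases).
node = sqlglot.parse_one("SELECT {'a': 1, 'b': 2} AS s", read="duckdb")
assert node.find(exp.Struct) is not None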
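_parse_cast turns CAST(x AS <temporal type> FORMAT '<fmt>') into a StrToDate / StrToTime node rather than a plain Cast, translating the format string through FORMAT_MAPPING or TIME_MAPPING. A minimal sketch, assuming Teradata's FORMAT syntax and format mappings:

import sqlglot
from sqlglot import exp

# CAST with a FORMAT clause on a temporal type parses as a string-to-date
# conversion, not a Cast; the format tokens are dialect-translated.
node = sqlglot.parse_one(
    "SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD') FROM t", read="teradata"
)
assert node.find(exp.StrToDate) is not None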
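Per the comment in _parse_concat, a single-argument CONCAT call is replaced by its argument, since some dialects (e.g. Trino) reject the one-argument form. A quick round-trip:

import sqlglot

# A one-argument CONCAT collapses to its argument during parsing.
print(sqlglot.parse_one("SELECT CONCAT(a) FROM t").sql())
# Indicative output: SELECT a FROM t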
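_parse_string_agg deliberately parses the WITHIN GROUP order "manually" so that STRING_AGG lands in the same AST shape as MySQL's GROUP_CONCAT, which is what makes transpilation between the two straightforward. A sketch, with the exact output string indicative:

import sqlglot

# T-SQL STRING_AGG ... WITHIN GROUP transpiled to MySQL's GROUP_CONCAT.
sql = "SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t"
print(sqlglot.transpile(sql, read="tsql", write="mysql")[0])
# Indicative output: SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t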
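As the _parse_decode docstring states, the multi-argument DECODE variant is always parsed into a CASE expression, with NULL searches compiled to explicit IS NULL checks. A sketch:

import sqlglot

# Oracle's DECODE(expression, search, result, ..., default) becomes a CASE.
sql = "SELECT DECODE(x, 1, 'one', 2, 'two', 'other') FROM t"
print(sqlglot.transpile(sql, read="oracle")[0])
# Indicative output:
# SELECT CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END FROM t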
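The comments in _parse_window note that IGNORE | RESPECT NULLS is accepted both inside the function call (handled in _parse_lambda) and between the call and OVER (handled by _parse_respect_or_ignore_nulls), as Oracle and Snowflake allow. Both placements should yield an exp.IgnoreNulls wrapper:

import sqlglot
from sqlglot import exp

# Both placements of IGNORE NULLS produce the same wrapper node.
inside = sqlglot.parse_one(
    "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
)
outside = sqlglot.parse_one(
    "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t"
)
assert inside.find(exp.IgnoreNulls) is not None
assert outside.find(exp.IgnoreNulls) is not None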
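_parse_placeholder defers to PLACEHOLDER_PARSERS, which in the base parser covers ? parameters and :name placeholders (a colon followed by a VAR or NUMBER token). A sketch:

import sqlglot
from sqlglot import exp

# A named ":id" placeholder parses into an exp.Placeholder node.
node = sqlglot.parse_one("SELECT * FROM t WHERE id = :id")
assert node.find(exp.Placeholder) is not None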
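_parse_except and _parse_replace back the EXCEPT / REPLACE modifiers of SELECT *, as in BigQuery; they populate the "except" and "replace" arguments of the Star node built by PRIMARY_PARSERS. A sketch:

import sqlglot

# BigQuery's SELECT * EXCEPT (...) round-trips through the Star node.
node = sqlglot.parse_one("SELECT * EXCEPT (a) FROM t", read="bigquery")
print(node.sql("bigquery"))
# Indicative output: SELECT * EXCEPT (a) FROM t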
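Finally, _parse_merge handles the WHEN [NOT] MATCHED [BY SOURCE | BY TARGET] [AND <condition>] THEN clauses of MERGE, collecting one exp.When per clause. A round-trip sketch:

import sqlglot
from sqlglot import exp

# A MERGE statement parses into exp.Merge with one exp.When per clause.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
print(merge.sql())  # regenerates the MERGE statement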
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: 
exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 
TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 489 exp.Table: lambda self: self._parse_table_parts(), 
490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, 545 **{"except": self._parse_except(), "replace": self._parse_replace()}, 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 
PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER": lambda self: self._parse_cluster(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 592 "DEFINER": lambda self: self._parse_definer(), 593 "DETERMINISTIC": lambda self: self.expression( 594 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 595 ), 596 "DISTKEY": lambda self: self._parse_distkey(), 597 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 598 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 599 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 600 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 601 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 602 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 603 "FREESPACE": lambda self: self._parse_freespace(), 604 "IMMUTABLE": lambda self: self.expression( 605 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 606 ), 607 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 608 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 609 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 610 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 611 "LIKE": lambda self: self._parse_create_like(), 612 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 613 "LOCK": lambda self: self._parse_locking(), 614 "LOCKING": lambda self: self._parse_locking(), 615 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 616 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 617 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 618 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 619 "NO": lambda self: self._parse_no_property(), 620 "ON": lambda self: self._parse_on_property(), 621 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 622 "PARTITION BY": 
lambda self: self._parse_partitioned_by(), 623 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 625 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 626 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 627 "RETURNS": lambda self: self._parse_returns(), 628 "ROW": lambda self: self._parse_row(), 629 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 630 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 631 "SETTINGS": lambda self: self.expression( 632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 633 ), 634 "SORTKEY": lambda self: self._parse_sortkey(), 635 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 636 "STABLE": lambda self: self.expression( 637 exp.StabilityProperty, this=exp.Literal.string("STABLE") 638 ), 639 "STORED": lambda self: self._parse_stored(), 640 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 641 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 642 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 643 "TO": lambda self: self._parse_to_table(), 644 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 645 "TTL": lambda self: self._parse_ttl(), 646 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "VOLATILE": lambda self: self._parse_volatile_property(), 648 "WITH": lambda self: self._parse_with_property(), 649 } 650 651 CONSTRAINT_PARSERS = { 652 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 653 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 654 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 655 "CHARACTER SET": lambda self: self.expression( 656 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 657 ), 658 "CHECK": lambda self: self.expression( 659 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 660 ), 661 "COLLATE": lambda self: self.expression( 662 exp.CollateColumnConstraint, this=self._parse_var() 663 ), 664 "COMMENT": lambda self: self.expression( 665 exp.CommentColumnConstraint, this=self._parse_string() 666 ), 667 "COMPRESS": lambda self: self._parse_compress(), 668 "DEFAULT": lambda self: self.expression( 669 exp.DefaultColumnConstraint, this=self._parse_bitwise() 670 ), 671 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 672 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 673 "FORMAT": lambda self: self.expression( 674 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 675 ), 676 "GENERATED": lambda self: self._parse_generated_as_identity(), 677 "IDENTITY": lambda self: self._parse_auto_increment(), 678 "INLINE": lambda self: self._parse_inline(), 679 "LIKE": lambda self: self._parse_create_like(), 680 "NOT": lambda self: self._parse_not_constraint(), 681 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 682 "ON": lambda self: self._match(TokenType.UPDATE) 683 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 684 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 685 "PRIMARY KEY": lambda self: self._parse_primary_key(), 686 "REFERENCES": lambda self: self._parse_references(match=False), 687 "TITLE": lambda self: self.expression( 688 exp.TitleColumnConstraint, 
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}
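    # Illustrative note (not in the original source): _parse_query_modifiers walks
    # QUERY_MODIFIER_PARSERS in insertion order, so clauses are consumed in the canonical
    # SELECT order: joins/laterals, MATCH_RECOGNIZE, WHERE, GROUP BY, HAVING, QUALIFY,
    # WINDOW, ORDER BY, LIMIT, OFFSET, locks, and a trailing table sample.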
{"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 771 TRANSACTION_CHARACTERISTICS = { 772 "ISOLATION LEVEL REPEATABLE READ", 773 "ISOLATION LEVEL READ COMMITTED", 774 "ISOLATION LEVEL READ UNCOMMITTED", 775 "ISOLATION LEVEL SERIALIZABLE", 776 "READ WRITE", 777 "READ ONLY", 778 } 779 780 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 781 782 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 783 784 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 785 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 786 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 787 788 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 789 790 STRICT_CAST = True 791 792 CONCAT_NULL_OUTPUTS_STRING = False # A NULL arg in CONCAT yields NULL by default 793 794 CONVERT_TYPE_FIRST = False 795 796 PREFIXED_PIVOT_COLUMNS = False 797 IDENTIFY_PIVOT_STRINGS = False 798 799 LOG_BASE_FIRST = True 800 LOG_DEFAULTS_TO_LN = False 801 802 __slots__ = ( 803 "error_level", 804 "error_message_context", 805 "max_errors", 806 "sql", 807 "errors", 808 "_tokens", 809 "_index", 810 "_curr", 811 "_next", 812 "_prev", 813 "_prev_comments", 814 ) 815 816 # Autofilled 817 INDEX_OFFSET: int = 0 818 UNNEST_COLUMN_ONLY: bool = False 819 ALIAS_POST_TABLESAMPLE: bool = False 820 STRICT_STRING_CONCAT = False 821 NULL_ORDERING: str = "nulls_are_small" 822 SHOW_TRIE: t.Dict = {} 823 SET_TRIE: t.Dict = {} 824 FORMAT_MAPPING: t.Dict[str, str] = {} 825 FORMAT_TRIE: t.Dict = {} 826 TIME_MAPPING: t.Dict[str, str] = {} 827 TIME_TRIE: t.Dict = {} 828 829 def __init__( 830 self, 831 error_level: t.Optional[ErrorLevel] = None, 832 error_message_context: int = 100, 833 max_errors: int = 3, 834 ): 835 self.error_level = error_level or ErrorLevel.IMMEDIATE 836 self.error_message_context = error_message_context 837 self.max_errors = max_errors 838 self.reset() 839 840 def reset(self): 841 self.sql = "" 842 self.errors = [] 843 self._tokens = [] 844 self._index = 0 845 self._curr = None 846 self._next = None 847 self._prev = None 848 self._prev_comments = None 849 850 def parse( 851 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 852 ) -> t.List[t.Optional[exp.Expression]]: 853 """ 854 Parses a list of tokens and returns a list of syntax trees, one tree 855 per parsed SQL statement. 856 857 Args: 858 raw_tokens: The list of tokens. 859 sql: The original SQL string, used to produce helpful debug messages. 860 861 Returns: 862 The list of the produced syntax trees. 863 """ 864 return self._parse( 865 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 866 ) 867 868 def parse_into( 869 self, 870 expression_types: exp.IntoType, 871 raw_tokens: t.List[Token], 872 sql: t.Optional[str] = None, 873 ) -> t.List[t.Optional[exp.Expression]]: 874 """ 875 Parses a list of tokens into a given Expression type. If a collection of Expression 876 types is given instead, this method will try to parse the token list into each one 877 of them, stopping at the first for which the parsing succeeds. 878 879 Args: 880 expression_types: The expression type(s) to try and parse the token list into. 881 raw_tokens: The list of tokens. 882 sql: The original SQL string, used to produce helpful debug messages. 883 884 Returns: 885 The target Expression. 
886 """ 887 errors = [] 888 for expression_type in ensure_list(expression_types): 889 parser = self.EXPRESSION_PARSERS.get(expression_type) 890 if not parser: 891 raise TypeError(f"No parser registered for {expression_type}") 892 893 try: 894 return self._parse(parser, raw_tokens, sql) 895 except ParseError as e: 896 e.errors[0]["into_expression"] = expression_type 897 errors.append(e) 898 899 raise ParseError( 900 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 901 errors=merge_errors(errors), 902 ) from errors[-1] 903 904 def _parse( 905 self, 906 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 907 raw_tokens: t.List[Token], 908 sql: t.Optional[str] = None, 909 ) -> t.List[t.Optional[exp.Expression]]: 910 self.reset() 911 self.sql = sql or "" 912 913 total = len(raw_tokens) 914 chunks: t.List[t.List[Token]] = [[]] 915 916 for i, token in enumerate(raw_tokens): 917 if token.token_type == TokenType.SEMICOLON: 918 if i < total - 1: 919 chunks.append([]) 920 else: 921 chunks[-1].append(token) 922 923 expressions = [] 924 925 for tokens in chunks: 926 self._index = -1 927 self._tokens = tokens 928 self._advance() 929 930 expressions.append(parse_method(self)) 931 932 if self._index < len(self._tokens): 933 self.raise_error("Invalid expression / Unexpected token") 934 935 self.check_errors() 936 937 return expressions 938 939 def check_errors(self) -> None: 940 """Logs or raises any found errors, depending on the chosen error level setting.""" 941 if self.error_level == ErrorLevel.WARN: 942 for error in self.errors: 943 logger.error(str(error)) 944 elif self.error_level == ErrorLevel.RAISE and self.errors: 945 raise ParseError( 946 concat_messages(self.errors, self.max_errors), 947 errors=merge_errors(self.errors), 948 ) 949 950 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 951 """ 952 Appends an error in the list of recorded errors or raises it, depending on the chosen 953 error level setting. 954 """ 955 token = token or self._curr or self._prev or Token.string("") 956 start = token.start 957 end = token.end + 1 958 start_context = self.sql[max(start - self.error_message_context, 0) : start] 959 highlight = self.sql[start:end] 960 end_context = self.sql[end : end + self.error_message_context] 961 962 error = ParseError.new( 963 f"{message}. Line {token.line}, Col: {token.col}.\n" 964 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 965 description=message, 966 line=token.line, 967 col=token.col, 968 start_context=start_context, 969 highlight=highlight, 970 end_context=end_context, 971 ) 972 973 if self.error_level == ErrorLevel.IMMEDIATE: 974 raise error 975 976 self.errors.append(error) 977 978 def expression( 979 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 980 ) -> E: 981 """ 982 Creates a new, validated Expression. 983 984 Args: 985 exp_class: The expression class to instantiate. 986 comments: An optional list of comments to attach to the expression. 987 kwargs: The arguments to set for the expression along with their respective values. 988 989 Returns: 990 The target expression. 
991 """ 992 instance = exp_class(**kwargs) 993 instance.add_comments(comments) if comments else self._add_comments(instance) 994 return self.validate_expression(instance) 995 996 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 997 if expression and self._prev_comments: 998 expression.add_comments(self._prev_comments) 999 self._prev_comments = None 1000 1001 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1002 """ 1003 Validates an Expression, making sure that all its mandatory arguments are set. 1004 1005 Args: 1006 expression: The expression to validate. 1007 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1008 1009 Returns: 1010 The validated expression. 1011 """ 1012 if self.error_level != ErrorLevel.IGNORE: 1013 for error_message in expression.error_messages(args): 1014 self.raise_error(error_message) 1015 1016 return expression 1017 1018 def _find_sql(self, start: Token, end: Token) -> str: 1019 return self.sql[start.start : end.end + 1] 1020 1021 def _advance(self, times: int = 1) -> None: 1022 self._index += times 1023 self._curr = seq_get(self._tokens, self._index) 1024 self._next = seq_get(self._tokens, self._index + 1) 1025 1026 if self._index > 0: 1027 self._prev = self._tokens[self._index - 1] 1028 self._prev_comments = self._prev.comments 1029 else: 1030 self._prev = None 1031 self._prev_comments = None 1032 1033 def _retreat(self, index: int) -> None: 1034 if index != self._index: 1035 self._advance(index - self._index) 1036 1037 def _parse_command(self) -> exp.Command: 1038 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1039 1040 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1041 start = self._prev 1042 exists = self._parse_exists() if allow_exists else None 1043 1044 self._match(TokenType.ON) 1045 1046 kind = self._match_set(self.CREATABLES) and self._prev 1047 if not kind: 1048 return self._parse_as_command(start) 1049 1050 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1051 this = self._parse_user_defined_function(kind=kind.token_type) 1052 elif kind.token_type == TokenType.TABLE: 1053 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1054 elif kind.token_type == TokenType.COLUMN: 1055 this = self._parse_column() 1056 else: 1057 this = self._parse_id_var() 1058 1059 self._match(TokenType.IS) 1060 1061 return self.expression( 1062 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1063 ) 1064 1065 def _parse_to_table( 1066 self, 1067 ) -> exp.ToTableProperty: 1068 table = self._parse_table_parts(schema=True) 1069 return self.expression(exp.ToTableProperty, this=table) 1070 1071 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1072 def _parse_ttl(self) -> exp.Expression: 1073 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1074 this = self._parse_bitwise() 1075 1076 if self._match_text_seq("DELETE"): 1077 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1078 if self._match_text_seq("RECOMPRESS"): 1079 return self.expression( 1080 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1081 ) 1082 if self._match_text_seq("TO", "DISK"): 1083 return self.expression( 1084 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1085 ) 1086 if self._match_text_seq("TO", "VOLUME"): 1087 return self.expression( 1088 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
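    # Illustrative ClickHouse SQL (not in the original source) for the TTL clause parsed above:
    #
    #   TTL d + INTERVAL 1 MONTH DELETE,
    #       d + INTERVAL 1 WEEK TO VOLUME 'aaa',
    #       d + INTERVAL 2 WEEK TO DISK 'bbb'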
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
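    # Illustrative Teradata-style SQL (not in the original source) for the "property
    # before schema" path above:
    #
    #   CREATE TABLE db.t, NO FALLBACK, NO BEFORE JOURNAL (a INT)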
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
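    # Illustrative MySQL SQL (not in the original source) for the DEFINER property above:
    #
    #   CREATE DEFINER = admin@localhost VIEW v AS SELECT 1
    #
    # which yields exp.DefinerProperty(this="admin@localhost").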
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
self._match_text_seq("DATABASE"): 1517 kind = "DATABASE" 1518 else: 1519 kind = None 1520 1521 if kind in ("DATABASE", "TABLE", "VIEW"): 1522 this = self._parse_table_parts() 1523 else: 1524 this = None 1525 1526 if self._match(TokenType.FOR): 1527 for_or_in = "FOR" 1528 elif self._match(TokenType.IN): 1529 for_or_in = "IN" 1530 else: 1531 for_or_in = None 1532 1533 if self._match_text_seq("ACCESS"): 1534 lock_type = "ACCESS" 1535 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1536 lock_type = "EXCLUSIVE" 1537 elif self._match_text_seq("SHARE"): 1538 lock_type = "SHARE" 1539 elif self._match_text_seq("READ"): 1540 lock_type = "READ" 1541 elif self._match_text_seq("WRITE"): 1542 lock_type = "WRITE" 1543 elif self._match_text_seq("CHECKSUM"): 1544 lock_type = "CHECKSUM" 1545 else: 1546 lock_type = None 1547 1548 override = self._match_text_seq("OVERRIDE") 1549 1550 return self.expression( 1551 exp.LockingProperty, 1552 this=this, 1553 kind=kind, 1554 for_or_in=for_or_in, 1555 lock_type=lock_type, 1556 override=override, 1557 ) 1558 1559 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1560 if self._match(TokenType.PARTITION_BY): 1561 return self._parse_csv(self._parse_conjunction) 1562 return [] 1563 1564 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1565 self._match(TokenType.EQ) 1566 return self.expression( 1567 exp.PartitionedByProperty, 1568 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1569 ) 1570 1571 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1572 if self._match_text_seq("AND", "STATISTICS"): 1573 statistics = True 1574 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1575 statistics = False 1576 else: 1577 statistics = None 1578 1579 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1580 1581 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1582 if self._match_text_seq("PRIMARY", "INDEX"): 1583 return exp.NoPrimaryIndexProperty() 1584 return None 1585 1586 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1587 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1588 return exp.OnCommitProperty() 1589 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1590 return exp.OnCommitProperty(delete=True) 1591 return None 1592 1593 def _parse_distkey(self) -> exp.DistKeyProperty: 1594 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1595 1596 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1597 table = self._parse_table(schema=True) 1598 1599 options = [] 1600 while self._match_texts(("INCLUDING", "EXCLUDING")): 1601 this = self._prev.text.upper() 1602 1603 id_var = self._parse_id_var() 1604 if not id_var: 1605 return None 1606 1607 options.append( 1608 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1609 ) 1610 1611 return self.expression(exp.LikeProperty, this=table, expressions=options) 1612 1613 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1614 return self.expression( 1615 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1616 ) 1617 1618 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1619 self._match(TokenType.EQ) 1620 return self.expression( 1621 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1622 ) 1623 1624 def _parse_returns(self) -> exp.ReturnsProperty: 1625 value: t.Optional[exp.Expression] 1626 is_table = 
    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
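    # Illustrative SQL (not in the original source) for _parse_on_conflict above:
    #
    #   INSERT INTO t (id, v) VALUES (1, 'a') ON CONFLICT (id) DO UPDATE SET v = 'a'  -- Postgres
    #   INSERT INTO t (id, v) VALUES (1, 'a') ON DUPLICATE KEY UPDATE v = 'a'         -- MySQL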
kwargs["fields"] = self._parse_string() 1741 if self._match_text_seq("ESCAPED", "BY"): 1742 kwargs["escaped"] = self._parse_string() 1743 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1744 kwargs["collection_items"] = self._parse_string() 1745 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1746 kwargs["map_keys"] = self._parse_string() 1747 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1748 kwargs["lines"] = self._parse_string() 1749 if self._match_text_seq("NULL", "DEFINED", "AS"): 1750 kwargs["null"] = self._parse_string() 1751 1752 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1753 1754 def _parse_load(self) -> exp.LoadData | exp.Command: 1755 if self._match_text_seq("DATA"): 1756 local = self._match_text_seq("LOCAL") 1757 self._match_text_seq("INPATH") 1758 inpath = self._parse_string() 1759 overwrite = self._match(TokenType.OVERWRITE) 1760 self._match_pair(TokenType.INTO, TokenType.TABLE) 1761 1762 return self.expression( 1763 exp.LoadData, 1764 this=self._parse_table(schema=True), 1765 local=local, 1766 overwrite=overwrite, 1767 inpath=inpath, 1768 partition=self._parse_partition(), 1769 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1770 serde=self._match_text_seq("SERDE") and self._parse_string(), 1771 ) 1772 return self._parse_as_command(self._prev) 1773 1774 def _parse_delete(self) -> exp.Delete: 1775 self._match(TokenType.FROM) 1776 1777 return self.expression( 1778 exp.Delete, 1779 this=self._parse_table(), 1780 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1781 where=self._parse_where(), 1782 returning=self._parse_returning(), 1783 ) 1784 1785 def _parse_update(self) -> exp.Update: 1786 return self.expression( 1787 exp.Update, 1788 **{ # type: ignore 1789 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1790 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1791 "from": self._parse_from(modifiers=True), 1792 "where": self._parse_where(), 1793 "returning": self._parse_returning(), 1794 }, 1795 ) 1796 1797 def _parse_uncache(self) -> exp.Uncache: 1798 if not self._match(TokenType.TABLE): 1799 self.raise_error("Expecting TABLE after UNCACHE") 1800 1801 return self.expression( 1802 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1803 ) 1804 1805 def _parse_cache(self) -> exp.Cache: 1806 lazy = self._match_text_seq("LAZY") 1807 self._match(TokenType.TABLE) 1808 table = self._parse_table(schema=True) 1809 1810 options = [] 1811 if self._match_text_seq("OPTIONS"): 1812 self._match_l_paren() 1813 k = self._parse_string() 1814 self._match(TokenType.EQ) 1815 v = self._parse_string() 1816 options = [k, v] 1817 self._match_r_paren() 1818 1819 self._match(TokenType.ALIAS) 1820 return self.expression( 1821 exp.Cache, 1822 this=table, 1823 lazy=lazy, 1824 options=options, 1825 expression=self._parse_select(nested=True), 1826 ) 1827 1828 def _parse_partition(self) -> t.Optional[exp.Partition]: 1829 if not self._match(TokenType.PARTITION): 1830 return None 1831 1832 return self.expression( 1833 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1834 ) 1835 1836 def _parse_value(self) -> exp.Tuple: 1837 if self._match(TokenType.L_PAREN): 1838 expressions = self._parse_csv(self._parse_conjunction) 1839 self._match_r_paren() 1840 return self.expression(exp.Tuple, expressions=expressions) 1841 1842 # In presto we can have VALUES 1, 2 which results in 
1 column & 2 rows. 1843 # Source: https://prestodb.io/docs/current/sql/values.html 1844 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1845 1846 def _parse_select( 1847 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1848 ) -> t.Optional[exp.Expression]: 1849 cte = self._parse_with() 1850 if cte: 1851 this = self._parse_statement() 1852 1853 if not this: 1854 self.raise_error("Failed to parse any statement following CTE") 1855 return cte 1856 1857 if "with" in this.arg_types: 1858 this.set("with", cte) 1859 else: 1860 self.raise_error(f"{this.key} does not support CTE") 1861 this = cte 1862 elif self._match(TokenType.SELECT): 1863 comments = self._prev_comments 1864 1865 hint = self._parse_hint() 1866 all_ = self._match(TokenType.ALL) 1867 distinct = self._match(TokenType.DISTINCT) 1868 1869 kind = ( 1870 self._match(TokenType.ALIAS) 1871 and self._match_texts(("STRUCT", "VALUE")) 1872 and self._prev.text 1873 ) 1874 1875 if distinct: 1876 distinct = self.expression( 1877 exp.Distinct, 1878 on=self._parse_value() if self._match(TokenType.ON) else None, 1879 ) 1880 1881 if all_ and distinct: 1882 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1883 1884 limit = self._parse_limit(top=True) 1885 expressions = self._parse_csv(self._parse_expression) 1886 1887 this = self.expression( 1888 exp.Select, 1889 kind=kind, 1890 hint=hint, 1891 distinct=distinct, 1892 expressions=expressions, 1893 limit=limit, 1894 ) 1895 this.comments = comments 1896 1897 into = self._parse_into() 1898 if into: 1899 this.set("into", into) 1900 1901 from_ = self._parse_from() 1902 if from_: 1903 this.set("from", from_) 1904 1905 this = self._parse_query_modifiers(this) 1906 elif (table or nested) and self._match(TokenType.L_PAREN): 1907 if self._match(TokenType.PIVOT): 1908 this = self._parse_simplified_pivot() 1909 elif self._match(TokenType.FROM): 1910 this = exp.select("*").from_( 1911 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1912 ) 1913 else: 1914 this = self._parse_table() if table else self._parse_select(nested=True) 1915 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1916 1917 self._match_r_paren() 1918 1919 # early return so that subquery unions aren't parsed again 1920 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1921 # Union ALL should be a property of the top select node, not the subquery 1922 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1923 elif self._match(TokenType.VALUES): 1924 this = self.expression( 1925 exp.Values, 1926 expressions=self._parse_csv(self._parse_value), 1927 alias=self._parse_table_alias(), 1928 ) 1929 else: 1930 this = None 1931 1932 return self._parse_set_operations(this) 1933 1934 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1935 if not skip_with_token and not self._match(TokenType.WITH): 1936 return None 1937 1938 comments = self._prev_comments 1939 recursive = self._match(TokenType.RECURSIVE) 1940 1941 expressions = [] 1942 while True: 1943 expressions.append(self._parse_cte()) 1944 1945 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1946 break 1947 else: 1948 self._match(TokenType.WITH) 1949 1950 return self.expression( 1951 exp.With, comments=comments, expressions=expressions, recursive=recursive 1952 ) 1953 1954 def _parse_cte(self) -> exp.CTE: 1955 alias = self._parse_table_alias() 1956 if not alias or not alias.this: 1957 self.raise_error("Expected CTE to have alias") 1958 
    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )
2070 text += f" OMIT EMPTY MATCHES" 2071 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2072 text += f" WITH UNMATCHED ROWS" 2073 rows = exp.var(text) 2074 else: 2075 rows = None 2076 2077 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2078 text = "AFTER MATCH SKIP" 2079 if self._match_text_seq("PAST", "LAST", "ROW"): 2080 text += f" PAST LAST ROW" 2081 elif self._match_text_seq("TO", "NEXT", "ROW"): 2082 text += f" TO NEXT ROW" 2083 elif self._match_text_seq("TO", "FIRST"): 2084 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2085 elif self._match_text_seq("TO", "LAST"): 2086 text += f" TO LAST {self._advance_any().text}" # type: ignore 2087 after = exp.var(text) 2088 else: 2089 after = None 2090 2091 if self._match_text_seq("PATTERN"): 2092 self._match_l_paren() 2093 2094 if not self._curr: 2095 self.raise_error("Expecting )", self._curr) 2096 2097 paren = 1 2098 start = self._curr 2099 2100 while self._curr and paren > 0: 2101 if self._curr.token_type == TokenType.L_PAREN: 2102 paren += 1 2103 if self._curr.token_type == TokenType.R_PAREN: 2104 paren -= 1 2105 2106 end = self._prev 2107 self._advance() 2108 2109 if paren > 0: 2110 self.raise_error("Expecting )", self._curr) 2111 2112 pattern = exp.var(self._find_sql(start, end)) 2113 else: 2114 pattern = None 2115 2116 define = ( 2117 self._parse_csv( 2118 lambda: self.expression( 2119 exp.Alias, 2120 alias=self._parse_id_var(any_token=True), 2121 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2122 ) 2123 ) 2124 if self._match_text_seq("DEFINE") 2125 else None 2126 ) 2127 2128 self._match_r_paren() 2129 2130 return self.expression( 2131 exp.MatchRecognize, 2132 partition_by=partition, 2133 order=order, 2134 measures=measures, 2135 rows=rows, 2136 after=after, 2137 pattern=pattern, 2138 define=define, 2139 alias=self._parse_table_alias(), 2140 ) 2141 2142 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2143 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2144 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2145 2146 if outer_apply or cross_apply: 2147 this = self._parse_select(table=True) 2148 view = None 2149 outer = not cross_apply 2150 elif self._match(TokenType.LATERAL): 2151 this = self._parse_select(table=True) 2152 view = self._match(TokenType.VIEW) 2153 outer = self._match(TokenType.OUTER) 2154 else: 2155 return None 2156 2157 if not this: 2158 this = self._parse_function() or self._parse_id_var(any_token=False) 2159 while self._match(TokenType.DOT): 2160 this = exp.Dot( 2161 this=this, 2162 expression=self._parse_function() or self._parse_id_var(any_token=False), 2163 ) 2164 2165 if view: 2166 table = self._parse_id_var(any_token=False) 2167 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2168 table_alias: t.Optional[exp.TableAlias] = self.expression( 2169 exp.TableAlias, this=table, columns=columns 2170 ) 2171 elif isinstance(this, exp.Subquery) and this.alias: 2172 # Ensures parity between the Subquery's and the Lateral's "alias" args 2173 table_alias = this.args["alias"].copy() 2174 else: 2175 table_alias = self._parse_table_alias() 2176 2177 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2178 2179 def _parse_join_parts( 2180 self, 2181 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2182 return ( 2183 self._match_set(self.JOIN_METHODS) and self._prev, 2184 self._match_set(self.JOIN_SIDES) and self._prev, 2185 
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
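    # Illustrative note (not in the original source): for a pivot such as
    # PIVOT(SUM(x) AS total FOR c IN ('a', 'b')), the output columns generated by
    # _parse_pivot above default to a_total and b_total; with PREFIXED_PIVOT_COLUMNS
    # they become total_a and total_b instead.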
[agg.alias for agg in aggregations] 2534 2535 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2536 if not skip_where_token and not self._match(TokenType.WHERE): 2537 return None 2538 2539 return self.expression( 2540 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2541 ) 2542 2543 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2544 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2545 return None 2546 2547 elements = defaultdict(list) 2548 2549 while True: 2550 expressions = self._parse_csv(self._parse_conjunction) 2551 if expressions: 2552 elements["expressions"].extend(expressions) 2553 2554 grouping_sets = self._parse_grouping_sets() 2555 if grouping_sets: 2556 elements["grouping_sets"].extend(grouping_sets) 2557 2558 rollup = None 2559 cube = None 2560 totals = None 2561 2562 with_ = self._match(TokenType.WITH) 2563 if self._match(TokenType.ROLLUP): 2564 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2565 elements["rollup"].extend(ensure_list(rollup)) 2566 2567 if self._match(TokenType.CUBE): 2568 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2569 elements["cube"].extend(ensure_list(cube)) 2570 2571 if self._match_text_seq("TOTALS"): 2572 totals = True 2573 elements["totals"] = True # type: ignore 2574 2575 if not (grouping_sets or rollup or cube or totals): 2576 break 2577 2578 return self.expression(exp.Group, **elements) # type: ignore 2579 2580 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2581 if not self._match(TokenType.GROUPING_SETS): 2582 return None 2583 2584 return self._parse_wrapped_csv(self._parse_grouping_set) 2585 2586 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2587 if self._match(TokenType.L_PAREN): 2588 grouping_set = self._parse_csv(self._parse_column) 2589 self._match_r_paren() 2590 return self.expression(exp.Tuple, expressions=grouping_set) 2591 2592 return self._parse_column() 2593 2594 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2595 if not skip_having_token and not self._match(TokenType.HAVING): 2596 return None 2597 return self.expression(exp.Having, this=self._parse_conjunction()) 2598 2599 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2600 if not self._match(TokenType.QUALIFY): 2601 return None 2602 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2603 2604 def _parse_order( 2605 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2606 ) -> t.Optional[exp.Expression]: 2607 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2608 return this 2609 2610 return self.expression( 2611 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2612 ) 2613 2614 def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]: 2615 if not self._match_text_seq(*texts): 2616 return None 2617 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2618 2619 def _parse_ordered(self) -> exp.Ordered: 2620 this = self._parse_conjunction() 2621 self._match(TokenType.ASC) 2622 2623 is_desc = self._match(TokenType.DESC) 2624 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2625 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2626 desc = is_desc or False 2627 asc = not desc 2628 nulls_first = is_nulls_first or False 2629 explicitly_null_ordered = is_nulls_first or is_nulls_last 2630 2631 if ( 2632 not 
explicitly_null_ordered 2633 and ( 2634 (asc and self.NULL_ORDERING == "nulls_are_small") 2635 or (desc and self.NULL_ORDERING != "nulls_are_small") 2636 ) 2637 and self.NULL_ORDERING != "nulls_are_last" 2638 ): 2639 nulls_first = True 2640 2641 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2642 2643 def _parse_limit( 2644 self, this: t.Optional[exp.Expression] = None, top: bool = False 2645 ) -> t.Optional[exp.Expression]: 2646 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2647 limit_paren = self._match(TokenType.L_PAREN) 2648 expression = self._parse_number() if top else self._parse_term() 2649 2650 if self._match(TokenType.COMMA): 2651 offset = expression 2652 expression = self._parse_term() 2653 else: 2654 offset = None 2655 2656 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2657 2658 if limit_paren: 2659 self._match_r_paren() 2660 2661 return limit_exp 2662 2663 if self._match(TokenType.FETCH): 2664 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2665 direction = self._prev.text if direction else "FIRST" 2666 2667 count = self._parse_number() 2668 percent = self._match(TokenType.PERCENT) 2669 2670 self._match_set((TokenType.ROW, TokenType.ROWS)) 2671 2672 only = self._match_text_seq("ONLY") 2673 with_ties = self._match_text_seq("WITH", "TIES") 2674 2675 if only and with_ties: 2676 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2677 2678 return self.expression( 2679 exp.Fetch, 2680 direction=direction, 2681 count=count, 2682 percent=percent, 2683 with_ties=with_ties, 2684 ) 2685 2686 return this 2687 2688 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2689 if not self._match(TokenType.OFFSET): 2690 return this 2691 2692 count = self._parse_number() 2693 self._match_set((TokenType.ROW, TokenType.ROWS)) 2694 return self.expression(exp.Offset, this=this, expression=count) 2695 2696 def _parse_locks(self) -> t.List[exp.Lock]: 2697 locks = [] 2698 while True: 2699 if self._match_text_seq("FOR", "UPDATE"): 2700 update = True 2701 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2702 "LOCK", "IN", "SHARE", "MODE" 2703 ): 2704 update = False 2705 else: 2706 break 2707 2708 expressions = None 2709 if self._match_text_seq("OF"): 2710 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2711 2712 wait: t.Optional[bool | exp.Expression] = None 2713 if self._match_text_seq("NOWAIT"): 2714 wait = True 2715 elif self._match_text_seq("WAIT"): 2716 wait = self._parse_primary() 2717 elif self._match_text_seq("SKIP", "LOCKED"): 2718 wait = False 2719 2720 locks.append( 2721 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2722 ) 2723 2724 return locks 2725 2726 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2727 if not self._match_set(self.SET_OPERATIONS): 2728 return this 2729 2730 token_type = self._prev.token_type 2731 2732 if token_type == TokenType.UNION: 2733 expression = exp.Union 2734 elif token_type == TokenType.EXCEPT: 2735 expression = exp.Except 2736 else: 2737 expression = exp.Intersect 2738 2739 return self.expression( 2740 expression, 2741 this=this, 2742 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2743 expression=self._parse_set_operations(self._parse_select(nested=True)), 2744 ) 2745 2746 def _parse_expression(self) -> t.Optional[exp.Expression]: 2747 return 
self._parse_alias(self._parse_conjunction()) 2748 2749 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2750 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2751 2752 def _parse_equality(self) -> t.Optional[exp.Expression]: 2753 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2754 2755 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2756 return self._parse_tokens(self._parse_range, self.COMPARISON) 2757 2758 def _parse_range(self) -> t.Optional[exp.Expression]: 2759 this = self._parse_bitwise() 2760 negate = self._match(TokenType.NOT) 2761 2762 if self._match_set(self.RANGE_PARSERS): 2763 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2764 if not expression: 2765 return this 2766 2767 this = expression 2768 elif self._match(TokenType.ISNULL): 2769 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2770 2771 # Postgres supports ISNULL and NOTNULL for conditions. 2772 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2773 if self._match(TokenType.NOTNULL): 2774 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2775 this = self.expression(exp.Not, this=this) 2776 2777 if negate: 2778 this = self.expression(exp.Not, this=this) 2779 2780 if self._match(TokenType.IS): 2781 this = self._parse_is(this) 2782 2783 return this 2784 2785 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2786 index = self._index - 1 2787 negate = self._match(TokenType.NOT) 2788 2789 if self._match_text_seq("DISTINCT", "FROM"): 2790 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2791 return self.expression(klass, this=this, expression=self._parse_expression()) 2792 2793 expression = self._parse_null() or self._parse_boolean() 2794 if not expression: 2795 self._retreat(index) 2796 return None 2797 2798 this = self.expression(exp.Is, this=this, expression=expression) 2799 return self.expression(exp.Not, this=this) if negate else this 2800 2801 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2802 unnest = self._parse_unnest(with_alias=False) 2803 if unnest: 2804 this = self.expression(exp.In, this=this, unnest=unnest) 2805 elif self._match(TokenType.L_PAREN): 2806 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2807 2808 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2809 this = self.expression(exp.In, this=this, query=expressions[0]) 2810 else: 2811 this = self.expression(exp.In, this=this, expressions=expressions) 2812 2813 self._match_r_paren(this) 2814 else: 2815 this = self.expression(exp.In, this=this, field=self._parse_field()) 2816 2817 return this 2818 2819 def _parse_between(self, this: exp.Expression) -> exp.Between: 2820 low = self._parse_bitwise() 2821 self._match(TokenType.AND) 2822 high = self._parse_bitwise() 2823 return self.expression(exp.Between, this=this, low=low, high=high) 2824 2825 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2826 if not self._match(TokenType.ESCAPE): 2827 return this 2828 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2829 2830 def _parse_interval(self) -> t.Optional[exp.Interval]: 2831 if not self._match(TokenType.INTERVAL): 2832 return None 2833 2834 this = self._parse_primary() or self._parse_term() 2835 unit = self._parse_function() or self._parse_var() 2836 2837 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2838 
# each INTERVAL expression into this canonical form so it's easy to transpile 2839 if this and this.is_number: 2840 this = exp.Literal.string(this.name) 2841 elif this and this.is_string: 2842 parts = this.name.split() 2843 2844 if len(parts) == 2: 2845 if unit: 2846 # this is not actually a unit, it's something else 2847 unit = None 2848 self._retreat(self._index - 1) 2849 else: 2850 this = exp.Literal.string(parts[0]) 2851 unit = self.expression(exp.Var, this=parts[1]) 2852 2853 return self.expression(exp.Interval, this=this, unit=unit) 2854 2855 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2856 this = self._parse_term() 2857 2858 while True: 2859 if self._match_set(self.BITWISE): 2860 this = self.expression( 2861 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2862 ) 2863 elif self._match_pair(TokenType.LT, TokenType.LT): 2864 this = self.expression( 2865 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2866 ) 2867 elif self._match_pair(TokenType.GT, TokenType.GT): 2868 this = self.expression( 2869 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2870 ) 2871 else: 2872 break 2873 2874 return this 2875 2876 def _parse_term(self) -> t.Optional[exp.Expression]: 2877 return self._parse_tokens(self._parse_factor, self.TERM) 2878 2879 def _parse_factor(self) -> t.Optional[exp.Expression]: 2880 return self._parse_tokens(self._parse_unary, self.FACTOR) 2881 2882 def _parse_unary(self) -> t.Optional[exp.Expression]: 2883 if self._match_set(self.UNARY_PARSERS): 2884 return self.UNARY_PARSERS[self._prev.token_type](self) 2885 return self._parse_at_time_zone(self._parse_type()) 2886 2887 def _parse_type(self) -> t.Optional[exp.Expression]: 2888 interval = self._parse_interval() 2889 if interval: 2890 return interval 2891 2892 index = self._index 2893 data_type = self._parse_types(check_func=True) 2894 this = self._parse_column() 2895 2896 if data_type: 2897 if isinstance(this, exp.Literal): 2898 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2899 if parser: 2900 return parser(self, this, data_type) 2901 return self.expression(exp.Cast, this=this, to=data_type) 2902 if not data_type.expressions: 2903 self._retreat(index) 2904 return self._parse_column() 2905 return self._parse_column_ops(data_type) 2906 2907 return this 2908 2909 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2910 this = self._parse_type() 2911 if not this: 2912 return None 2913 2914 return self.expression( 2915 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2916 ) 2917 2918 def _parse_types( 2919 self, check_func: bool = False, schema: bool = False 2920 ) -> t.Optional[exp.Expression]: 2921 index = self._index 2922 2923 prefix = self._match_text_seq("SYSUDTLIB", ".") 2924 2925 if not self._match_set(self.TYPE_TOKENS): 2926 return None 2927 2928 type_token = self._prev.token_type 2929 2930 if type_token == TokenType.PSEUDO_TYPE: 2931 return self.expression(exp.PseudoType, this=self._prev.text) 2932 2933 nested = type_token in self.NESTED_TYPE_TOKENS 2934 is_struct = type_token == TokenType.STRUCT 2935 expressions = None 2936 maybe_func = False 2937 2938 if self._match(TokenType.L_PAREN): 2939 if is_struct: 2940 expressions = self._parse_csv(self._parse_struct_types) 2941 elif nested: 2942 expressions = self._parse_csv( 2943 lambda: self._parse_types(check_func=check_func, schema=schema) 2944 ) 2945 elif type_token in self.ENUM_TYPE_TOKENS: 2946 expressions = self._parse_csv(self._parse_primary) 2947 else: 2948 
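# This fallback covers plain parenthesized type parameters, e.g. VARCHAR(255)
# or DECIMAL(10, 2), parsed as exp.DataTypeSize arguments. A minimal sketch,
# assuming only the public sqlglot.parse_one API:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").sql()
#   # roughly round-trips to 'SELECT CAST(x AS DECIMAL(10, 2))'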
expressions = self._parse_csv(self._parse_type_size) 2949 2950 if not expressions or not self._match(TokenType.R_PAREN): 2951 self._retreat(index) 2952 return None 2953 2954 maybe_func = True 2955 2956 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2957 this = exp.DataType( 2958 this=exp.DataType.Type.ARRAY, 2959 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2960 nested=True, 2961 ) 2962 2963 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2964 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2965 2966 return this 2967 2968 if self._match(TokenType.L_BRACKET): 2969 self._retreat(index) 2970 return None 2971 2972 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2973 if nested and self._match(TokenType.LT): 2974 if is_struct: 2975 expressions = self._parse_csv(self._parse_struct_types) 2976 else: 2977 expressions = self._parse_csv( 2978 lambda: self._parse_types(check_func=check_func, schema=schema) 2979 ) 2980 2981 if not self._match(TokenType.GT): 2982 self.raise_error("Expecting >") 2983 2984 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2985 values = self._parse_csv(self._parse_conjunction) 2986 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2987 2988 value: t.Optional[exp.Expression] = None 2989 if type_token in self.TIMESTAMPS: 2990 if self._match_text_seq("WITH", "TIME", "ZONE"): 2991 maybe_func = False 2992 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2993 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 2994 maybe_func = False 2995 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2996 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2997 maybe_func = False 2998 elif type_token == TokenType.INTERVAL: 2999 unit = self._parse_var() 3000 3001 if not unit: 3002 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3003 else: 3004 value = self.expression(exp.Interval, unit=unit) 3005 3006 if maybe_func and check_func: 3007 index2 = self._index 3008 peek = self._parse_string() 3009 3010 if not peek: 3011 self._retreat(index) 3012 return None 3013 3014 self._retreat(index2) 3015 3016 if value: 3017 return value 3018 3019 return exp.DataType( 3020 this=exp.DataType.Type[type_token.value.upper()], 3021 expressions=expressions, 3022 nested=nested, 3023 values=values, 3024 prefix=prefix, 3025 ) 3026 3027 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3028 this = self._parse_type() or self._parse_id_var() 3029 self._match(TokenType.COLON) 3030 return self._parse_column_def(this) 3031 3032 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3033 if not self._match_text_seq("AT", "TIME", "ZONE"): 3034 return this 3035 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3036 3037 def _parse_column(self) -> t.Optional[exp.Expression]: 3038 this = self._parse_field() 3039 if isinstance(this, exp.Identifier): 3040 this = self.expression(exp.Column, this=this) 3041 elif not this: 3042 return self._parse_bracket(this) 3043 return self._parse_column_ops(this) 3044 3045 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3046 this = self._parse_bracket(this) 3047 3048 while self._match_set(self.COLUMN_OPERATORS): 3049 op_token = self._prev.token_type 3050 op = self.COLUMN_OPERATORS.get(op_token) 3051 3052 if op_token == TokenType.DCOLON: 3053 
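# The DCOLON branch handles Postgres-style shorthand casts, e.g. x::INT,
# which COLUMN_OPERATORS maps onto an exp.Cast. A minimal sketch, assuming
# only the public sqlglot.parse_one API:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT x::INT").sql()
#   # roughly 'SELECT CAST(x AS INT)'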
field = self._parse_types() 3054 if not field: 3055 self.raise_error("Expected type") 3056 elif op and self._curr: 3057 self._advance() 3058 value = self._prev.text 3059 field = ( 3060 exp.Literal.number(value) 3061 if self._prev.token_type == TokenType.NUMBER 3062 else exp.Literal.string(value) 3063 ) 3064 else: 3065 field = self._parse_field(anonymous_func=True, any_token=True) 3066 3067 if isinstance(field, exp.Func): 3068 # bigquery allows function calls like x.y.count(...) 3069 # SAFE.SUBSTR(...) 3070 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3071 this = self._replace_columns_with_dots(this) 3072 3073 if op: 3074 this = op(self, this, field) 3075 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3076 this = self.expression( 3077 exp.Column, 3078 this=field, 3079 table=this.this, 3080 db=this.args.get("table"), 3081 catalog=this.args.get("db"), 3082 ) 3083 else: 3084 this = self.expression(exp.Dot, this=this, expression=field) 3085 this = self._parse_bracket(this) 3086 return this 3087 3088 def _parse_primary(self) -> t.Optional[exp.Expression]: 3089 if self._match_set(self.PRIMARY_PARSERS): 3090 token_type = self._prev.token_type 3091 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3092 3093 if token_type == TokenType.STRING: 3094 expressions = [primary] 3095 while self._match(TokenType.STRING): 3096 expressions.append(exp.Literal.string(self._prev.text)) 3097 3098 if len(expressions) > 1: 3099 return self.expression(exp.Concat, expressions=expressions) 3100 3101 return primary 3102 3103 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3104 return exp.Literal.number(f"0.{self._prev.text}") 3105 3106 if self._match(TokenType.L_PAREN): 3107 comments = self._prev_comments 3108 query = self._parse_select() 3109 3110 if query: 3111 expressions = [query] 3112 else: 3113 expressions = self._parse_csv(self._parse_expression) 3114 3115 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3116 3117 if isinstance(this, exp.Subqueryable): 3118 this = self._parse_set_operations( 3119 self._parse_subquery(this=this, parse_alias=False) 3120 ) 3121 elif len(expressions) > 1: 3122 this = self.expression(exp.Tuple, expressions=expressions) 3123 else: 3124 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3125 3126 if this: 3127 this.add_comments(comments) 3128 3129 self._match_r_paren(expression=this) 3130 return this 3131 3132 return None 3133 3134 def _parse_field( 3135 self, 3136 any_token: bool = False, 3137 tokens: t.Optional[t.Collection[TokenType]] = None, 3138 anonymous_func: bool = False, 3139 ) -> t.Optional[exp.Expression]: 3140 return ( 3141 self._parse_primary() 3142 or self._parse_function(anonymous=anonymous_func) 3143 or self._parse_id_var(any_token=any_token, tokens=tokens) 3144 ) 3145 3146 def _parse_function( 3147 self, 3148 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3149 anonymous: bool = False, 3150 optional_parens: bool = True, 3151 ) -> t.Optional[exp.Expression]: 3152 if not self._curr: 3153 return None 3154 3155 token_type = self._curr.token_type 3156 3157 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3158 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3159 3160 if not self._next or self._next.token_type != TokenType.L_PAREN: 3161 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3162 self._advance() 3163 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3164 3165 return None 3166 
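# Past this point a parenthesized call is expected; parenless functions such
# as CURRENT_DATE or CURRENT_TIMESTAMP were already handled above through
# NO_PAREN_FUNCTIONS. A minimal sketch, assuming the public parse_one API:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT CURRENT_DATE").sql()
#   # roughly 'SELECT CURRENT_DATE'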
3167 if token_type not in self.FUNC_TOKENS: 3168 return None 3169 3170 this = self._curr.text 3171 upper = this.upper() 3172 self._advance(2) 3173 3174 parser = self.FUNCTION_PARSERS.get(upper) 3175 3176 if parser and not anonymous: 3177 this = parser(self) 3178 else: 3179 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3180 3181 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3182 this = self.expression(subquery_predicate, this=self._parse_select()) 3183 self._match_r_paren() 3184 return this 3185 3186 if functions is None: 3187 functions = self.FUNCTIONS 3188 3189 function = functions.get(upper) 3190 3191 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3192 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3193 3194 if function and not anonymous: 3195 this = self.validate_expression(function(args), args) 3196 else: 3197 this = self.expression(exp.Anonymous, this=this, expressions=args) 3198 3199 self._match_r_paren(this) 3200 return self._parse_window(this) 3201 3202 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3203 return self._parse_column_def(self._parse_id_var()) 3204 3205 def _parse_user_defined_function( 3206 self, kind: t.Optional[TokenType] = None 3207 ) -> t.Optional[exp.Expression]: 3208 this = self._parse_id_var() 3209 3210 while self._match(TokenType.DOT): 3211 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3212 3213 if not self._match(TokenType.L_PAREN): 3214 return this 3215 3216 expressions = self._parse_csv(self._parse_function_parameter) 3217 self._match_r_paren() 3218 return self.expression( 3219 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3220 ) 3221 3222 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3223 literal = self._parse_primary() 3224 if literal: 3225 return self.expression(exp.Introducer, this=token.text, expression=literal) 3226 3227 return self.expression(exp.Identifier, this=token.text) 3228 3229 def _parse_session_parameter(self) -> exp.SessionParameter: 3230 kind = None 3231 this = self._parse_id_var() or self._parse_primary() 3232 3233 if this and self._match(TokenType.DOT): 3234 kind = this.name 3235 this = self._parse_var() or self._parse_primary() 3236 3237 return self.expression(exp.SessionParameter, this=this, kind=kind) 3238 3239 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3240 index = self._index 3241 3242 if self._match(TokenType.L_PAREN): 3243 expressions = self._parse_csv(self._parse_id_var) 3244 3245 if not self._match(TokenType.R_PAREN): 3246 self._retreat(index) 3247 else: 3248 expressions = [self._parse_id_var()] 3249 3250 if self._match_set(self.LAMBDAS): 3251 return self.LAMBDAS[self._prev.token_type](self, expressions) 3252 3253 self._retreat(index) 3254 3255 this: t.Optional[exp.Expression] 3256 3257 if self._match(TokenType.DISTINCT): 3258 this = self.expression( 3259 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3260 ) 3261 else: 3262 this = self._parse_select_or_expression(alias=alias) 3263 3264 if isinstance(this, exp.EQ): 3265 left = this.this 3266 if isinstance(left, exp.Column): 3267 left.replace(exp.var(left.text("this"))) 3268 3269 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3270 3271 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3272 index = self._index 3273 3274 if not self.errors: 3275 try: 3276 if 
self._parse_select(nested=True): 3277 return this 3278 except ParseError: 3279 pass 3280 finally: 3281 self.errors.clear() 3282 self._retreat(index) 3283 3284 if not self._match(TokenType.L_PAREN): 3285 return this 3286 3287 args = self._parse_csv( 3288 lambda: self._parse_constraint() 3289 or self._parse_column_def(self._parse_field(any_token=True)) 3290 ) 3291 3292 self._match_r_paren() 3293 return self.expression(exp.Schema, this=this, expressions=args) 3294 3295 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3296 # column defs are not really columns, they're identifiers 3297 if isinstance(this, exp.Column): 3298 this = this.this 3299 3300 kind = self._parse_types(schema=True) 3301 3302 if self._match_text_seq("FOR", "ORDINALITY"): 3303 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3304 3305 constraints = [] 3306 while True: 3307 constraint = self._parse_column_constraint() 3308 if not constraint: 3309 break 3310 constraints.append(constraint) 3311 3312 if not kind and not constraints: 3313 return this 3314 3315 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3316 3317 def _parse_auto_increment( 3318 self, 3319 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3320 start = None 3321 increment = None 3322 3323 if self._match(TokenType.L_PAREN, advance=False): 3324 args = self._parse_wrapped_csv(self._parse_bitwise) 3325 start = seq_get(args, 0) 3326 increment = seq_get(args, 1) 3327 elif self._match_text_seq("START"): 3328 start = self._parse_bitwise() 3329 self._match_text_seq("INCREMENT") 3330 increment = self._parse_bitwise() 3331 3332 if start and increment: 3333 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3334 3335 return exp.AutoIncrementColumnConstraint() 3336 3337 def _parse_compress(self) -> exp.CompressColumnConstraint: 3338 if self._match(TokenType.L_PAREN, advance=False): 3339 return self.expression( 3340 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3341 ) 3342 3343 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3344 3345 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3346 if self._match_text_seq("BY", "DEFAULT"): 3347 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3348 this = self.expression( 3349 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3350 ) 3351 else: 3352 self._match_text_seq("ALWAYS") 3353 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3354 3355 self._match(TokenType.ALIAS) 3356 identity = self._match_text_seq("IDENTITY") 3357 3358 if self._match(TokenType.L_PAREN): 3359 if self._match_text_seq("START", "WITH"): 3360 this.set("start", self._parse_bitwise()) 3361 if self._match_text_seq("INCREMENT", "BY"): 3362 this.set("increment", self._parse_bitwise()) 3363 if self._match_text_seq("MINVALUE"): 3364 this.set("minvalue", self._parse_bitwise()) 3365 if self._match_text_seq("MAXVALUE"): 3366 this.set("maxvalue", self._parse_bitwise()) 3367 3368 if self._match_text_seq("CYCLE"): 3369 this.set("cycle", True) 3370 elif self._match_text_seq("NO", "CYCLE"): 3371 this.set("cycle", False) 3372 3373 if not identity: 3374 this.set("expression", self._parse_bitwise()) 3375 3376 self._match_r_paren() 3377 3378 return this 3379 3380 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3381 self._match_text_seq("LENGTH") 3382 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3383 3384 def _parse_not_constraint( 3385 self, 3386 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3387 if self._match_text_seq("NULL"): 3388 return self.expression(exp.NotNullColumnConstraint) 3389 if self._match_text_seq("CASESPECIFIC"): 3390 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3391 return None 3392 3393 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3394 if self._match(TokenType.CONSTRAINT): 3395 this = self._parse_id_var() 3396 else: 3397 this = None 3398 3399 if self._match_texts(self.CONSTRAINT_PARSERS): 3400 return self.expression( 3401 exp.ColumnConstraint, 3402 this=this, 3403 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3404 ) 3405 3406 return this 3407 3408 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3409 if not self._match(TokenType.CONSTRAINT): 3410 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3411 3412 this = self._parse_id_var() 3413 expressions = [] 3414 3415 while True: 3416 constraint = self._parse_unnamed_constraint() or self._parse_function() 3417 if not constraint: 3418 break 3419 expressions.append(constraint) 3420 3421 return self.expression(exp.Constraint, this=this, expressions=expressions) 3422 3423 def _parse_unnamed_constraint( 3424 self, constraints: t.Optional[t.Collection[str]] = None 3425 ) -> t.Optional[exp.Expression]: 3426 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3427 return None 3428 3429 constraint = self._prev.text.upper() 3430 if constraint not in self.CONSTRAINT_PARSERS: 3431 self.raise_error(f"No parser found for schema constraint {constraint}.") 3432 3433 return self.CONSTRAINT_PARSERS[constraint](self) 3434 3435 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3436 self._match_text_seq("KEY") 3437 return self.expression( 3438 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3439 ) 3440 3441 def _parse_key_constraint_options(self) -> t.List[str]: 3442 options = [] 3443 while True: 3444 if not self._curr: 3445 break 3446 3447 if self._match(TokenType.ON): 3448 action = None 3449 on = self._advance_any() and self._prev.text 3450 3451 if self._match_text_seq("NO", "ACTION"): 3452 action = "NO ACTION" 3453 elif self._match_text_seq("CASCADE"): 3454 action = "CASCADE" 3455 elif self._match_pair(TokenType.SET, TokenType.NULL): 3456 action = "SET NULL" 3457 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3458 action = "SET DEFAULT" 3459 else: 3460 self.raise_error("Invalid key constraint") 3461 3462 options.append(f"ON {on} {action}") 3463 elif self._match_text_seq("NOT", "ENFORCED"): 3464 options.append("NOT ENFORCED") 3465 elif self._match_text_seq("DEFERRABLE"): 3466 options.append("DEFERRABLE") 3467 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3468 options.append("INITIALLY DEFERRED") 3469 elif self._match_text_seq("NORELY"): 3470 options.append("NORELY") 3471 elif self._match_text_seq("MATCH", "FULL"): 3472 options.append("MATCH FULL") 3473 else: 3474 break 3475 3476 return options 3477 3478 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3479 if match and not self._match(TokenType.REFERENCES): 3480 return None 3481 3482 expressions = None 3483 this = self._parse_id_var() 3484 3485 if self._match(TokenType.L_PAREN, advance=False): 3486 expressions = self._parse_wrapped_id_vars() 3487 3488 options = 
self._parse_key_constraint_options() 3489 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3490 3491 def _parse_foreign_key(self) -> exp.ForeignKey: 3492 expressions = self._parse_wrapped_id_vars() 3493 reference = self._parse_references() 3494 options = {} 3495 3496 while self._match(TokenType.ON): 3497 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3498 self.raise_error("Expected DELETE or UPDATE") 3499 3500 kind = self._prev.text.lower() 3501 3502 if self._match_text_seq("NO", "ACTION"): 3503 action = "NO ACTION" 3504 elif self._match(TokenType.SET): 3505 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3506 action = "SET " + self._prev.text.upper() 3507 else: 3508 self._advance() 3509 action = self._prev.text.upper() 3510 3511 options[kind] = action 3512 3513 return self.expression( 3514 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3515 ) 3516 3517 def _parse_primary_key( 3518 self, wrapped_optional: bool = False, in_props: bool = False 3519 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3520 desc = ( 3521 self._match_set((TokenType.ASC, TokenType.DESC)) 3522 and self._prev.token_type == TokenType.DESC 3523 ) 3524 3525 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3526 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3527 3528 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3529 options = self._parse_key_constraint_options() 3530 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3531 3532 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3533 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3534 return this 3535 3536 bracket_kind = self._prev.token_type 3537 3538 if self._match(TokenType.COLON): 3539 expressions: t.List[t.Optional[exp.Expression]] = [ 3540 self.expression(exp.Slice, expression=self._parse_conjunction()) 3541 ] 3542 else: 3543 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3544 3545 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3546 if bracket_kind == TokenType.L_BRACE: 3547 this = self.expression(exp.Struct, expressions=expressions) 3548 elif not this or this.name.upper() == "ARRAY": 3549 this = self.expression(exp.Array, expressions=expressions) 3550 else: 3551 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3552 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3553 3554 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3555 self.raise_error("Expected ]") 3556 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3557 self.raise_error("Expected }") 3558 3559 self._add_comments(this) 3560 return self._parse_bracket(this) 3561 3562 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3563 if self._match(TokenType.COLON): 3564 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3565 return this 3566 3567 def _parse_case(self) -> t.Optional[exp.Expression]: 3568 ifs = [] 3569 default = None 3570 3571 expression = self._parse_conjunction() 3572 3573 while self._match(TokenType.WHEN): 3574 this = self._parse_conjunction() 3575 self._match(TokenType.THEN) 3576 then = self._parse_conjunction() 3577 ifs.append(self.expression(exp.If, this=this, true=then)) 3578 3579 if 
self._match(TokenType.ELSE): 3580 default = self._parse_conjunction() 3581 3582 if not self._match(TokenType.END): 3583 self.raise_error("Expected END after CASE", self._prev) 3584 3585 return self._parse_window( 3586 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3587 ) 3588 3589 def _parse_if(self) -> t.Optional[exp.Expression]: 3590 if self._match(TokenType.L_PAREN): 3591 args = self._parse_csv(self._parse_conjunction) 3592 this = self.validate_expression(exp.If.from_arg_list(args), args) 3593 self._match_r_paren() 3594 else: 3595 index = self._index - 1 3596 condition = self._parse_conjunction() 3597 3598 if not condition: 3599 self._retreat(index) 3600 return None 3601 3602 self._match(TokenType.THEN) 3603 true = self._parse_conjunction() 3604 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3605 self._match(TokenType.END) 3606 this = self.expression(exp.If, this=condition, true=true, false=false) 3607 3608 return self._parse_window(this) 3609 3610 def _parse_extract(self) -> exp.Extract: 3611 this = self._parse_function() or self._parse_var() or self._parse_type() 3612 3613 if self._match(TokenType.FROM): 3614 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3615 3616 if not self._match(TokenType.COMMA): 3617 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3618 3619 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3620 3621 def _parse_cast(self, strict: bool) -> exp.Expression: 3622 this = self._parse_conjunction() 3623 3624 if not self._match(TokenType.ALIAS): 3625 if self._match(TokenType.COMMA): 3626 return self.expression( 3627 exp.CastToStrType, this=this, expression=self._parse_string() 3628 ) 3629 else: 3630 self.raise_error("Expected AS after CAST") 3631 3632 to = self._parse_types() 3633 3634 if not to: 3635 self.raise_error("Expected TYPE after CAST") 3636 elif to.this == exp.DataType.Type.CHAR: 3637 if self._match(TokenType.CHARACTER_SET): 3638 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3639 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3640 fmt = self._parse_string() 3641 3642 return self.expression( 3643 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3644 this=this, 3645 format=exp.Literal.string( 3646 format_time( 3647 fmt.this if fmt else "", 3648 self.FORMAT_MAPPING or self.TIME_MAPPING, 3649 self.FORMAT_TRIE or self.TIME_TRIE, 3650 ) 3651 ), 3652 ) 3653 3654 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3655 3656 def _parse_concat(self) -> t.Optional[exp.Expression]: 3657 args = self._parse_csv(self._parse_conjunction) 3658 if self.CONCAT_NULL_OUTPUTS_STRING: 3659 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3660 3661 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3662 # we find such a call we replace it with its argument. 
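# For instance, CONCAT(x) collapses to just x. A minimal transpilation
# sketch, assuming only the public sqlglot.transpile API:
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT CONCAT(x)")[0]
#   # roughly 'SELECT x'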
3663 if len(args) == 1: 3664 return args[0] 3665 3666 return self.expression( 3667 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3668 ) 3669 3670 def _parse_string_agg(self) -> exp.Expression: 3671 expression: t.Optional[exp.Expression] 3672 3673 if self._match(TokenType.DISTINCT): 3674 args = self._parse_csv(self._parse_conjunction) 3675 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3676 else: 3677 args = self._parse_csv(self._parse_conjunction) 3678 expression = seq_get(args, 0) 3679 3680 index = self._index 3681 if not self._match(TokenType.R_PAREN): 3682 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3683 order = self._parse_order(this=expression) 3684 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3685 3686 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3687 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3688 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3689 if not self._match_text_seq("WITHIN", "GROUP"): 3690 self._retreat(index) 3691 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3692 3693 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3694 order = self._parse_order(this=expression) 3695 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3696 3697 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3698 to: t.Optional[exp.Expression] 3699 this = self._parse_bitwise() 3700 3701 if self._match(TokenType.USING): 3702 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3703 elif self._match(TokenType.COMMA): 3704 to = self._parse_bitwise() 3705 else: 3706 to = None 3707 3708 # Swap the argument order if needed to produce the correct AST 3709 if self.CONVERT_TYPE_FIRST: 3710 this, to = to, this 3711 3712 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3713 3714 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3715 """ 3716 There are generally two variants of the DECODE function: 3717 3718 - DECODE(bin, charset) 3719 - DECODE(expression, search, result [, search, result] ... [, default]) 3720 3721 The second variant will always be parsed into a CASE expression. Note that NULL 3722 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3723 instead of relying on pattern matching. 
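For example, DECODE(x, 1, 'one', 'other') is converted into (roughly)
CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and a NULL search value is
checked with x IS NULL rather than with an equality comparison.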
3724 """ 3725 args = self._parse_csv(self._parse_conjunction) 3726 3727 if len(args) < 3: 3728 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3729 3730 expression, *expressions = args 3731 if not expression: 3732 return None 3733 3734 ifs = [] 3735 for search, result in zip(expressions[::2], expressions[1::2]): 3736 if not search or not result: 3737 return None 3738 3739 if isinstance(search, exp.Literal): 3740 ifs.append( 3741 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3742 ) 3743 elif isinstance(search, exp.Null): 3744 ifs.append( 3745 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3746 ) 3747 else: 3748 cond = exp.or_( 3749 exp.EQ(this=expression.copy(), expression=search), 3750 exp.and_( 3751 exp.Is(this=expression.copy(), expression=exp.Null()), 3752 exp.Is(this=search.copy(), expression=exp.Null()), 3753 copy=False, 3754 ), 3755 copy=False, 3756 ) 3757 ifs.append(exp.If(this=cond, true=result)) 3758 3759 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3760 3761 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3762 self._match_text_seq("KEY") 3763 key = self._parse_field() 3764 self._match(TokenType.COLON) 3765 self._match_text_seq("VALUE") 3766 value = self._parse_field() 3767 3768 if not key and not value: 3769 return None 3770 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3771 3772 def _parse_json_object(self) -> exp.JSONObject: 3773 star = self._parse_star() 3774 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3775 3776 null_handling = None 3777 if self._match_text_seq("NULL", "ON", "NULL"): 3778 null_handling = "NULL ON NULL" 3779 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3780 null_handling = "ABSENT ON NULL" 3781 3782 unique_keys = None 3783 if self._match_text_seq("WITH", "UNIQUE"): 3784 unique_keys = True 3785 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3786 unique_keys = False 3787 3788 self._match_text_seq("KEYS") 3789 3790 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3791 format_json = self._match_text_seq("FORMAT", "JSON") 3792 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3793 3794 return self.expression( 3795 exp.JSONObject, 3796 expressions=expressions, 3797 null_handling=null_handling, 3798 unique_keys=unique_keys, 3799 return_type=return_type, 3800 format_json=format_json, 3801 encoding=encoding, 3802 ) 3803 3804 def _parse_logarithm(self) -> exp.Func: 3805 # Default argument order is base, expression 3806 args = self._parse_csv(self._parse_range) 3807 3808 if len(args) > 1: 3809 if not self.LOG_BASE_FIRST: 3810 args.reverse() 3811 return exp.Log.from_arg_list(args) 3812 3813 return self.expression( 3814 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3815 ) 3816 3817 def _parse_match_against(self) -> exp.MatchAgainst: 3818 expressions = self._parse_csv(self._parse_column) 3819 3820 self._match_text_seq(")", "AGAINST", "(") 3821 3822 this = self._parse_string() 3823 3824 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3825 modifier = "IN NATURAL LANGUAGE MODE" 3826 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3827 modifier = f"{modifier} WITH QUERY EXPANSION" 3828 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3829 modifier = "IN BOOLEAN MODE" 3830 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3831 modifier = "WITH QUERY EXPANSION" 3832 
else: 3833 modifier = None 3834 3835 return self.expression( 3836 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3837 ) 3838 3839 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3840 def _parse_open_json(self) -> exp.OpenJSON: 3841 this = self._parse_bitwise() 3842 path = self._match(TokenType.COMMA) and self._parse_string() 3843 3844 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3845 this = self._parse_field(any_token=True) 3846 kind = self._parse_types() 3847 path = self._parse_string() 3848 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3849 3850 return self.expression( 3851 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3852 ) 3853 3854 expressions = None 3855 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3856 self._match_l_paren() 3857 expressions = self._parse_csv(_parse_open_json_column_def) 3858 3859 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3860 3861 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3862 args = self._parse_csv(self._parse_bitwise) 3863 3864 if self._match(TokenType.IN): 3865 return self.expression( 3866 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3867 ) 3868 3869 if haystack_first: 3870 haystack = seq_get(args, 0) 3871 needle = seq_get(args, 1) 3872 else: 3873 needle = seq_get(args, 0) 3874 haystack = seq_get(args, 1) 3875 3876 return self.expression( 3877 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3878 ) 3879 3880 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3881 args = self._parse_csv(self._parse_table) 3882 return exp.JoinHint(this=func_name.upper(), expressions=args) 3883 3884 def _parse_substring(self) -> exp.Substring: 3885 # Postgres supports the form: substring(string [from int] [for int]) 3886 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3887 3888 args = self._parse_csv(self._parse_bitwise) 3889 3890 if self._match(TokenType.FROM): 3891 args.append(self._parse_bitwise()) 3892 if self._match(TokenType.FOR): 3893 args.append(self._parse_bitwise()) 3894 3895 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3896 3897 def _parse_trim(self) -> exp.Trim: 3898 # https://www.w3resource.com/sql/character-functions/trim.php 3899 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3900 3901 position = None 3902 collation = None 3903 3904 if self._match_texts(self.TRIM_TYPES): 3905 position = self._prev.text.upper() 3906 3907 expression = self._parse_bitwise() 3908 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3909 this = self._parse_bitwise() 3910 else: 3911 this = expression 3912 expression = None 3913 3914 if self._match(TokenType.COLLATE): 3915 collation = self._parse_bitwise() 3916 3917 return self.expression( 3918 exp.Trim, this=this, position=position, expression=expression, collation=collation 3919 ) 3920 3921 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3922 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3923 3924 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3925 return self._parse_window(self._parse_id_var(), alias=True) 3926 3927 def _parse_respect_or_ignore_nulls( 3928 self, this: t.Optional[exp.Expression] 3929 ) -> t.Optional[exp.Expression]: 3930 if self._match_text_seq("IGNORE", "NULLS"): 3931 return 
self.expression(exp.IgnoreNulls, this=this) 3932 if self._match_text_seq("RESPECT", "NULLS"): 3933 return self.expression(exp.RespectNulls, this=this) 3934 return this 3935 3936 def _parse_window( 3937 self, this: t.Optional[exp.Expression], alias: bool = False 3938 ) -> t.Optional[exp.Expression]: 3939 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3940 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3941 self._match_r_paren() 3942 3943 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3944 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3945 if self._match_text_seq("WITHIN", "GROUP"): 3946 order = self._parse_wrapped(self._parse_order) 3947 this = self.expression(exp.WithinGroup, this=this, expression=order) 3948 3949 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3950 # Some dialects choose to implement and some do not. 3951 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3952 3953 # There is some code above in _parse_lambda that handles 3954 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3955 3956 # The below changes handle 3957 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3958 3959 # Oracle allows both formats 3960 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3961 # and Snowflake chose to do the same for familiarity 3962 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3963 this = self._parse_respect_or_ignore_nulls(this) 3964 3965 # bigquery select from window x AS (partition by ...) 3966 if alias: 3967 over = None 3968 self._match(TokenType.ALIAS) 3969 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3970 return this 3971 else: 3972 over = self._prev.text.upper() 3973 3974 if not self._match(TokenType.L_PAREN): 3975 return self.expression( 3976 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3977 ) 3978 3979 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3980 3981 first = self._match(TokenType.FIRST) 3982 if self._match_text_seq("LAST"): 3983 first = False 3984 3985 partition = self._parse_partition_by() 3986 order = self._parse_order() 3987 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3988 3989 if kind: 3990 self._match(TokenType.BETWEEN) 3991 start = self._parse_window_spec() 3992 self._match(TokenType.AND) 3993 end = self._parse_window_spec() 3994 3995 spec = self.expression( 3996 exp.WindowSpec, 3997 kind=kind, 3998 start=start["value"], 3999 start_side=start["side"], 4000 end=end["value"], 4001 end_side=end["side"], 4002 ) 4003 else: 4004 spec = None 4005 4006 self._match_r_paren() 4007 4008 return self.expression( 4009 exp.Window, 4010 this=this, 4011 partition_by=partition, 4012 order=order, 4013 spec=spec, 4014 alias=window_alias, 4015 over=over, 4016 first=first, 4017 ) 4018 4019 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4020 self._match(TokenType.BETWEEN) 4021 4022 return { 4023 "value": ( 4024 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4025 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4026 or self._parse_bitwise() 4027 ), 4028 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4029 } 4030 4031 def _parse_alias( 4032 self, this: t.Optional[exp.Expression], explicit: bool = False 4033 ) -> t.Optional[exp.Expression]: 4034 
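# An alias may follow with or without AS, and a parenthesized alias list
# (e.g. ... AS t(a, b)) becomes the exp.Aliases node built below. A minimal
# sketch, assuming only the public sqlglot.parse_one API:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT 1 AS x").expressions[0]
#   # an exp.Alias node wrapping the literal 1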
any_token = self._match(TokenType.ALIAS) 4035 4036 if explicit and not any_token: 4037 return this 4038 4039 if self._match(TokenType.L_PAREN): 4040 aliases = self.expression( 4041 exp.Aliases, 4042 this=this, 4043 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4044 ) 4045 self._match_r_paren(aliases) 4046 return aliases 4047 4048 alias = self._parse_id_var(any_token) 4049 4050 if alias: 4051 return self.expression(exp.Alias, this=this, alias=alias) 4052 4053 return this 4054 4055 def _parse_id_var( 4056 self, 4057 any_token: bool = True, 4058 tokens: t.Optional[t.Collection[TokenType]] = None, 4059 ) -> t.Optional[exp.Expression]: 4060 identifier = self._parse_identifier() 4061 4062 if identifier: 4063 return identifier 4064 4065 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4066 quoted = self._prev.token_type == TokenType.STRING 4067 return exp.Identifier(this=self._prev.text, quoted=quoted) 4068 4069 return None 4070 4071 def _parse_string(self) -> t.Optional[exp.Expression]: 4072 if self._match(TokenType.STRING): 4073 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4074 return self._parse_placeholder() 4075 4076 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4077 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4078 4079 def _parse_number(self) -> t.Optional[exp.Expression]: 4080 if self._match(TokenType.NUMBER): 4081 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4082 return self._parse_placeholder() 4083 4084 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4085 if self._match(TokenType.IDENTIFIER): 4086 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4087 return self._parse_placeholder() 4088 4089 def _parse_var( 4090 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4091 ) -> t.Optional[exp.Expression]: 4092 if ( 4093 (any_token and self._advance_any()) 4094 or self._match(TokenType.VAR) 4095 or (self._match_set(tokens) if tokens else False) 4096 ): 4097 return self.expression(exp.Var, this=self._prev.text) 4098 return self._parse_placeholder() 4099 4100 def _advance_any(self) -> t.Optional[Token]: 4101 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4102 self._advance() 4103 return self._prev 4104 return None 4105 4106 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4107 return self._parse_var() or self._parse_string() 4108 4109 def _parse_null(self) -> t.Optional[exp.Expression]: 4110 if self._match(TokenType.NULL): 4111 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4112 return None 4113 4114 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4115 if self._match(TokenType.TRUE): 4116 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4117 if self._match(TokenType.FALSE): 4118 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4119 return None 4120 4121 def _parse_star(self) -> t.Optional[exp.Expression]: 4122 if self._match(TokenType.STAR): 4123 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4124 return None 4125 4126 def _parse_parameter(self) -> exp.Parameter: 4127 wrapped = self._match(TokenType.L_BRACE) 4128 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4129 self._match(TokenType.R_BRACE) 4130 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4131 4132 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
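# Placeholders cover parameter markers such as ? or :name, dispatched via
# the dialect's PLACEHOLDER_PARSERS. A minimal sketch, assuming only the
# public sqlglot.parse_one API:
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t WHERE id = ?").sql()
#   # roughly 'SELECT * FROM t WHERE id = ?'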
4133 if self._match_set(self.PLACEHOLDER_PARSERS): 4134 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4135 if placeholder: 4136 return placeholder 4137 self._advance(-1) 4138 return None 4139 4140 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4141 if not self._match(TokenType.EXCEPT): 4142 return None 4143 if self._match(TokenType.L_PAREN, advance=False): 4144 return self._parse_wrapped_csv(self._parse_column) 4145 return self._parse_csv(self._parse_column) 4146 4147 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4148 if not self._match(TokenType.REPLACE): 4149 return None 4150 if self._match(TokenType.L_PAREN, advance=False): 4151 return self._parse_wrapped_csv(self._parse_expression) 4152 return self._parse_csv(self._parse_expression) 4153 4154 def _parse_csv( 4155 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4156 ) -> t.List[t.Optional[exp.Expression]]: 4157 parse_result = parse_method() 4158 items = [parse_result] if parse_result is not None else [] 4159 4160 while self._match(sep): 4161 self._add_comments(parse_result) 4162 parse_result = parse_method() 4163 if parse_result is not None: 4164 items.append(parse_result) 4165 4166 return items 4167 4168 def _parse_tokens( 4169 self, parse_method: t.Callable, expressions: t.Dict 4170 ) -> t.Optional[exp.Expression]: 4171 this = parse_method() 4172 4173 while self._match_set(expressions): 4174 this = self.expression( 4175 expressions[self._prev.token_type], 4176 this=this, 4177 comments=self._prev_comments, 4178 expression=parse_method(), 4179 ) 4180 4181 return this 4182 4183 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4184 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4185 4186 def _parse_wrapped_csv( 4187 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4188 ) -> t.List[t.Optional[exp.Expression]]: 4189 return self._parse_wrapped( 4190 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4191 ) 4192 4193 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4194 wrapped = self._match(TokenType.L_PAREN) 4195 if not wrapped and not optional: 4196 self.raise_error("Expecting (") 4197 parse_result = parse_method() 4198 if wrapped: 4199 self._match_r_paren() 4200 return parse_result 4201 4202 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4203 return self._parse_select() or self._parse_set_operations( 4204 self._parse_expression() if alias else self._parse_conjunction() 4205 ) 4206 4207 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4208 return self._parse_query_modifiers( 4209 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4210 ) 4211 4212 def _parse_transaction(self) -> exp.Transaction: 4213 this = None 4214 if self._match_texts(self.TRANSACTION_KIND): 4215 this = self._prev.text 4216 4217 self._match_texts({"TRANSACTION", "WORK"}) 4218 4219 modes = [] 4220 while True: 4221 mode = [] 4222 while self._match(TokenType.VAR): 4223 mode.append(self._prev.text) 4224 4225 if mode: 4226 modes.append(" ".join(mode)) 4227 if not self._match(TokenType.COMMA): 4228 break 4229 4230 return self.expression(exp.Transaction, this=this, modes=modes) 4231 4232 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4233 chain = None 4234 savepoint = None 4235 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4236 4237 self._match_texts({"TRANSACTION", "WORK"}) 4238 4239 if self._match_text_seq("TO"): 4240 self._match_text_seq("SAVEPOINT") 4241 savepoint = self._parse_id_var() 4242 4243 if self._match(TokenType.AND): 4244 chain = not self._match_text_seq("NO") 4245 self._match_text_seq("CHAIN") 4246 4247 if is_rollback: 4248 return self.expression(exp.Rollback, savepoint=savepoint) 4249 4250 return self.expression(exp.Commit, chain=chain) 4251 4252 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4253 if not self._match_text_seq("ADD"): 4254 return None 4255 4256 self._match(TokenType.COLUMN) 4257 exists_column = self._parse_exists(not_=True) 4258 expression = self._parse_column_def(self._parse_field(any_token=True)) 4259 4260 if expression: 4261 expression.set("exists", exists_column) 4262 4263 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4264 if self._match_texts(("FIRST", "AFTER")): 4265 position = self._prev.text 4266 column_position = self.expression( 4267 exp.ColumnPosition, this=self._parse_column(), position=position 4268 ) 4269 expression.set("position", column_position) 4270 4271 return expression 4272 4273 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4274 drop = self._match(TokenType.DROP) and self._parse_drop() 4275 if drop and not isinstance(drop, exp.Command): 4276 drop.set("kind", drop.args.get("kind", "COLUMN")) 4277 return drop 4278 4279 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4280 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4281 return self.expression( 4282 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4283 ) 4284 4285 def _parse_add_constraint(self) -> exp.AddConstraint: 4286 this = None 4287 kind = self._prev.token_type 4288 4289 if kind == TokenType.CONSTRAINT: 4290 this = self._parse_id_var() 4291 4292 if self._match_text_seq("CHECK"): 4293 expression = self._parse_wrapped(self._parse_conjunction) 4294 enforced = self._match_text_seq("ENFORCED") 4295 4296 return self.expression( 4297 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4298 ) 4299 4300 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4301 expression = self._parse_foreign_key() 4302 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4303 expression = self._parse_primary_key() 4304 else: 4305 expression = None 4306 4307 return self.expression(exp.AddConstraint, this=this, expression=expression) 4308 4309 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4310 index = self._index - 1 4311 4312 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4313 return self._parse_csv(self._parse_add_constraint) 4314 4315 self._retreat(index) 4316 return self._parse_csv(self._parse_add_column) 4317 4318 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4319 self._match(TokenType.COLUMN) 4320 column = self._parse_field(any_token=True) 4321 4322 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4323 return self.expression(exp.AlterColumn, this=column, drop=True) 4324 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4325 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4326 4327 self._match_text_seq("SET", "DATA") 4328 return self.expression( 4329 exp.AlterColumn, 4330 this=column, 4331 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4332 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4333 using=self._match(TokenType.USING) and self._parse_conjunction(), 4334 ) 4335 4336 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4337 index = self._index - 1 4338 4339 partition_exists = self._parse_exists() 4340 if self._match(TokenType.PARTITION, advance=False): 4341 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4342 4343 self._retreat(index) 4344 return self._parse_csv(self._parse_drop_column) 4345 4346 def _parse_alter_table_rename(self) -> exp.RenameTable: 4347 self._match_text_seq("TO") 4348 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4349 4350 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4351 start = self._prev 4352 4353 if not self._match(TokenType.TABLE): 4354 return self._parse_as_command(start) 4355 4356 exists = self._parse_exists() 4357 this = self._parse_table(schema=True) 4358 4359 if self._next: 4360 self._advance() 4361 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4362 4363 if parser: 4364 actions = ensure_list(parser(self)) 4365 4366 if not self._curr: 4367 return self.expression( 4368 exp.AlterTable, 4369 this=this, 4370 exists=exists, 4371 actions=actions, 4372 ) 4373 return self._parse_as_command(start) 4374 4375 def _parse_merge(self) -> exp.Merge: 4376 self._match(TokenType.INTO) 4377 target = self._parse_table() 4378 4379 self._match(TokenType.USING) 4380 using = self._parse_table() 4381 4382 self._match(TokenType.ON) 4383 on = self._parse_conjunction() 4384 4385 whens = [] 4386 while self._match(TokenType.WHEN): 4387 matched = not self._match(TokenType.NOT) 4388 self._match_text_seq("MATCHED") 4389 source = ( 4390 False 4391 if self._match_text_seq("BY", "TARGET") 4392 else self._match_text_seq("BY", "SOURCE") 4393 ) 4394 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4395 4396 self._match(TokenType.THEN) 4397 4398 if self._match(TokenType.INSERT): 4399 _this = self._parse_star() 4400 if _this: 4401 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4402 else: 4403 then = self.expression( 4404 exp.Insert, 4405 this=self._parse_value(), 4406 expression=self._match(TokenType.VALUES) and self._parse_value(), 4407 ) 4408 elif self._match(TokenType.UPDATE): 4409 expressions = self._parse_star() 4410 if expressions: 4411 then = self.expression(exp.Update, expressions=expressions) 4412 else: 4413 then = self.expression( 4414 exp.Update, 4415 expressions=self._match(TokenType.SET) 4416 and self._parse_csv(self._parse_equality), 4417 ) 4418 elif self._match(TokenType.DELETE): 4419 then = self.expression(exp.Var, this=self._prev.text) 4420 else: 4421 then = None 4422 4423 whens.append( 4424 self.expression( 4425 exp.When, 4426 matched=matched, 4427 source=source, 4428 condition=condition, 4429 then=then, 4430 ) 4431 ) 4432 4433 return self.expression( 4434 exp.Merge, 4435 this=target, 4436 using=using, 4437 on=on, 4438 expressions=whens, 4439 ) 4440 4441 def _parse_show(self) -> t.Optional[exp.Expression]: 4442 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4443 if parser: 4444 return parser(self) 4445 self._advance() 4446 return self.expression(exp.Show, this=self._prev.text.upper()) 4447 4448 def _parse_set_item_assignment( 4449 self, kind: t.Optional[str] = None 4450 ) -> t.Optional[exp.Expression]: 4451 index = self._index 4452 4453 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4454 return self._parse_set_transaction(global_=kind == "GLOBAL") 4455 4456 left = self._parse_primary() or self._parse_id_var() 4457 4458 if not self._match_texts(("=", "TO")): 4459 self._retreat(index) 4460 return None 4461 4462 right = self._parse_statement() or self._parse_id_var() 4463 this = self.expression(exp.EQ, this=left, expression=right) 4464 4465 return self.expression(exp.SetItem, this=this, kind=kind) 4466 4467 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4468 self._match_text_seq("TRANSACTION") 4469 characteristics = self._parse_csv( 4470 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4471 ) 4472 return self.expression( 4473 exp.SetItem, 4474 expressions=characteristics, 4475 kind="TRANSACTION", 4476 **{"global": global_}, # type: ignore 4477 ) 4478 4479 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4480 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4481 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4482 4483 def _parse_set(self) -> exp.Set | exp.Command: 4484 index = self._index 4485 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4486 4487 if self._curr: 4488 self._retreat(index) 4489 return self._parse_as_command(self._prev) 4490 4491 return set_ 4492 4493 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4494 for option in options: 4495 if self._match_text_seq(*option.split(" ")): 4496 return exp.var(option) 4497 return None 4498 4499 def _parse_as_command(self, start: Token) -> exp.Command: 4500 while self._curr: 4501 self._advance() 4502 text = self._find_sql(start, self._prev) 4503 size = len(start.text) 4504 return exp.Command(this=text[:size], expression=text[size:]) 4505 4506 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4507 settings = [] 4508 4509 self._match_l_paren() 4510 kind = self._parse_id_var() 4511 4512 if self._match(TokenType.L_PAREN): 4513 while True: 4514 key = self._parse_id_var() 4515 value = self._parse_primary() 4516 4517 if not key and value is None: 4518 break 4519 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4520 self._match(TokenType.R_PAREN) 4521 4522 self._match_r_paren() 4523 4524 return self.expression( 4525 exp.DictProperty, 4526 this=this, 4527 kind=kind.this if kind else None, 4528 settings=settings, 4529 ) 4530 4531 def _parse_dict_range(self, this: str) -> exp.DictRange: 4532 self._match_l_paren() 4533 has_min = self._match_text_seq("MIN") 4534 if has_min: 4535 min = self._parse_var() or self._parse_primary() 4536 self._match_text_seq("MAX") 4537 max = self._parse_var() or self._parse_primary() 4538 else: 4539 max = self._parse_var() or self._parse_primary() 4540 min = exp.Literal.number(0) 4541 self._match_r_paren() 4542 return self.expression(exp.DictRange, this=this, min=min, max=max) 4543 4544 def _find_parser( 4545 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4546 ) -> t.Optional[t.Callable]: 4547 if not self._curr: 4548 return None 4549 4550 index = self._index 4551 this = [] 4552 while True: 4553 # The current token might be multiple words 4554 curr = self._curr.text.upper() 4555 key = curr.split(" ") 4556 this.append(curr) 4557 self._advance() 4558 result, trie = in_trie(trie, key) 4559 if result == 0: 4560 break 4561 if result == 2: 4562 subparser = parsers[" ".join(this)] 4563 return subparser 4564 self._retreat(index) 4565 return None 4566 4567 def 
_match(self, token_type, advance=True, expression=None): 4568 if not self._curr: 4569 return None 4570 4571 if self._curr.token_type == token_type: 4572 if advance: 4573 self._advance() 4574 self._add_comments(expression) 4575 return True 4576 4577 return None 4578 4579 def _match_set(self, types, advance=True): 4580 if not self._curr: 4581 return None 4582 4583 if self._curr.token_type in types: 4584 if advance: 4585 self._advance() 4586 return True 4587 4588 return None 4589 4590 def _match_pair(self, token_type_a, token_type_b, advance=True): 4591 if not self._curr or not self._next: 4592 return None 4593 4594 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4595 if advance: 4596 self._advance(2) 4597 return True 4598 4599 return None 4600 4601 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4602 if not self._match(TokenType.L_PAREN, expression=expression): 4603 self.raise_error("Expecting (") 4604 4605 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4606 if not self._match(TokenType.R_PAREN, expression=expression): 4607 self.raise_error("Expecting )") 4608 4609 def _match_texts(self, texts, advance=True): 4610 if self._curr and self._curr.text.upper() in texts: 4611 if advance: 4612 self._advance() 4613 return True 4614 return False 4615 4616 def _match_text_seq(self, *texts, advance=True): 4617 index = self._index 4618 for text in texts: 4619 if self._curr and self._curr.text.upper() == text: 4620 self._advance() 4621 else: 4622 self._retreat(index) 4623 return False 4624 4625 if not advance: 4626 self._retreat(index) 4627 4628 return True 4629 4630 @t.overload 4631 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4632 ... 4633 4634 @t.overload 4635 def _replace_columns_with_dots( 4636 self, this: t.Optional[exp.Expression] 4637 ) -> t.Optional[exp.Expression]: 4638 ... 4639 4640 def _replace_columns_with_dots(self, this): 4641 if isinstance(this, exp.Dot): 4642 exp.replace_children(this, self._replace_columns_with_dots) 4643 elif isinstance(this, exp.Column): 4644 exp.replace_children(this, self._replace_columns_with_dots) 4645 table = this.args.get("table") 4646 this = ( 4647 self.expression(exp.Dot, this=table, expression=this.this) 4648 if table 4649 else self.expression(exp.Var, this=this.name) 4650 ) 4651 elif isinstance(this, exp.Identifier): 4652 this = self.expression(exp.Var, this=this.name) 4653 4654 return this 4655 4656 def _replace_lambda( 4657 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4658 ) -> t.Optional[exp.Expression]: 4659 if not node: 4660 return node 4661 4662 for column in node.find_all(exp.Column): 4663 if column.parts[0].name in lambda_variables: 4664 dot_or_id = column.to_dot() if column.table else column.this 4665 parent = column.parent 4666 4667 while isinstance(parent, exp.Dot): 4668 if not isinstance(parent.parent, exp.Dot): 4669 parent.replace(dot_or_id) 4670 break 4671 parent = parent.parent 4672 else: 4673 if column is node: 4674 node = dot_or_id 4675 else: 4676 column.replace(dot_or_id) 4677 return node
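The token-matching helpers above all follow the same backtracking discipline: remember self._index, try to consume tokens, and _retreat to the saved index when the attempt fails, so an unsuccessful probe leaves the cursor untouched. A minimal sketch of that pattern in a hypothetical Parser subclass (the FLUSH statement and the _parse_flush method are invented for illustration and are not part of sqlglot):

from sqlglot import exp
from sqlglot.parser import Parser


class FlushParser(Parser):
    # Hypothetical method showing the checkpoint/retreat discipline used above:
    # consume a keyword plus a kind, or back the cursor out entirely.
    def _parse_flush(self):
        index = self._index
        if self._match_text_seq("FLUSH") and self._match_texts({"TABLES", "LOGS"}):
            return self.expression(exp.Command, this="FLUSH", expression=self._prev.text)
        self._retreat(index)  # a failed probe must leave the cursor untouched
        return None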
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
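A short usage sketch of the constructor arguments documented above, using only public sqlglot APIs:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Collect up to five errors and raise them together at the end of parsing,
# instead of raising on the first one (the IMMEDIATE default).
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
tokens = Tokenizer().tokenize("SELECT a FROM b")
expressions = parser.parse(tokens, "SELECT a FROM b")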
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
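For example, since one tree is produced per statement, a two-statement string parses into a two-element list:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
assert len(trees) == 2  # one syntax tree per statement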
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
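For instance, a bare condition is not a complete statement, but it can still be parsed by naming the target type (this assumes exp.Condition is a registered key of EXPRESSION_PARSERS, as it is in the base parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x = 1 AND y > 2"
condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql)[0]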
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
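check_errors and raise_error cooperate with the error_level setting: under ErrorLevel.IMMEDIATE the first error is raised on the spot, while under WARN or RAISE errors accumulate in self.errors until check_errors() runs at the end of parsing. A sketch (the malformed statement is simply an input expected to fail):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT * FROM"  # no table name after FROM
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(bad_sql), bad_sql)  # logs the error instead of raising
print(parser.errors)  # the accumulated ParseError instances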
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
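The parser uses this method for every node it builds, but it can also be called directly; a small sketch constructing and validating an alias node by hand:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(
    exp.Alias,
    this=exp.column("total"),
    alias=exp.to_identifier("t"),
)
print(node.sql())  # total AS t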
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
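A quick way to see validation in action is to pass a node that is missing a mandatory argument; with the default ErrorLevel.IMMEDIATE the first missing-argument message is raised as a ParseError (exp.In is chosen here only because its `this` argument is required):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # defaults to ErrorLevel.IMMEDIATE

try:
    parser.validate_expression(exp.In())  # `this` was never set
except ParseError as error:
    print(error)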