# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.IPADDRESS, 189 TokenType.IPPREFIX, 190 TokenType.ENUM, 191 *NESTED_TYPE_TOKENS, 192 } 193 194 SUBQUERY_PREDICATES = { 195 TokenType.ANY: exp.Any, 196 TokenType.ALL: exp.All, 197 TokenType.EXISTS: exp.Exists, 198 TokenType.SOME: exp.Any, 199 } 200 201 RESERVED_KEYWORDS = { 202 *Tokenizer.SINGLE_TOKENS.values(), 203 TokenType.SELECT, 204 } 205 206 DB_CREATABLES = { 207 TokenType.DATABASE, 208 TokenType.SCHEMA, 209 TokenType.TABLE, 210 TokenType.VIEW, 211 TokenType.DICTIONARY, 212 } 213 214 CREATABLES = { 215 TokenType.COLUMN, 216 TokenType.FUNCTION, 217 TokenType.INDEX, 218 TokenType.PROCEDURE, 219 *DB_CREATABLES, 220 } 221 222 # Tokens that can represent identifiers 223 ID_VAR_TOKENS 
= { 224 TokenType.VAR, 225 TokenType.ANTI, 226 TokenType.APPLY, 227 TokenType.ASC, 228 TokenType.AUTO_INCREMENT, 229 TokenType.BEGIN, 230 TokenType.CACHE, 231 TokenType.CASE, 232 TokenType.COLLATE, 233 TokenType.COMMAND, 234 TokenType.COMMENT, 235 TokenType.COMMIT, 236 TokenType.CONSTRAINT, 237 TokenType.DEFAULT, 238 TokenType.DELETE, 239 TokenType.DESC, 240 TokenType.DESCRIBE, 241 TokenType.DICTIONARY, 242 TokenType.DIV, 243 TokenType.END, 244 TokenType.EXECUTE, 245 TokenType.ESCAPE, 246 TokenType.FALSE, 247 TokenType.FIRST, 248 TokenType.FILTER, 249 TokenType.FORMAT, 250 TokenType.FULL, 251 TokenType.IF, 252 TokenType.IS, 253 TokenType.ISNULL, 254 TokenType.INTERVAL, 255 TokenType.KEEP, 256 TokenType.LEFT, 257 TokenType.LOAD, 258 TokenType.MERGE, 259 TokenType.NATURAL, 260 TokenType.NEXT, 261 TokenType.OFFSET, 262 TokenType.ORDINALITY, 263 TokenType.OVERWRITE, 264 TokenType.PARTITION, 265 TokenType.PERCENT, 266 TokenType.PIVOT, 267 TokenType.PRAGMA, 268 TokenType.RANGE, 269 TokenType.REFERENCES, 270 TokenType.RIGHT, 271 TokenType.ROW, 272 TokenType.ROWS, 273 TokenType.SEMI, 274 TokenType.SET, 275 TokenType.SETTINGS, 276 TokenType.SHOW, 277 TokenType.TEMPORARY, 278 TokenType.TOP, 279 TokenType.TRUE, 280 TokenType.UNIQUE, 281 TokenType.UNPIVOT, 282 TokenType.UPDATE, 283 TokenType.VOLATILE, 284 TokenType.WINDOW, 285 *CREATABLES, 286 *SUBQUERY_PREDICATES, 287 *TYPE_TOKENS, 288 *NO_PAREN_FUNCTIONS, 289 } 290 291 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 292 293 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 294 TokenType.APPLY, 295 TokenType.ASOF, 296 TokenType.FULL, 297 TokenType.LEFT, 298 TokenType.LOCK, 299 TokenType.NATURAL, 300 TokenType.OFFSET, 301 TokenType.RIGHT, 302 TokenType.WINDOW, 303 } 304 305 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 306 307 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 308 309 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 310 311 FUNC_TOKENS = { 312 TokenType.COMMAND, 313 TokenType.CURRENT_DATE, 314 
TokenType.CURRENT_DATETIME, 315 TokenType.CURRENT_TIMESTAMP, 316 TokenType.CURRENT_TIME, 317 TokenType.CURRENT_USER, 318 TokenType.FILTER, 319 TokenType.FIRST, 320 TokenType.FORMAT, 321 TokenType.GLOB, 322 TokenType.IDENTIFIER, 323 TokenType.INDEX, 324 TokenType.ISNULL, 325 TokenType.ILIKE, 326 TokenType.LIKE, 327 TokenType.MERGE, 328 TokenType.OFFSET, 329 TokenType.PRIMARY_KEY, 330 TokenType.RANGE, 331 TokenType.REPLACE, 332 TokenType.RLIKE, 333 TokenType.ROW, 334 TokenType.UNNEST, 335 TokenType.VAR, 336 TokenType.LEFT, 337 TokenType.RIGHT, 338 TokenType.DATE, 339 TokenType.DATETIME, 340 TokenType.TABLE, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMPTZ, 343 TokenType.WINDOW, 344 TokenType.XOR, 345 *TYPE_TOKENS, 346 *SUBQUERY_PREDICATES, 347 } 348 349 CONJUNCTION = { 350 TokenType.AND: exp.And, 351 TokenType.OR: exp.Or, 352 } 353 354 EQUALITY = { 355 TokenType.EQ: exp.EQ, 356 TokenType.NEQ: exp.NEQ, 357 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 358 } 359 360 COMPARISON = { 361 TokenType.GT: exp.GT, 362 TokenType.GTE: exp.GTE, 363 TokenType.LT: exp.LT, 364 TokenType.LTE: exp.LTE, 365 } 366 367 BITWISE = { 368 TokenType.AMP: exp.BitwiseAnd, 369 TokenType.CARET: exp.BitwiseXor, 370 TokenType.PIPE: exp.BitwiseOr, 371 TokenType.DPIPE: exp.DPipe, 372 } 373 374 TERM = { 375 TokenType.DASH: exp.Sub, 376 TokenType.PLUS: exp.Add, 377 TokenType.MOD: exp.Mod, 378 TokenType.COLLATE: exp.Collate, 379 } 380 381 FACTOR = { 382 TokenType.DIV: exp.IntDiv, 383 TokenType.LR_ARROW: exp.Distance, 384 TokenType.SLASH: exp.Div, 385 TokenType.STAR: exp.Mul, 386 } 387 388 TIMESTAMPS = { 389 TokenType.TIME, 390 TokenType.TIMESTAMP, 391 TokenType.TIMESTAMPTZ, 392 TokenType.TIMESTAMPLTZ, 393 } 394 395 SET_OPERATIONS = { 396 TokenType.UNION, 397 TokenType.INTERSECT, 398 TokenType.EXCEPT, 399 } 400 401 JOIN_METHODS = { 402 TokenType.NATURAL, 403 TokenType.ASOF, 404 } 405 406 JOIN_SIDES = { 407 TokenType.LEFT, 408 TokenType.RIGHT, 409 TokenType.FULL, 410 } 411 412 JOIN_KINDS = { 413 
TokenType.INNER, 414 TokenType.OUTER, 415 TokenType.CROSS, 416 TokenType.SEMI, 417 TokenType.ANTI, 418 } 419 420 JOIN_HINTS: t.Set[str] = set() 421 422 LAMBDAS = { 423 TokenType.ARROW: lambda self, expressions: self.expression( 424 exp.Lambda, 425 this=self._replace_lambda( 426 self._parse_conjunction(), 427 {node.name for node in expressions}, 428 ), 429 expressions=expressions, 430 ), 431 TokenType.FARROW: lambda self, expressions: self.expression( 432 exp.Kwarg, 433 this=exp.var(expressions[0].name), 434 expression=self._parse_conjunction(), 435 ), 436 } 437 438 COLUMN_OPERATORS = { 439 TokenType.DOT: None, 440 TokenType.DCOLON: lambda self, this, to: self.expression( 441 exp.Cast if self.STRICT_CAST else exp.TryCast, 442 this=this, 443 to=to, 444 ), 445 TokenType.ARROW: lambda self, this, path: self.expression( 446 exp.JSONExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DARROW: lambda self, this, path: self.expression( 451 exp.JSONExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 456 exp.JSONBExtract, 457 this=this, 458 expression=path, 459 ), 460 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 461 exp.JSONBExtractScalar, 462 this=this, 463 expression=path, 464 ), 465 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 466 exp.JSONBContains, 467 this=this, 468 expression=key, 469 ), 470 } 471 472 EXPRESSION_PARSERS = { 473 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 474 exp.Column: lambda self: self._parse_column(), 475 exp.Condition: lambda self: self._parse_conjunction(), 476 exp.DataType: lambda self: self._parse_types(), 477 exp.Expression: lambda self: self._parse_statement(), 478 exp.From: lambda self: self._parse_from(), 479 exp.Group: lambda self: self._parse_group(), 480 exp.Having: lambda self: self._parse_having(), 481 exp.Identifier: lambda self: self._parse_id_var(), 482 exp.Join: lambda self: 
self._parse_join(), 483 exp.Lambda: lambda self: self._parse_lambda(), 484 exp.Lateral: lambda self: self._parse_lateral(), 485 exp.Limit: lambda self: self._parse_limit(), 486 exp.Offset: lambda self: self._parse_offset(), 487 exp.Order: lambda self: self._parse_order(), 488 exp.Ordered: lambda self: self._parse_ordered(), 489 exp.Properties: lambda self: self._parse_properties(), 490 exp.Qualify: lambda self: self._parse_qualify(), 491 exp.Returning: lambda self: self._parse_returning(), 492 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 493 exp.Table: lambda self: self._parse_table_parts(), 494 exp.TableAlias: lambda self: self._parse_table_alias(), 495 exp.Where: lambda self: self._parse_where(), 496 exp.Window: lambda self: self._parse_named_window(), 497 exp.With: lambda self: self._parse_with(), 498 "JOIN_TYPE": lambda self: self._parse_join_parts(), 499 } 500 501 STATEMENT_PARSERS = { 502 TokenType.ALTER: lambda self: self._parse_alter(), 503 TokenType.BEGIN: lambda self: self._parse_transaction(), 504 TokenType.CACHE: lambda self: self._parse_cache(), 505 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 506 TokenType.COMMENT: lambda self: self._parse_comment(), 507 TokenType.CREATE: lambda self: self._parse_create(), 508 TokenType.DELETE: lambda self: self._parse_delete(), 509 TokenType.DESC: lambda self: self._parse_describe(), 510 TokenType.DESCRIBE: lambda self: self._parse_describe(), 511 TokenType.DROP: lambda self: self._parse_drop(), 512 TokenType.FROM: lambda self: exp.select("*").from_( 513 t.cast(exp.From, self._parse_from(skip_from_token=True)) 514 ), 515 TokenType.INSERT: lambda self: self._parse_insert(), 516 TokenType.LOAD: lambda self: self._parse_load(), 517 TokenType.MERGE: lambda self: self._parse_merge(), 518 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 519 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 520 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 521 TokenType.SET: lambda self: self._parse_set(), 522 TokenType.UNCACHE: lambda self: self._parse_uncache(), 523 TokenType.UPDATE: lambda self: self._parse_update(), 524 TokenType.USE: lambda self: self.expression( 525 exp.Use, 526 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 527 and exp.var(self._prev.text), 528 this=self._parse_table(schema=False), 529 ), 530 } 531 532 UNARY_PARSERS = { 533 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 534 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 535 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 536 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 537 } 538 539 PRIMARY_PARSERS = { 540 TokenType.STRING: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=True 542 ), 543 TokenType.NUMBER: lambda self, token: self.expression( 544 exp.Literal, this=token.text, is_string=False 545 ), 546 TokenType.STAR: lambda self, _: self.expression( 547 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 548 ), 549 TokenType.NULL: lambda self, _: self.expression(exp.Null), 550 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 551 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 552 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 553 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 554 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 555 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 556 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 557 exp.National, this=token.text 558 ), 559 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 560 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 561 } 562 563 PLACEHOLDER_PARSERS = { 564 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 565 TokenType.PARAMETER: lambda self: self._parse_parameter(), 566 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 567 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 568 else None, 569 } 570 571 RANGE_PARSERS = { 572 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 573 TokenType.GLOB: binary_range_parser(exp.Glob), 574 TokenType.ILIKE: binary_range_parser(exp.ILike), 575 TokenType.IN: lambda self, this: self._parse_in(this), 576 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 577 TokenType.IS: lambda self, this: self._parse_is(this), 578 TokenType.LIKE: binary_range_parser(exp.Like), 579 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 580 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 581 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 582 } 583 584 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 585 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 586 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 587 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 588 "CHARACTER SET": lambda self: self._parse_character_set(), 589 "CHECKSUM": lambda self: self._parse_checksum(), 590 "CLUSTER BY": lambda self: self._parse_cluster(), 591 "CLUSTERED": lambda self: self._parse_clustered_by(), 592 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 593 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 594 "COPY": lambda self: self._parse_copy_property(), 595 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 596 "DEFINER": lambda self: self._parse_definer(), 597 "DETERMINISTIC": lambda self: self.expression( 598 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 599 ), 600 "DISTKEY": lambda self: self._parse_distkey(), 601 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 602 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 603 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 604 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 605 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 606 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 607 "FREESPACE": lambda self: self._parse_freespace(), 608 "HEAP": lambda self: self.expression(exp.HeapProperty), 609 "IMMUTABLE": lambda self: self.expression( 610 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 611 ), 612 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 613 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 614 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 615 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 616 "LIKE": lambda self: self._parse_create_like(), 617 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 618 "LOCK": lambda self: self._parse_locking(), 619 "LOCKING": lambda self: self._parse_locking(), 620 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 621 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 622 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 623 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 624 "NO": lambda self: self._parse_no_property(), 625 "ON": lambda self: self._parse_on_property(), 626 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 627 "PARTITION BY": lambda self: self._parse_partitioned_by(), 628 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 629 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 630 
"PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 631 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 632 "RETURNS": lambda self: self._parse_returns(), 633 "ROW": lambda self: self._parse_row(), 634 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 635 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 636 "SETTINGS": lambda self: self.expression( 637 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 638 ), 639 "SORTKEY": lambda self: self._parse_sortkey(), 640 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 641 "STABLE": lambda self: self.expression( 642 exp.StabilityProperty, this=exp.Literal.string("STABLE") 643 ), 644 "STORED": lambda self: self._parse_stored(), 645 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 646 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 647 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 648 "TO": lambda self: self._parse_to_table(), 649 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 650 "TTL": lambda self: self._parse_ttl(), 651 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 652 "VOLATILE": lambda self: self._parse_volatile_property(), 653 "WITH": lambda self: self._parse_with_property(), 654 } 655 656 CONSTRAINT_PARSERS = { 657 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 658 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 659 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 660 "CHARACTER SET": lambda self: self.expression( 661 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 662 ), 663 "CHECK": lambda self: self.expression( 664 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 665 ), 666 "COLLATE": lambda self: self.expression( 667 exp.CollateColumnConstraint, this=self._parse_var() 668 ), 
669 "COMMENT": lambda self: self.expression( 670 exp.CommentColumnConstraint, this=self._parse_string() 671 ), 672 "COMPRESS": lambda self: self._parse_compress(), 673 "DEFAULT": lambda self: self.expression( 674 exp.DefaultColumnConstraint, this=self._parse_bitwise() 675 ), 676 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 677 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 678 "FORMAT": lambda self: self.expression( 679 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 680 ), 681 "GENERATED": lambda self: self._parse_generated_as_identity(), 682 "IDENTITY": lambda self: self._parse_auto_increment(), 683 "INLINE": lambda self: self._parse_inline(), 684 "LIKE": lambda self: self._parse_create_like(), 685 "NOT": lambda self: self._parse_not_constraint(), 686 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 687 "ON": lambda self: self._match(TokenType.UPDATE) 688 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 689 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 690 "PRIMARY KEY": lambda self: self._parse_primary_key(), 691 "REFERENCES": lambda self: self._parse_references(match=False), 692 "TITLE": lambda self: self.expression( 693 exp.TitleColumnConstraint, this=self._parse_var_or_string() 694 ), 695 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 696 "UNIQUE": lambda self: self._parse_unique(), 697 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 698 } 699 700 ALTER_PARSERS = { 701 "ADD": lambda self: self._parse_alter_table_add(), 702 "ALTER": lambda self: self._parse_alter_table_alter(), 703 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 704 "DROP": lambda self: self._parse_alter_table_drop(), 705 "RENAME": lambda self: self._parse_alter_table_rename(), 706 } 707 708 
SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 709 710 NO_PAREN_FUNCTION_PARSERS = { 711 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 712 TokenType.CASE: lambda self: self._parse_case(), 713 TokenType.IF: lambda self: self._parse_if(), 714 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 715 exp.NextValueFor, 716 this=self._parse_column(), 717 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 718 ), 719 } 720 721 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 722 723 FUNCTION_PARSERS = { 724 "ANY_VALUE": lambda self: self._parse_any_value(), 725 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 726 "CONCAT": lambda self: self._parse_concat(), 727 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 728 "DECODE": lambda self: self._parse_decode(), 729 "EXTRACT": lambda self: self._parse_extract(), 730 "JSON_OBJECT": lambda self: self._parse_json_object(), 731 "LOG": lambda self: self._parse_logarithm(), 732 "MATCH": lambda self: self._parse_match_against(), 733 "OPENJSON": lambda self: self._parse_open_json(), 734 "POSITION": lambda self: self._parse_position(), 735 "SAFE_CAST": lambda self: self._parse_cast(False), 736 "STRING_AGG": lambda self: self._parse_string_agg(), 737 "SUBSTRING": lambda self: self._parse_substring(), 738 "TRIM": lambda self: self._parse_trim(), 739 "TRY_CAST": lambda self: self._parse_cast(False), 740 "TRY_CONVERT": lambda self: self._parse_convert(False), 741 } 742 743 QUERY_MODIFIER_PARSERS = { 744 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 745 TokenType.WHERE: lambda self: ("where", self._parse_where()), 746 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 747 TokenType.HAVING: lambda self: ("having", self._parse_having()), 748 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 749 TokenType.WINDOW: lambda self: ("windows", 
self._parse_window_clause()), 750 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 751 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 752 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 753 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 754 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 755 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 756 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 757 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 758 TokenType.CLUSTER_BY: lambda self: ( 759 "cluster", 760 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 761 ), 762 TokenType.DISTRIBUTE_BY: lambda self: ( 763 "distribute", 764 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 765 ), 766 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 767 } 768 769 SET_PARSERS = { 770 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 771 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 772 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 773 "TRANSACTION": lambda self: self._parse_set_transaction(), 774 } 775 776 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 777 778 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 779 780 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 781 782 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 783 784 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 785 786 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 787 TRANSACTION_CHARACTERISTICS = { 788 "ISOLATION LEVEL REPEATABLE READ", 789 "ISOLATION LEVEL READ COMMITTED", 790 "ISOLATION LEVEL READ UNCOMMITTED", 791 "ISOLATION LEVEL SERIALIZABLE", 792 "READ WRITE", 793 "READ ONLY", 794 } 795 796 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", 
"ROLLBACK"} 797 798 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 799 800 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 801 802 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 803 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 804 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 805 806 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 807 808 STRICT_CAST = True 809 810 # A NULL arg in CONCAT yields NULL by default 811 CONCAT_NULL_OUTPUTS_STRING = False 812 813 PREFIXED_PIVOT_COLUMNS = False 814 IDENTIFY_PIVOT_STRINGS = False 815 816 LOG_BASE_FIRST = True 817 LOG_DEFAULTS_TO_LN = False 818 819 __slots__ = ( 820 "error_level", 821 "error_message_context", 822 "max_errors", 823 "sql", 824 "errors", 825 "_tokens", 826 "_index", 827 "_curr", 828 "_next", 829 "_prev", 830 "_prev_comments", 831 ) 832 833 # Autofilled 834 INDEX_OFFSET: int = 0 835 UNNEST_COLUMN_ONLY: bool = False 836 ALIAS_POST_TABLESAMPLE: bool = False 837 STRICT_STRING_CONCAT = False 838 NORMALIZE_FUNCTIONS = "upper" 839 NULL_ORDERING: str = "nulls_are_small" 840 SHOW_TRIE: t.Dict = {} 841 SET_TRIE: t.Dict = {} 842 FORMAT_MAPPING: t.Dict[str, str] = {} 843 FORMAT_TRIE: t.Dict = {} 844 TIME_MAPPING: t.Dict[str, str] = {} 845 TIME_TRIE: t.Dict = {} 846 847 def __init__( 848 self, 849 error_level: t.Optional[ErrorLevel] = None, 850 error_message_context: int = 100, 851 max_errors: int = 3, 852 ): 853 self.error_level = error_level or ErrorLevel.IMMEDIATE 854 self.error_message_context = error_message_context 855 self.max_errors = max_errors 856 self.reset() 857 858 def reset(self): 859 self.sql = "" 860 self.errors = [] 861 self._tokens = [] 862 self._index = 0 863 self._curr = None 864 self._next = None 865 self._prev = None 866 self._prev_comments = None 867 868 def parse( 869 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 870 ) -> t.List[t.Optional[exp.Expression]]: 871 """ 872 Parses a list of tokens and returns 
a list of syntax trees, one tree 873 per parsed SQL statement. 874 875 Args: 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The list of the produced syntax trees. 881 """ 882 return self._parse( 883 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 884 ) 885 886 def parse_into( 887 self, 888 expression_types: exp.IntoType, 889 raw_tokens: t.List[Token], 890 sql: t.Optional[str] = None, 891 ) -> t.List[t.Optional[exp.Expression]]: 892 """ 893 Parses a list of tokens into a given Expression type. If a collection of Expression 894 types is given instead, this method will try to parse the token list into each one 895 of them, stopping at the first for which the parsing succeeds. 896 897 Args: 898 expression_types: The expression type(s) to try and parse the token list into. 899 raw_tokens: The list of tokens. 900 sql: The original SQL string, used to produce helpful debug messages. 901 902 Returns: 903 The target Expression. 
904 """ 905 errors = [] 906 for expression_type in ensure_list(expression_types): 907 parser = self.EXPRESSION_PARSERS.get(expression_type) 908 if not parser: 909 raise TypeError(f"No parser registered for {expression_type}") 910 911 try: 912 return self._parse(parser, raw_tokens, sql) 913 except ParseError as e: 914 e.errors[0]["into_expression"] = expression_type 915 errors.append(e) 916 917 raise ParseError( 918 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 919 errors=merge_errors(errors), 920 ) from errors[-1] 921 922 def _parse( 923 self, 924 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 925 raw_tokens: t.List[Token], 926 sql: t.Optional[str] = None, 927 ) -> t.List[t.Optional[exp.Expression]]: 928 self.reset() 929 self.sql = sql or "" 930 931 total = len(raw_tokens) 932 chunks: t.List[t.List[Token]] = [[]] 933 934 for i, token in enumerate(raw_tokens): 935 if token.token_type == TokenType.SEMICOLON: 936 if i < total - 1: 937 chunks.append([]) 938 else: 939 chunks[-1].append(token) 940 941 expressions = [] 942 943 for tokens in chunks: 944 self._index = -1 945 self._tokens = tokens 946 self._advance() 947 948 expressions.append(parse_method(self)) 949 950 if self._index < len(self._tokens): 951 self.raise_error("Invalid expression / Unexpected token") 952 953 self.check_errors() 954 955 return expressions 956 957 def check_errors(self) -> None: 958 """Logs or raises any found errors, depending on the chosen error level setting.""" 959 if self.error_level == ErrorLevel.WARN: 960 for error in self.errors: 961 logger.error(str(error)) 962 elif self.error_level == ErrorLevel.RAISE and self.errors: 963 raise ParseError( 964 concat_messages(self.errors, self.max_errors), 965 errors=merge_errors(self.errors), 966 ) 967 968 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 969 """ 970 Appends an error in the list of recorded errors or raises it, depending on the chosen 971 error level setting. 
972 """ 973 token = token or self._curr or self._prev or Token.string("") 974 start = token.start 975 end = token.end + 1 976 start_context = self.sql[max(start - self.error_message_context, 0) : start] 977 highlight = self.sql[start:end] 978 end_context = self.sql[end : end + self.error_message_context] 979 980 error = ParseError.new( 981 f"{message}. Line {token.line}, Col: {token.col}.\n" 982 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 983 description=message, 984 line=token.line, 985 col=token.col, 986 start_context=start_context, 987 highlight=highlight, 988 end_context=end_context, 989 ) 990 991 if self.error_level == ErrorLevel.IMMEDIATE: 992 raise error 993 994 self.errors.append(error) 995 996 def expression( 997 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 998 ) -> E: 999 """ 1000 Creates a new, validated Expression. 1001 1002 Args: 1003 exp_class: The expression class to instantiate. 1004 comments: An optional list of comments to attach to the expression. 1005 kwargs: The arguments to set for the expression along with their respective values. 1006 1007 Returns: 1008 The target expression. 1009 """ 1010 instance = exp_class(**kwargs) 1011 instance.add_comments(comments) if comments else self._add_comments(instance) 1012 return self.validate_expression(instance) 1013 1014 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1015 if expression and self._prev_comments: 1016 expression.add_comments(self._prev_comments) 1017 self._prev_comments = None 1018 1019 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1020 """ 1021 Validates an Expression, making sure that all its mandatory arguments are set. 1022 1023 Args: 1024 expression: The expression to validate. 1025 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1026 1027 Returns: 1028 The validated expression. 
1029 """ 1030 if self.error_level != ErrorLevel.IGNORE: 1031 for error_message in expression.error_messages(args): 1032 self.raise_error(error_message) 1033 1034 return expression 1035 1036 def _find_sql(self, start: Token, end: Token) -> str: 1037 return self.sql[start.start : end.end + 1] 1038 1039 def _advance(self, times: int = 1) -> None: 1040 self._index += times 1041 self._curr = seq_get(self._tokens, self._index) 1042 self._next = seq_get(self._tokens, self._index + 1) 1043 1044 if self._index > 0: 1045 self._prev = self._tokens[self._index - 1] 1046 self._prev_comments = self._prev.comments 1047 else: 1048 self._prev = None 1049 self._prev_comments = None 1050 1051 def _retreat(self, index: int) -> None: 1052 if index != self._index: 1053 self._advance(index - self._index) 1054 1055 def _parse_command(self) -> exp.Command: 1056 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1057 1058 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1059 start = self._prev 1060 exists = self._parse_exists() if allow_exists else None 1061 1062 self._match(TokenType.ON) 1063 1064 kind = self._match_set(self.CREATABLES) and self._prev 1065 if not kind: 1066 return self._parse_as_command(start) 1067 1068 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1069 this = self._parse_user_defined_function(kind=kind.token_type) 1070 elif kind.token_type == TokenType.TABLE: 1071 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1072 elif kind.token_type == TokenType.COLUMN: 1073 this = self._parse_column() 1074 else: 1075 this = self._parse_id_var() 1076 1077 self._match(TokenType.IS) 1078 1079 return self.expression( 1080 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1081 ) 1082 1083 def _parse_to_table( 1084 self, 1085 ) -> exp.ToTableProperty: 1086 table = self._parse_table_parts(schema=True) 1087 return self.expression(exp.ToTableProperty, 
this=table) 1088 1089 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1090 def _parse_ttl(self) -> exp.Expression: 1091 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1092 this = self._parse_bitwise() 1093 1094 if self._match_text_seq("DELETE"): 1095 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1096 if self._match_text_seq("RECOMPRESS"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1099 ) 1100 if self._match_text_seq("TO", "DISK"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1103 ) 1104 if self._match_text_seq("TO", "VOLUME"): 1105 return self.expression( 1106 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1107 ) 1108 1109 return this 1110 1111 expressions = self._parse_csv(_parse_ttl_action) 1112 where = self._parse_where() 1113 group = self._parse_group() 1114 1115 aggregates = None 1116 if group and self._match(TokenType.SET): 1117 aggregates = self._parse_csv(self._parse_set_item) 1118 1119 return self.expression( 1120 exp.MergeTreeTTL, 1121 expressions=expressions, 1122 where=where, 1123 group=group, 1124 aggregates=aggregates, 1125 ) 1126 1127 def _parse_statement(self) -> t.Optional[exp.Expression]: 1128 if self._curr is None: 1129 return None 1130 1131 if self._match_set(self.STATEMENT_PARSERS): 1132 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1133 1134 if self._match_set(Tokenizer.COMMANDS): 1135 return self._parse_command() 1136 1137 expression = self._parse_expression() 1138 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1139 return self._parse_query_modifiers(expression) 1140 1141 def _parse_drop(self) -> exp.Drop | exp.Command: 1142 start = self._prev 1143 temporary = self._match(TokenType.TEMPORARY) 1144 materialized = self._match_text_seq("MATERIALIZED") 1145 1146 kind = 
self._match_set(self.CREATABLES) and self._prev.text 1147 if not kind: 1148 return self._parse_as_command(start) 1149 1150 return self.expression( 1151 exp.Drop, 1152 comments=start.comments, 1153 exists=self._parse_exists(), 1154 this=self._parse_table(schema=True), 1155 kind=kind, 1156 temporary=temporary, 1157 materialized=materialized, 1158 cascade=self._match_text_seq("CASCADE"), 1159 constraints=self._match_text_seq("CONSTRAINTS"), 1160 purge=self._match_text_seq("PURGE"), 1161 ) 1162 1163 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1164 return ( 1165 self._match(TokenType.IF) 1166 and (not not_ or self._match(TokenType.NOT)) 1167 and self._match(TokenType.EXISTS) 1168 ) 1169 1170 def _parse_create(self) -> exp.Create | exp.Command: 1171 # Note: this can't be None because we've matched a statement parser 1172 start = self._prev 1173 replace = start.text.upper() == "REPLACE" or self._match_pair( 1174 TokenType.OR, TokenType.REPLACE 1175 ) 1176 unique = self._match(TokenType.UNIQUE) 1177 1178 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1179 self._advance() 1180 1181 properties = None 1182 create_token = self._match_set(self.CREATABLES) and self._prev 1183 1184 if not create_token: 1185 # exp.Properties.Location.POST_CREATE 1186 properties = self._parse_properties() 1187 create_token = self._match_set(self.CREATABLES) and self._prev 1188 1189 if not properties or not create_token: 1190 return self._parse_as_command(start) 1191 1192 exists = self._parse_exists(not_=True) 1193 this = None 1194 expression: t.Optional[exp.Expression] = None 1195 indexes = None 1196 no_schema_binding = None 1197 begin = None 1198 clone = None 1199 1200 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1201 nonlocal properties 1202 if properties and temp_props: 1203 properties.expressions.extend(temp_props.expressions) 1204 elif temp_props: 1205 properties = temp_props 1206 1207 if create_token.token_type in 
(TokenType.FUNCTION, TokenType.PROCEDURE): 1208 this = self._parse_user_defined_function(kind=create_token.token_type) 1209 1210 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1211 extend_props(self._parse_properties()) 1212 1213 self._match(TokenType.ALIAS) 1214 1215 if self._match(TokenType.COMMAND): 1216 expression = self._parse_as_command(self._prev) 1217 else: 1218 begin = self._match(TokenType.BEGIN) 1219 return_ = self._match_text_seq("RETURN") 1220 expression = self._parse_statement() 1221 1222 if return_: 1223 expression = self.expression(exp.Return, this=expression) 1224 elif create_token.token_type == TokenType.INDEX: 1225 this = self._parse_index(index=self._parse_id_var()) 1226 elif create_token.token_type in self.DB_CREATABLES: 1227 table_parts = self._parse_table_parts(schema=True) 1228 1229 # exp.Properties.Location.POST_NAME 1230 self._match(TokenType.COMMA) 1231 extend_props(self._parse_properties(before=True)) 1232 1233 this = self._parse_schema(this=table_parts) 1234 1235 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1236 extend_props(self._parse_properties()) 1237 1238 self._match(TokenType.ALIAS) 1239 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1240 # exp.Properties.Location.POST_ALIAS 1241 extend_props(self._parse_properties()) 1242 1243 expression = self._parse_ddl_select() 1244 1245 if create_token.token_type == TokenType.TABLE: 1246 # exp.Properties.Location.POST_EXPRESSION 1247 extend_props(self._parse_properties()) 1248 1249 indexes = [] 1250 while True: 1251 index = self._parse_index() 1252 1253 # exp.Properties.Location.POST_INDEX 1254 extend_props(self._parse_properties()) 1255 1256 if not index: 1257 break 1258 else: 1259 self._match(TokenType.COMMA) 1260 indexes.append(index) 1261 elif create_token.token_type == TokenType.VIEW: 1262 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1263 no_schema_binding = True 1264 1265 if self._match_text_seq("CLONE"): 1266 
clone = self._parse_table(schema=True) 1267 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1268 clone_kind = ( 1269 self._match(TokenType.L_PAREN) 1270 and self._match_texts(self.CLONE_KINDS) 1271 and self._prev.text.upper() 1272 ) 1273 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1274 self._match(TokenType.R_PAREN) 1275 clone = self.expression( 1276 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1277 ) 1278 1279 return self.expression( 1280 exp.Create, 1281 this=this, 1282 kind=create_token.text, 1283 replace=replace, 1284 unique=unique, 1285 expression=expression, 1286 exists=exists, 1287 properties=properties, 1288 indexes=indexes, 1289 no_schema_binding=no_schema_binding, 1290 begin=begin, 1291 clone=clone, 1292 ) 1293 1294 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1295 # only used for teradata currently 1296 self._match(TokenType.COMMA) 1297 1298 kwargs = { 1299 "no": self._match_text_seq("NO"), 1300 "dual": self._match_text_seq("DUAL"), 1301 "before": self._match_text_seq("BEFORE"), 1302 "default": self._match_text_seq("DEFAULT"), 1303 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1304 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1305 "after": self._match_text_seq("AFTER"), 1306 "minimum": self._match_texts(("MIN", "MINIMUM")), 1307 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1308 } 1309 1310 if self._match_texts(self.PROPERTY_PARSERS): 1311 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1312 try: 1313 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1314 except TypeError: 1315 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1316 1317 return None 1318 1319 def _parse_property(self) -> t.Optional[exp.Expression]: 1320 if self._match_texts(self.PROPERTY_PARSERS): 1321 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1322 1323 if self._match_pair(TokenType.DEFAULT, 
TokenType.CHARACTER_SET): 1324 return self._parse_character_set(default=True) 1325 1326 if self._match_text_seq("COMPOUND", "SORTKEY"): 1327 return self._parse_sortkey(compound=True) 1328 1329 if self._match_text_seq("SQL", "SECURITY"): 1330 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1331 1332 assignment = self._match_pair( 1333 TokenType.VAR, TokenType.EQ, advance=False 1334 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1335 1336 if assignment: 1337 key = self._parse_var_or_string() 1338 self._match(TokenType.EQ) 1339 return self.expression(exp.Property, this=key, value=self._parse_column()) 1340 1341 return None 1342 1343 def _parse_stored(self) -> exp.FileFormatProperty: 1344 self._match(TokenType.ALIAS) 1345 1346 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1347 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1348 1349 return self.expression( 1350 exp.FileFormatProperty, 1351 this=self.expression( 1352 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1353 ) 1354 if input_format or output_format 1355 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1356 ) 1357 1358 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1359 self._match(TokenType.EQ) 1360 self._match(TokenType.ALIAS) 1361 return self.expression(exp_class, this=self._parse_field()) 1362 1363 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1364 properties = [] 1365 while True: 1366 if before: 1367 prop = self._parse_property_before() 1368 else: 1369 prop = self._parse_property() 1370 1371 if not prop: 1372 break 1373 for p in ensure_list(prop): 1374 properties.append(p) 1375 1376 if properties: 1377 return self.expression(exp.Properties, expressions=properties) 1378 1379 return None 1380 1381 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property vs. a function stability marker."""
        # Look two tokens back to see what preceded VOLATILE.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the clause(s) following WITH in a property list."""
        # WITH (...) — a parenthesized, comma-separated property list.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host (MySQL)."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may also be the bare token following a `%` (MOD) token.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def
_parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON | OFF [DEFAULT]; `on` stays None if neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back up over COPY and bail.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With `=`, an explicit ratio value follows; otherwise only the
        # no/default flags captured by the caller apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
percent=self._match(TokenType.PERCENT), 1500 ) 1501 1502 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1503 1504 def _parse_datablocksize( 1505 self, 1506 default: t.Optional[bool] = None, 1507 minimum: t.Optional[bool] = None, 1508 maximum: t.Optional[bool] = None, 1509 ) -> exp.DataBlocksizeProperty: 1510 self._match(TokenType.EQ) 1511 size = self._parse_number() 1512 1513 units = None 1514 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1515 units = self._prev.text 1516 1517 return self.expression( 1518 exp.DataBlocksizeProperty, 1519 size=size, 1520 units=units, 1521 default=default, 1522 minimum=minimum, 1523 maximum=maximum, 1524 ) 1525 1526 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1527 self._match(TokenType.EQ) 1528 always = self._match_text_seq("ALWAYS") 1529 manual = self._match_text_seq("MANUAL") 1530 never = self._match_text_seq("NEVER") 1531 default = self._match_text_seq("DEFAULT") 1532 1533 autotemp = None 1534 if self._match_text_seq("AUTOTEMP"): 1535 autotemp = self._parse_schema() 1536 1537 return self.expression( 1538 exp.BlockCompressionProperty, 1539 always=always, 1540 manual=manual, 1541 never=never, 1542 default=default, 1543 autotemp=autotemp, 1544 ) 1545 1546 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1547 no = self._match_text_seq("NO") 1548 concurrent = self._match_text_seq("CONCURRENT") 1549 self._match_text_seq("ISOLATED", "LOADING") 1550 for_all = self._match_text_seq("FOR", "ALL") 1551 for_insert = self._match_text_seq("FOR", "INSERT") 1552 for_none = self._match_text_seq("FOR", "NONE") 1553 return self.expression( 1554 exp.IsolatedLoadingProperty, 1555 no=no, 1556 concurrent=concurrent, 1557 for_all=for_all, 1558 for_insert=for_insert, 1559 for_none=for_none, 1560 ) 1561 1562 def _parse_locking(self) -> exp.LockingProperty: 1563 if self._match(TokenType.TABLE): 1564 kind = "TABLE" 1565 elif self._match(TokenType.VIEW): 1566 kind = "VIEW" 1567 
elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only object-level locks name a target; ROW locks do not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Lock strength; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses [AND [NO] STATISTICS] after WITH [NO] DATA; `statistics` stays None if absent."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if
self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses ON COMMIT PRESERVE|DELETE ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: either RETURNS TABLE [<schema>] or RETURNS <type>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            # RETURNS TABLE<...> — struct-style column list in angle brackets.
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value =
self._parse_types() 1694 1695 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1696 1697 def _parse_describe(self) -> exp.Describe: 1698 kind = self._match_set(self.CREATABLES) and self._prev.text 1699 this = self._parse_table() 1700 return self.expression(exp.Describe, this=this, kind=kind) 1701 1702 def _parse_insert(self) -> exp.Insert: 1703 comments = ensure_list(self._prev_comments) 1704 overwrite = self._match(TokenType.OVERWRITE) 1705 ignore = self._match(TokenType.IGNORE) 1706 local = self._match_text_seq("LOCAL") 1707 alternative = None 1708 1709 if self._match_text_seq("DIRECTORY"): 1710 this: t.Optional[exp.Expression] = self.expression( 1711 exp.Directory, 1712 this=self._parse_var_or_string(), 1713 local=local, 1714 row_format=self._parse_row_format(match_row=True), 1715 ) 1716 else: 1717 if self._match(TokenType.OR): 1718 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1719 1720 self._match(TokenType.INTO) 1721 comments += ensure_list(self._prev_comments) 1722 self._match(TokenType.TABLE) 1723 this = self._parse_table(schema=True) 1724 1725 returning = self._parse_returning() 1726 1727 return self.expression( 1728 exp.Insert, 1729 comments=comments, 1730 this=this, 1731 exists=self._parse_exists(), 1732 partition=self._parse_partition(), 1733 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1734 and self._parse_conjunction(), 1735 expression=self._parse_ddl_select(), 1736 conflict=self._parse_on_conflict(), 1737 returning=returning or self._parse_returning(), 1738 overwrite=overwrite, 1739 alternative=alternative, 1740 ignore=ignore, 1741 ) 1742 1743 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1744 conflict = self._match_text_seq("ON", "CONFLICT") 1745 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1746 1747 if not conflict and not duplicate: 1748 return None 1749 1750 nothing = None 1751 expressions = None 1752 key = None 1753 constraint = None 1754 1755 if 
conflict:
            # ON CONFLICT targets either a named constraint or a key column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Caller has already consumed ROW; FORMAT must follow.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses a Hive-style ROW FORMAT SERDE '<name>' or ROW FORMAT DELIMITED clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        # Each DELIMITED sub-clause is optional and contributes one kwarg.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] =
self._parse_string() 1819 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1820 kwargs["collection_items"] = self._parse_string() 1821 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1822 kwargs["map_keys"] = self._parse_string() 1823 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1824 kwargs["lines"] = self._parse_string() 1825 if self._match_text_seq("NULL", "DEFINED", "AS"): 1826 kwargs["null"] = self._parse_string() 1827 1828 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1829 1830 def _parse_load(self) -> exp.LoadData | exp.Command: 1831 if self._match_text_seq("DATA"): 1832 local = self._match_text_seq("LOCAL") 1833 self._match_text_seq("INPATH") 1834 inpath = self._parse_string() 1835 overwrite = self._match(TokenType.OVERWRITE) 1836 self._match_pair(TokenType.INTO, TokenType.TABLE) 1837 1838 return self.expression( 1839 exp.LoadData, 1840 this=self._parse_table(schema=True), 1841 local=local, 1842 overwrite=overwrite, 1843 inpath=inpath, 1844 partition=self._parse_partition(), 1845 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1846 serde=self._match_text_seq("SERDE") and self._parse_string(), 1847 ) 1848 return self._parse_as_command(self._prev) 1849 1850 def _parse_delete(self) -> exp.Delete: 1851 # This handles MySQL's "Multiple-Table Syntax" 1852 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1853 tables = None 1854 comments = self._prev_comments 1855 if not self._match(TokenType.FROM, advance=False): 1856 tables = self._parse_csv(self._parse_table) or None 1857 1858 returning = self._parse_returning() 1859 1860 return self.expression( 1861 exp.Delete, 1862 comments=comments, 1863 tables=tables, 1864 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1865 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1866 where=self._parse_where(), 1867 returning=returning or self._parse_returning(), 1868 
limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses an UPDATE statement (target, SET assignments, FROM/WHERE/RETURNING/LIMIT)."""
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING may appear before or after the other trailing clauses.
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        # OPTIONS is captured as a single [key, value] pair.
        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # A parenthesized row constructor: (expr, expr, ...).
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple,
expressions=expressions) 1933 1934 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1935 # https://prestodb.io/docs/current/sql/values.html 1936 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1937 1938 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1939 return self._parse_expressions() 1940 1941 def _parse_select( 1942 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1943 ) -> t.Optional[exp.Expression]: 1944 cte = self._parse_with() 1945 if cte: 1946 this = self._parse_statement() 1947 1948 if not this: 1949 self.raise_error("Failed to parse any statement following CTE") 1950 return cte 1951 1952 if "with" in this.arg_types: 1953 this.set("with", cte) 1954 else: 1955 self.raise_error(f"{this.key} does not support CTE") 1956 this = cte 1957 elif self._match(TokenType.SELECT): 1958 comments = self._prev_comments 1959 1960 hint = self._parse_hint() 1961 all_ = self._match(TokenType.ALL) 1962 distinct = self._match(TokenType.DISTINCT) 1963 1964 kind = ( 1965 self._match(TokenType.ALIAS) 1966 and self._match_texts(("STRUCT", "VALUE")) 1967 and self._prev.text 1968 ) 1969 1970 if distinct: 1971 distinct = self.expression( 1972 exp.Distinct, 1973 on=self._parse_value() if self._match(TokenType.ON) else None, 1974 ) 1975 1976 if all_ and distinct: 1977 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1978 1979 limit = self._parse_limit(top=True) 1980 projections = self._parse_projections() 1981 1982 this = self.expression( 1983 exp.Select, 1984 kind=kind, 1985 hint=hint, 1986 distinct=distinct, 1987 expressions=projections, 1988 limit=limit, 1989 ) 1990 this.comments = comments 1991 1992 into = self._parse_into() 1993 if into: 1994 this.set("into", into) 1995 1996 from_ = self._parse_from() 1997 if from_: 1998 this.set("from", from_) 1999 2000 this = self._parse_query_modifiers(this) 2001 elif (table or nested) and self._match(TokenType.L_PAREN): 
            # Parenthesized query: simplified PIVOT, FROM-first query, or a nested
            # table/select, possibly followed by set operations.
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause into an exp.With, or return None."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH between CTEs is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: ``alias [AS] (statement)``; raises if the alias is missing."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, optionally followed by a wrapped column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis did not actually start a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery, parsing trailing pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered query modifiers (via
        QUERY_MODIFIER_PARSERS) to `this`, when `this` is a modifiable node.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y form carries the offset inside the Limit
                            # node; hoist it out into a proper Offset modifier.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an Oracle-style hint block ``/*+ ... */`` into exp.Hint, or return None."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
2124 2125 return self.expression(exp.Hint, expressions=hints) 2126 2127 return None 2128 2129 def _parse_into(self) -> t.Optional[exp.Into]: 2130 if not self._match(TokenType.INTO): 2131 return None 2132 2133 temp = self._match(TokenType.TEMPORARY) 2134 unlogged = self._match_text_seq("UNLOGGED") 2135 self._match(TokenType.TABLE) 2136 2137 return self.expression( 2138 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2139 ) 2140 2141 def _parse_from( 2142 self, joins: bool = False, skip_from_token: bool = False 2143 ) -> t.Optional[exp.From]: 2144 if not skip_from_token and not self._match(TokenType.FROM): 2145 return None 2146 2147 return self.expression( 2148 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2149 ) 2150 2151 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2152 if not self._match(TokenType.MATCH_RECOGNIZE): 2153 return None 2154 2155 self._match_l_paren() 2156 2157 partition = self._parse_partition_by() 2158 order = self._parse_order() 2159 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2160 2161 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2162 rows = exp.var("ONE ROW PER MATCH") 2163 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2164 text = "ALL ROWS PER MATCH" 2165 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2166 text += f" SHOW EMPTY MATCHES" 2167 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2168 text += f" OMIT EMPTY MATCHES" 2169 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2170 text += f" WITH UNMATCHED ROWS" 2171 rows = exp.var(text) 2172 else: 2173 rows = None 2174 2175 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2176 text = "AFTER MATCH SKIP" 2177 if self._match_text_seq("PAST", "LAST", "ROW"): 2178 text += f" PAST LAST ROW" 2179 elif self._match_text_seq("TO", "NEXT", "ROW"): 2180 text += f" TO NEXT ROW" 2181 elif self._match_text_seq("TO", "FIRST"): 2182 text += f" 
TO FIRST {self._advance_any().text}" # type: ignore 2183 elif self._match_text_seq("TO", "LAST"): 2184 text += f" TO LAST {self._advance_any().text}" # type: ignore 2185 after = exp.var(text) 2186 else: 2187 after = None 2188 2189 if self._match_text_seq("PATTERN"): 2190 self._match_l_paren() 2191 2192 if not self._curr: 2193 self.raise_error("Expecting )", self._curr) 2194 2195 paren = 1 2196 start = self._curr 2197 2198 while self._curr and paren > 0: 2199 if self._curr.token_type == TokenType.L_PAREN: 2200 paren += 1 2201 if self._curr.token_type == TokenType.R_PAREN: 2202 paren -= 1 2203 2204 end = self._prev 2205 self._advance() 2206 2207 if paren > 0: 2208 self.raise_error("Expecting )", self._curr) 2209 2210 pattern = exp.var(self._find_sql(start, end)) 2211 else: 2212 pattern = None 2213 2214 define = ( 2215 self._parse_csv( 2216 lambda: self.expression( 2217 exp.Alias, 2218 alias=self._parse_id_var(any_token=True), 2219 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2220 ) 2221 ) 2222 if self._match_text_seq("DEFINE") 2223 else None 2224 ) 2225 2226 self._match_r_paren() 2227 2228 return self.expression( 2229 exp.MatchRecognize, 2230 partition_by=partition, 2231 order=order, 2232 measures=measures, 2233 rows=rows, 2234 after=after, 2235 pattern=pattern, 2236 define=define, 2237 alias=self._parse_table_alias(), 2238 ) 2239 2240 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2241 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2242 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2243 2244 if outer_apply or cross_apply: 2245 this = self._parse_select(table=True) 2246 view = None 2247 outer = not cross_apply 2248 elif self._match(TokenType.LATERAL): 2249 this = self._parse_select(table=True) 2250 view = self._match(TokenType.VIEW) 2251 outer = self._match(TokenType.OUTER) 2252 else: 2253 return None 2254 2255 if not this: 2256 this = ( 2257 self._parse_unnest() 2258 or self._parse_function() 2259 or 
self._parse_id_var(any_token=False) 2260 ) 2261 2262 while self._match(TokenType.DOT): 2263 this = exp.Dot( 2264 this=this, 2265 expression=self._parse_function() or self._parse_id_var(any_token=False), 2266 ) 2267 2268 if view: 2269 table = self._parse_id_var(any_token=False) 2270 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2271 table_alias: t.Optional[exp.TableAlias] = self.expression( 2272 exp.TableAlias, this=table, columns=columns 2273 ) 2274 elif isinstance(this, exp.Subquery) and this.alias: 2275 # Ensures parity between the Subquery's and the Lateral's "alias" args 2276 table_alias = this.args["alias"].copy() 2277 else: 2278 table_alias = self._parse_table_alias() 2279 2280 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2281 2282 def _parse_join_parts( 2283 self, 2284 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2285 return ( 2286 self._match_set(self.JOIN_METHODS) and self._prev, 2287 self._match_set(self.JOIN_SIDES) and self._prev, 2288 self._match_set(self.JOIN_KINDS) and self._prev, 2289 ) 2290 2291 def _parse_join( 2292 self, skip_join_token: bool = False, parse_bracket: bool = False 2293 ) -> t.Optional[exp.Join]: 2294 if self._match(TokenType.COMMA): 2295 return self.expression(exp.Join, this=self._parse_table()) 2296 2297 index = self._index 2298 method, side, kind = self._parse_join_parts() 2299 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2300 join = self._match(TokenType.JOIN) 2301 2302 if not skip_join_token and not join: 2303 self._retreat(index) 2304 kind = None 2305 method = None 2306 side = None 2307 2308 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2309 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2310 2311 if not skip_join_token and not join and not outer_apply and not cross_apply: 2312 return None 2313 2314 if outer_apply: 2315 side = 
Token(TokenType.LEFT, "LEFT") 2316 2317 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2318 2319 if method: 2320 kwargs["method"] = method.text 2321 if side: 2322 kwargs["side"] = side.text 2323 if kind: 2324 kwargs["kind"] = kind.text 2325 if hint: 2326 kwargs["hint"] = hint 2327 2328 if self._match(TokenType.ON): 2329 kwargs["on"] = self._parse_conjunction() 2330 elif self._match(TokenType.USING): 2331 kwargs["using"] = self._parse_wrapped_id_vars() 2332 elif not (kind and kind.token_type == TokenType.CROSS): 2333 index = self._index 2334 joins = self._parse_joins() 2335 2336 if joins and self._match(TokenType.ON): 2337 kwargs["on"] = self._parse_conjunction() 2338 elif joins and self._match(TokenType.USING): 2339 kwargs["using"] = self._parse_wrapped_id_vars() 2340 else: 2341 joins = None 2342 self._retreat(index) 2343 2344 kwargs["this"].set("joins", joins) 2345 2346 return self.expression(exp.Join, **kwargs) 2347 2348 def _parse_index( 2349 self, 2350 index: t.Optional[exp.Expression] = None, 2351 ) -> t.Optional[exp.Index]: 2352 if index: 2353 unique = None 2354 primary = None 2355 amp = None 2356 2357 self._match(TokenType.ON) 2358 self._match(TokenType.TABLE) # hive 2359 table = self._parse_table_parts(schema=True) 2360 else: 2361 unique = self._match(TokenType.UNIQUE) 2362 primary = self._match_text_seq("PRIMARY") 2363 amp = self._match_text_seq("AMP") 2364 2365 if not self._match(TokenType.INDEX): 2366 return None 2367 2368 index = self._parse_id_var() 2369 table = None 2370 2371 using = self._parse_field() if self._match(TokenType.USING) else None 2372 2373 if self._match(TokenType.L_PAREN, advance=False): 2374 columns = self._parse_wrapped_csv(self._parse_ordered) 2375 else: 2376 columns = None 2377 2378 return self.expression( 2379 exp.Index, 2380 this=index, 2381 table=table, 2382 using=using, 2383 columns=columns, 2384 unique=unique, 2385 primary=primary, 2386 amp=amp, 2387 
partition_by=self._parse_partition_by(), 2388 ) 2389 2390 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2391 hints: t.List[exp.Expression] = [] 2392 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2393 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2394 hints.append( 2395 self.expression( 2396 exp.WithTableHint, 2397 expressions=self._parse_csv( 2398 lambda: self._parse_function() or self._parse_var(any_token=True) 2399 ), 2400 ) 2401 ) 2402 self._match_r_paren() 2403 else: 2404 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2405 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2406 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2407 2408 self._match_texts({"INDEX", "KEY"}) 2409 if self._match(TokenType.FOR): 2410 hint.set("target", self._advance_any() and self._prev.text.upper()) 2411 2412 hint.set("expressions", self._parse_wrapped_id_vars()) 2413 hints.append(hint) 2414 2415 return hints or None 2416 2417 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2418 return ( 2419 (not schema and self._parse_function(optional_parens=False)) 2420 or self._parse_id_var(any_token=False) 2421 or self._parse_string_as_identifier() 2422 or self._parse_placeholder() 2423 ) 2424 2425 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2426 catalog = None 2427 db = None 2428 table = self._parse_table_part(schema=schema) 2429 2430 while self._match(TokenType.DOT): 2431 if catalog: 2432 # This allows nesting the table in arbitrarily many dot expressions if needed 2433 table = self.expression( 2434 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2435 ) 2436 else: 2437 catalog = db 2438 db = table 2439 table = self._parse_table_part(schema=schema) 2440 2441 if not table: 2442 self.raise_error(f"Expected table name but got {self._curr}") 2443 2444 return self.expression( 2445 exp.Table, this=table, db=db, 
catalog=catalog, pivots=self._parse_pivots() 2446 ) 2447 2448 def _parse_table( 2449 self, 2450 schema: bool = False, 2451 joins: bool = False, 2452 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2453 parse_bracket: bool = False, 2454 ) -> t.Optional[exp.Expression]: 2455 lateral = self._parse_lateral() 2456 if lateral: 2457 return lateral 2458 2459 unnest = self._parse_unnest() 2460 if unnest: 2461 return unnest 2462 2463 values = self._parse_derived_table_values() 2464 if values: 2465 return values 2466 2467 subquery = self._parse_select(table=True) 2468 if subquery: 2469 if not subquery.args.get("pivots"): 2470 subquery.set("pivots", self._parse_pivots()) 2471 return subquery 2472 2473 bracket = parse_bracket and self._parse_bracket(None) 2474 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2475 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2476 2477 if schema: 2478 return self._parse_schema(this=this) 2479 2480 if self.ALIAS_POST_TABLESAMPLE: 2481 table_sample = self._parse_table_sample() 2482 2483 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2484 if alias: 2485 this.set("alias", alias) 2486 2487 if not this.args.get("pivots"): 2488 this.set("pivots", self._parse_pivots()) 2489 2490 this.set("hints", self._parse_table_hints()) 2491 2492 if not self.ALIAS_POST_TABLESAMPLE: 2493 table_sample = self._parse_table_sample() 2494 2495 if table_sample: 2496 table_sample.set("this", this) 2497 this = table_sample 2498 2499 if joins: 2500 for join in iter(self._parse_join, None): 2501 this.append("joins", join) 2502 2503 return this 2504 2505 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2506 if not self._match(TokenType.UNNEST): 2507 return None 2508 2509 expressions = self._parse_wrapped_csv(self._parse_type) 2510 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2511 2512 alias = self._parse_table_alias() if with_alias 
else None 2513 2514 if alias and self.UNNEST_COLUMN_ONLY: 2515 if alias.args.get("columns"): 2516 self.raise_error("Unexpected extra column alias in unnest.") 2517 2518 alias.set("columns", [alias.this]) 2519 alias.set("this", None) 2520 2521 offset = None 2522 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2523 self._match(TokenType.ALIAS) 2524 offset = self._parse_id_var() or exp.to_identifier("offset") 2525 2526 return self.expression( 2527 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2528 ) 2529 2530 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2531 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2532 if not is_derived and not self._match(TokenType.VALUES): 2533 return None 2534 2535 expressions = self._parse_csv(self._parse_value) 2536 alias = self._parse_table_alias() 2537 2538 if is_derived: 2539 self._match_r_paren() 2540 2541 return self.expression( 2542 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2543 ) 2544 2545 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2546 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2547 as_modifier and self._match_text_seq("USING", "SAMPLE") 2548 ): 2549 return None 2550 2551 bucket_numerator = None 2552 bucket_denominator = None 2553 bucket_field = None 2554 percent = None 2555 rows = None 2556 size = None 2557 seed = None 2558 2559 kind = ( 2560 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2561 ) 2562 method = self._parse_var(tokens=(TokenType.ROW,)) 2563 2564 self._match(TokenType.L_PAREN) 2565 2566 num = self._parse_number() 2567 2568 if self._match_text_seq("BUCKET"): 2569 bucket_numerator = self._parse_number() 2570 self._match_text_seq("OUT", "OF") 2571 bucket_denominator = bucket_denominator = self._parse_number() 2572 self._match(TokenType.ON) 2573 bucket_field = self._parse_field() 2574 elif 
self._match_set((TokenType.PERCENT, TokenType.MOD)): 2575 percent = num 2576 elif self._match(TokenType.ROWS): 2577 rows = num 2578 else: 2579 size = num 2580 2581 self._match(TokenType.R_PAREN) 2582 2583 if self._match(TokenType.L_PAREN): 2584 method = self._parse_var() 2585 seed = self._match(TokenType.COMMA) and self._parse_number() 2586 self._match_r_paren() 2587 elif self._match_texts(("SEED", "REPEATABLE")): 2588 seed = self._parse_wrapped(self._parse_number) 2589 2590 return self.expression( 2591 exp.TableSample, 2592 method=method, 2593 bucket_numerator=bucket_numerator, 2594 bucket_denominator=bucket_denominator, 2595 bucket_field=bucket_field, 2596 percent=percent, 2597 rows=rows, 2598 size=size, 2599 seed=seed, 2600 kind=kind, 2601 ) 2602 2603 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2604 return list(iter(self._parse_pivot, None)) or None 2605 2606 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2607 return list(iter(self._parse_join, None)) or None 2608 2609 # https://duckdb.org/docs/sql/statements/pivot 2610 def _parse_simplified_pivot(self) -> exp.Pivot: 2611 def _parse_on() -> t.Optional[exp.Expression]: 2612 this = self._parse_bitwise() 2613 return self._parse_in(this) if self._match(TokenType.IN) else this 2614 2615 this = self._parse_table() 2616 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2617 using = self._match(TokenType.USING) and self._parse_csv( 2618 lambda: self._parse_alias(self._parse_function()) 2619 ) 2620 group = self._parse_group() 2621 return self.expression( 2622 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2623 ) 2624 2625 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2626 index = self._index 2627 2628 if self._match(TokenType.PIVOT): 2629 unpivot = False 2630 elif self._match(TokenType.UNPIVOT): 2631 unpivot = True 2632 else: 2633 return None 2634 2635 expressions = [] 2636 field = None 2637 2638 if not self._match(TokenType.L_PAREN): 2639 
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into exp.Where, or return None."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY (expressions, GROUPING SETS, ROLLUP, CUBE, TOTALS, ALL)."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` has no column list; bare ROLLUP takes a wrapped one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...), or return None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause onto `this`, or return `this` unchanged."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort-like clause (SORT BY, CLUSTER BY, ...) for `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST semantics."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's default null ordering when none was given explicitly.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) or a FETCH clause; returns `this`
        unchanged when neither is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause onto `this`, or return `this` unchanged."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse one (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level binary expressions."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level binary expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (NULL, TRUE/FALSE, [NOT] DISTINCT FROM)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate; rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST, a list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: ``low AND high``."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE clause after LIKE-style predicates."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing to the ``INTERVAL '<n>' <unit>`` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, including shifts and ClickHouse's ``??``."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse a chain of TERM-level binary operators (see `self.TERM`) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse a chain of FACTOR-level binary operators (see `self.FACTOR`) over unaries."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if one is registered, else a type/column with AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a `<type> <literal>` cast-like construct, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' -- dialect-specific literal parsers take priority,
                # otherwise it becomes a CAST of the literal to the parsed type
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: re-parse as a column instead
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the `25` in VARCHAR(25), with an optional modifier."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, or return None (retreating) if no valid type is found.

        Args:
            check_func: when True, reject a parenthesized "type" that is immediately
                followed by a string, since it is then likely a function call instead.
            schema: propagated to nested type parsing (column-def / struct contexts).
        """
        index = self._index

        # Teradata-specific type prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Something like FOO(...) parsed here could still be a function call
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array type, e.g. INT[] / INT[][] ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # `[` without a matching `]` means this wasn't a type after all
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracketed nested types, e.g. ARRAY<INT>, STRUCT<a INT, b STRING>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No following string literal: treat the whole thing as a function call
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one `name [:] type` member of a STRUCT type definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if that clause follows; otherwise return it as-is."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (field plus any trailing column operators/brackets)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, ., JSON extraction, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers up: column -> table, table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit concat, or parenthesized expr/query."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. `.25` -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions in parens form a tuple
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier/var."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, or return None if the next tokens don't form one.

        Args:
            functions: name -> builder overrides; defaults to `self.FUNCTIONS`.
            anonymous: when True, always build an `exp.Anonymous` call instead of a
                known function node.
            optional_parens: allow paren-less functions such as CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single `name type` parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (x -> ...), else a DISTINCT list or plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; any errors are discarded
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) and its sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) -- a computed column rather than an identity
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint; unnamed ones are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints` (or all known ones)."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is True, require the REFERENCES token."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES, and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: subscript, slice, array, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Open-start slice, e.g. x[:n]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices by the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice if a colon follows, e.g. x[a:b]."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as IF(cond, true, false) or IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN column])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(...)/TRY_CAST(...), including FORMAT and charset.

        Args:
            strict: build an `exp.Cast` when True, an `exp.TryCast` otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A temporal CAST ... FORMAT is canonicalized into STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the arguments of CONCAT, applying dialect NULL-handling semantics."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Coerce each argument so that NULLs become empty strings
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a (Try)Cast.

        Args:
            strict: build an `exp.Cast` when True, an `exp.TryCast` otherwise.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3920 """ 3921 args = self._parse_csv(self._parse_conjunction) 3922 3923 if len(args) < 3: 3924 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3925 3926 expression, *expressions = args 3927 if not expression: 3928 return None 3929 3930 ifs = [] 3931 for search, result in zip(expressions[::2], expressions[1::2]): 3932 if not search or not result: 3933 return None 3934 3935 if isinstance(search, exp.Literal): 3936 ifs.append( 3937 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3938 ) 3939 elif isinstance(search, exp.Null): 3940 ifs.append( 3941 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3942 ) 3943 else: 3944 cond = exp.or_( 3945 exp.EQ(this=expression.copy(), expression=search), 3946 exp.and_( 3947 exp.Is(this=expression.copy(), expression=exp.Null()), 3948 exp.Is(this=search.copy(), expression=exp.Null()), 3949 copy=False, 3950 ), 3951 copy=False, 3952 ) 3953 ifs.append(exp.If(this=cond, true=result)) 3954 3955 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3956 3957 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3958 self._match_text_seq("KEY") 3959 key = self._parse_field() 3960 self._match(TokenType.COLON) 3961 self._match_text_seq("VALUE") 3962 value = self._parse_field() 3963 3964 if not key and not value: 3965 return None 3966 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3967 3968 def _parse_json_object(self) -> exp.JSONObject: 3969 star = self._parse_star() 3970 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3971 3972 null_handling = None 3973 if self._match_text_seq("NULL", "ON", "NULL"): 3974 null_handling = "NULL ON NULL" 3975 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3976 null_handling = "ABSENT ON NULL" 3977 3978 unique_keys = None 3979 if self._match_text_seq("WITH", "UNIQUE"): 3980 unique_keys = True 3981 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3982 unique_keys = False 3983 3984 self._match_text_seq("KEYS") 3985 3986 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3987 format_json = self._match_text_seq("FORMAT", "JSON") 3988 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3989 3990 return self.expression( 3991 exp.JSONObject, 3992 expressions=expressions, 3993 null_handling=null_handling, 3994 unique_keys=unique_keys, 3995 return_type=return_type, 3996 format_json=format_json, 3997 encoding=encoding, 3998 ) 3999 4000 def _parse_logarithm(self) -> exp.Func: 4001 # Default argument order is base, expression 4002 args = self._parse_csv(self._parse_range) 4003 4004 if len(args) > 1: 4005 if not self.LOG_BASE_FIRST: 4006 args.reverse() 4007 return exp.Log.from_arg_list(args) 4008 4009 return self.expression( 4010 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4011 ) 4012 4013 def _parse_match_against(self) -> exp.MatchAgainst: 4014 expressions = self._parse_csv(self._parse_column) 4015 4016 self._match_text_seq(")", "AGAINST", "(") 4017 4018 this = self._parse_string() 4019 4020 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4021 modifier = "IN NATURAL LANGUAGE MODE" 4022 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4023 modifier = f"{modifier} WITH QUERY EXPANSION" 4024 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4025 modifier = "IN BOOLEAN MODE" 4026 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4027 modifier = "WITH QUERY EXPANSION" 4028 else: 4029 modifier = None 4030 4031 return self.expression( 4032 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4033 ) 4034 4035 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4036 def _parse_open_json(self) -> exp.OpenJSON: 4037 this = self._parse_bitwise() 4038 path = self._match(TokenType.COMMA) and self._parse_string() 4039 4040 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # Handles both POSITION(needle IN haystack) and the csv-argument form;
        # haystack_first flips the csv argument order for dialects that differ.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # A join hint is the hint name followed by a csv of table references.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4096 4097 position = None 4098 collation = None 4099 4100 if self._match_texts(self.TRIM_TYPES): 4101 position = self._prev.text.upper() 4102 4103 expression = self._parse_bitwise() 4104 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4105 this = self._parse_bitwise() 4106 else: 4107 this = expression 4108 expression = None 4109 4110 if self._match(TokenType.COLLATE): 4111 collation = self._parse_bitwise() 4112 4113 return self.expression( 4114 exp.Trim, this=this, position=position, expression=expression, collation=collation 4115 ) 4116 4117 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4118 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4119 4120 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4121 return self._parse_window(self._parse_id_var(), alias=True) 4122 4123 def _parse_respect_or_ignore_nulls( 4124 self, this: t.Optional[exp.Expression] 4125 ) -> t.Optional[exp.Expression]: 4126 if self._match_text_seq("IGNORE", "NULLS"): 4127 return self.expression(exp.IgnoreNulls, this=this) 4128 if self._match_text_seq("RESPECT", "NULLS"): 4129 return self.expression(exp.RespectNulls, this=this) 4130 return this 4131 4132 def _parse_window( 4133 self, this: t.Optional[exp.Expression], alias: bool = False 4134 ) -> t.Optional[exp.Expression]: 4135 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4136 self._match(TokenType.WHERE) 4137 this = self.expression( 4138 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4139 ) 4140 self._match_r_paren() 4141 4142 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            # Named-window definition (WINDOW x AS ...): there is no OVER keyword.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER/KEEP keyword follows, so this is not a window expression.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER followed by a bare identifier references a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle KEEP (DENSE_RANK FIRST|LAST ...) support.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <spec> [AND <spec>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            # Another window keyword follows, so recurse with the current window as the base.
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # Parses one frame bound: UNBOUNDED/CURRENT ROW/<expr>, plus PRECEDING/FOLLOWING side.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses `[AS] alias` or `[AS] (a, b, ...)`; when explicit=True the AS keyword
        # is required, otherwise a bare identifier also aliases `this`.
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Parses an identifier-like token: a real identifier, or (when allowed) any
        # non-reserved token / a token from the given set, wrapped as an Identifier.
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # Parses a VAR token (or, optionally, any non-reserved token / one of `tokens`)
        # into an exp.Var; falls back to placeholder parsing.
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consumes and returns the current token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self)
 -> exp.Parameter:
        # Parses a parameter reference, optionally wrapped in braces: @x or @{x}.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser produced nothing, so back up over the consumed token.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * EXCEPT (a, b) / EXCEPT a, b — column-exclusion list.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * REPLACE (expr AS a, ...) — column-replacement list.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        # Parses a `sep`-separated list with parse_method, dropping None results.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trailed the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative binary-operator loop: keeps folding while the next token
        # maps to an expression class in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, 
optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        # Parenthesized csv; parentheses are required unless optional=True.
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Runs parse_method inside ( ... ); raises unless optional and no paren present.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT used inside DDL (e.g. CREATE TABLE ... AS): no subquery alias parsing.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]]
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens, e.g. "ISOLATION LEVEL READ COMMITTED".
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = 
self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        # ROLLBACK [TO [SAVEPOINT] name]
        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # COMMIT/ROLLBACK AND [NO] CHAIN
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # ALTER TABLE ... DROP [COLUMN] — reuses _parse_drop and defaults kind to COLUMN.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        # The triggering token (CONSTRAINT / FOREIGN KEY / PRIMARY KEY) was already consumed.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if 
self._match_text_seq("CHECK"): 4501 expression = self._parse_wrapped(self._parse_conjunction) 4502 enforced = self._match_text_seq("ENFORCED") 4503 4504 return self.expression( 4505 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4506 ) 4507 4508 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4509 expression = self._parse_foreign_key() 4510 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4511 expression = self._parse_primary_key() 4512 else: 4513 expression = None 4514 4515 return self.expression(exp.AddConstraint, this=this, expression=expression) 4516 4517 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4518 index = self._index - 1 4519 4520 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4521 return self._parse_csv(self._parse_add_constraint) 4522 4523 self._retreat(index) 4524 return self._parse_csv(self._parse_add_column) 4525 4526 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4527 self._match(TokenType.COLUMN) 4528 column = self._parse_field(any_token=True) 4529 4530 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4531 return self.expression(exp.AlterColumn, this=column, drop=True) 4532 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4533 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4534 4535 self._match_text_seq("SET", "DATA") 4536 return self.expression( 4537 exp.AlterColumn, 4538 this=column, 4539 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4540 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4541 using=self._match(TokenType.USING) and self._parse_conjunction(), 4542 ) 4543 4544 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4545 index = self._index - 1 4546 4547 partition_exists = self._parse_exists() 4548 if self._match(TokenType.PARTITION, advance=False): 4549 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as DROP COLUMN instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        # Only ALTER TABLE is structured; anything else falls back to a raw Command.
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only accept the structured parse if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE INTO target USING source ON condition WHEN ... THEN ... [WHEN ...]
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False if neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW statements are dispatched via the SHOW trie;
        # unknown variants become a generic exp.Show node.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # SET [GLOBAL|SESSION] name = value / name TO value.
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        # SET [GLOBAL] TRANSACTION <characteristic> [, ...]
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SET items are dispatched via the SET trie; the fallback
        # is a plain name = value assignment.
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Leftover tokens mean the structured parse failed; retry as a raw Command.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Matches the first multi-word option that appears at the current position.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume everything to the end and keep the raw SQL as an opaque Command.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # ClickHouse dictionary property: NAME(kind(key value ...)).
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # ClickHouse dictionary RANGE(MIN x MAX y) / RANGE(MAX y) property.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
max = self._parse_var() or self._parse_primary()
            # No MIN given: default the lower bound to 0.
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Walks the token stream against a keyword trie and returns the matching
        # sub-parser, rewinding the stream if no full match is found.
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        # Returns True (consuming the token unless advance=False) if the current
        # token has the given type; attaches pending comments to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        # Matches two consecutive token types; consumes both when advance=True.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        # Matches a sequence of token texts (case-insensitive); rewinds fully on
        # failure, and also rewinds on success when advance=False (lookahead).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrites Column nodes as Dot chains (table.column -> Dot(table, column)).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Within a lambda body, columns whose first part names a lambda variable are
        # replaced by the bare identifier (or a Dot chain when the column is qualified).
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot, not the inner column node.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from an alternating key/value argument list.

    A single ``*`` argument yields a StarMap instead. The keys occupy the even
    positions and the values the odd ones; an odd-length list (other than the
    star case) raises IndexError on the dangling key, as before.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    idx = 0
    while idx < len(args):
        keys.append(args[idx])
        values.append(args[idx + 1])
        idx += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.IPADDRESS, 190 TokenType.IPPREFIX, 191 TokenType.ENUM, 192 *NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = { 203 *Tokenizer.SINGLE_TOKENS.values(), 204 TokenType.SELECT, 205 } 206 207 DB_CREATABLES = { 208 TokenType.DATABASE, 209 TokenType.SCHEMA, 210 TokenType.TABLE, 211 TokenType.VIEW, 212 TokenType.DICTIONARY, 213 } 214 215 
CREATABLES = { 216 TokenType.COLUMN, 217 TokenType.FUNCTION, 218 TokenType.INDEX, 219 TokenType.PROCEDURE, 220 *DB_CREATABLES, 221 } 222 223 # Tokens that can represent identifiers 224 ID_VAR_TOKENS = { 225 TokenType.VAR, 226 TokenType.ANTI, 227 TokenType.APPLY, 228 TokenType.ASC, 229 TokenType.AUTO_INCREMENT, 230 TokenType.BEGIN, 231 TokenType.CACHE, 232 TokenType.CASE, 233 TokenType.COLLATE, 234 TokenType.COMMAND, 235 TokenType.COMMENT, 236 TokenType.COMMIT, 237 TokenType.CONSTRAINT, 238 TokenType.DEFAULT, 239 TokenType.DELETE, 240 TokenType.DESC, 241 TokenType.DESCRIBE, 242 TokenType.DICTIONARY, 243 TokenType.DIV, 244 TokenType.END, 245 TokenType.EXECUTE, 246 TokenType.ESCAPE, 247 TokenType.FALSE, 248 TokenType.FIRST, 249 TokenType.FILTER, 250 TokenType.FORMAT, 251 TokenType.FULL, 252 TokenType.IF, 253 TokenType.IS, 254 TokenType.ISNULL, 255 TokenType.INTERVAL, 256 TokenType.KEEP, 257 TokenType.LEFT, 258 TokenType.LOAD, 259 TokenType.MERGE, 260 TokenType.NATURAL, 261 TokenType.NEXT, 262 TokenType.OFFSET, 263 TokenType.ORDINALITY, 264 TokenType.OVERWRITE, 265 TokenType.PARTITION, 266 TokenType.PERCENT, 267 TokenType.PIVOT, 268 TokenType.PRAGMA, 269 TokenType.RANGE, 270 TokenType.REFERENCES, 271 TokenType.RIGHT, 272 TokenType.ROW, 273 TokenType.ROWS, 274 TokenType.SEMI, 275 TokenType.SET, 276 TokenType.SETTINGS, 277 TokenType.SHOW, 278 TokenType.TEMPORARY, 279 TokenType.TOP, 280 TokenType.TRUE, 281 TokenType.UNIQUE, 282 TokenType.UNPIVOT, 283 TokenType.UPDATE, 284 TokenType.VOLATILE, 285 TokenType.WINDOW, 286 *CREATABLES, 287 *SUBQUERY_PREDICATES, 288 *TYPE_TOKENS, 289 *NO_PAREN_FUNCTIONS, 290 } 291 292 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 293 294 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 295 TokenType.APPLY, 296 TokenType.ASOF, 297 TokenType.FULL, 298 TokenType.LEFT, 299 TokenType.LOCK, 300 TokenType.NATURAL, 301 TokenType.OFFSET, 302 TokenType.RIGHT, 303 TokenType.WINDOW, 304 } 305 306 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 307 
308 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 309 310 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 311 312 FUNC_TOKENS = { 313 TokenType.COMMAND, 314 TokenType.CURRENT_DATE, 315 TokenType.CURRENT_DATETIME, 316 TokenType.CURRENT_TIMESTAMP, 317 TokenType.CURRENT_TIME, 318 TokenType.CURRENT_USER, 319 TokenType.FILTER, 320 TokenType.FIRST, 321 TokenType.FORMAT, 322 TokenType.GLOB, 323 TokenType.IDENTIFIER, 324 TokenType.INDEX, 325 TokenType.ISNULL, 326 TokenType.ILIKE, 327 TokenType.LIKE, 328 TokenType.MERGE, 329 TokenType.OFFSET, 330 TokenType.PRIMARY_KEY, 331 TokenType.RANGE, 332 TokenType.REPLACE, 333 TokenType.RLIKE, 334 TokenType.ROW, 335 TokenType.UNNEST, 336 TokenType.VAR, 337 TokenType.LEFT, 338 TokenType.RIGHT, 339 TokenType.DATE, 340 TokenType.DATETIME, 341 TokenType.TABLE, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMPTZ, 344 TokenType.WINDOW, 345 TokenType.XOR, 346 *TYPE_TOKENS, 347 *SUBQUERY_PREDICATES, 348 } 349 350 CONJUNCTION = { 351 TokenType.AND: exp.And, 352 TokenType.OR: exp.Or, 353 } 354 355 EQUALITY = { 356 TokenType.EQ: exp.EQ, 357 TokenType.NEQ: exp.NEQ, 358 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 359 } 360 361 COMPARISON = { 362 TokenType.GT: exp.GT, 363 TokenType.GTE: exp.GTE, 364 TokenType.LT: exp.LT, 365 TokenType.LTE: exp.LTE, 366 } 367 368 BITWISE = { 369 TokenType.AMP: exp.BitwiseAnd, 370 TokenType.CARET: exp.BitwiseXor, 371 TokenType.PIPE: exp.BitwiseOr, 372 TokenType.DPIPE: exp.DPipe, 373 } 374 375 TERM = { 376 TokenType.DASH: exp.Sub, 377 TokenType.PLUS: exp.Add, 378 TokenType.MOD: exp.Mod, 379 TokenType.COLLATE: exp.Collate, 380 } 381 382 FACTOR = { 383 TokenType.DIV: exp.IntDiv, 384 TokenType.LR_ARROW: exp.Distance, 385 TokenType.SLASH: exp.Div, 386 TokenType.STAR: exp.Mul, 387 } 388 389 TIMESTAMPS = { 390 TokenType.TIME, 391 TokenType.TIMESTAMP, 392 TokenType.TIMESTAMPTZ, 393 TokenType.TIMESTAMPLTZ, 394 } 395 396 SET_OPERATIONS = { 397 TokenType.UNION, 398 TokenType.INTERSECT, 399 TokenType.EXCEPT, 400 } 401 402 
JOIN_METHODS = { 403 TokenType.NATURAL, 404 TokenType.ASOF, 405 } 406 407 JOIN_SIDES = { 408 TokenType.LEFT, 409 TokenType.RIGHT, 410 TokenType.FULL, 411 } 412 413 JOIN_KINDS = { 414 TokenType.INNER, 415 TokenType.OUTER, 416 TokenType.CROSS, 417 TokenType.SEMI, 418 TokenType.ANTI, 419 } 420 421 JOIN_HINTS: t.Set[str] = set() 422 423 LAMBDAS = { 424 TokenType.ARROW: lambda self, expressions: self.expression( 425 exp.Lambda, 426 this=self._replace_lambda( 427 self._parse_conjunction(), 428 {node.name for node in expressions}, 429 ), 430 expressions=expressions, 431 ), 432 TokenType.FARROW: lambda self, expressions: self.expression( 433 exp.Kwarg, 434 this=exp.var(expressions[0].name), 435 expression=self._parse_conjunction(), 436 ), 437 } 438 439 COLUMN_OPERATORS = { 440 TokenType.DOT: None, 441 TokenType.DCOLON: lambda self, this, to: self.expression( 442 exp.Cast if self.STRICT_CAST else exp.TryCast, 443 this=this, 444 to=to, 445 ), 446 TokenType.ARROW: lambda self, this, path: self.expression( 447 exp.JSONExtract, 448 this=this, 449 expression=path, 450 ), 451 TokenType.DARROW: lambda self, this, path: self.expression( 452 exp.JSONExtractScalar, 453 this=this, 454 expression=path, 455 ), 456 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtract, 458 this=this, 459 expression=path, 460 ), 461 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 462 exp.JSONBExtractScalar, 463 this=this, 464 expression=path, 465 ), 466 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 467 exp.JSONBContains, 468 this=this, 469 expression=key, 470 ), 471 } 472 473 EXPRESSION_PARSERS = { 474 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 475 exp.Column: lambda self: self._parse_column(), 476 exp.Condition: lambda self: self._parse_conjunction(), 477 exp.DataType: lambda self: self._parse_types(), 478 exp.Expression: lambda self: self._parse_statement(), 479 exp.From: lambda self: 
self._parse_from(), 480 exp.Group: lambda self: self._parse_group(), 481 exp.Having: lambda self: self._parse_having(), 482 exp.Identifier: lambda self: self._parse_id_var(), 483 exp.Join: lambda self: self._parse_join(), 484 exp.Lambda: lambda self: self._parse_lambda(), 485 exp.Lateral: lambda self: self._parse_lateral(), 486 exp.Limit: lambda self: self._parse_limit(), 487 exp.Offset: lambda self: self._parse_offset(), 488 exp.Order: lambda self: self._parse_order(), 489 exp.Ordered: lambda self: self._parse_ordered(), 490 exp.Properties: lambda self: self._parse_properties(), 491 exp.Qualify: lambda self: self._parse_qualify(), 492 exp.Returning: lambda self: self._parse_returning(), 493 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 494 exp.Table: lambda self: self._parse_table_parts(), 495 exp.TableAlias: lambda self: self._parse_table_alias(), 496 exp.Where: lambda self: self._parse_where(), 497 exp.Window: lambda self: self._parse_named_window(), 498 exp.With: lambda self: self._parse_with(), 499 "JOIN_TYPE": lambda self: self._parse_join_parts(), 500 } 501 502 STATEMENT_PARSERS = { 503 TokenType.ALTER: lambda self: self._parse_alter(), 504 TokenType.BEGIN: lambda self: self._parse_transaction(), 505 TokenType.CACHE: lambda self: self._parse_cache(), 506 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 507 TokenType.COMMENT: lambda self: self._parse_comment(), 508 TokenType.CREATE: lambda self: self._parse_create(), 509 TokenType.DELETE: lambda self: self._parse_delete(), 510 TokenType.DESC: lambda self: self._parse_describe(), 511 TokenType.DESCRIBE: lambda self: self._parse_describe(), 512 TokenType.DROP: lambda self: self._parse_drop(), 513 TokenType.FROM: lambda self: exp.select("*").from_( 514 t.cast(exp.From, self._parse_from(skip_from_token=True)) 515 ), 516 TokenType.INSERT: lambda self: self._parse_insert(), 517 TokenType.LOAD: lambda self: self._parse_load(), 518 TokenType.MERGE: lambda self: 
self._parse_merge(), 519 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 520 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 521 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 522 TokenType.SET: lambda self: self._parse_set(), 523 TokenType.UNCACHE: lambda self: self._parse_uncache(), 524 TokenType.UPDATE: lambda self: self._parse_update(), 525 TokenType.USE: lambda self: self.expression( 526 exp.Use, 527 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 528 and exp.var(self._prev.text), 529 this=self._parse_table(schema=False), 530 ), 531 } 532 533 UNARY_PARSERS = { 534 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 535 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 536 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 537 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 538 } 539 540 PRIMARY_PARSERS = { 541 TokenType.STRING: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=True 543 ), 544 TokenType.NUMBER: lambda self, token: self.expression( 545 exp.Literal, this=token.text, is_string=False 546 ), 547 TokenType.STAR: lambda self, _: self.expression( 548 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 549 ), 550 TokenType.NULL: lambda self, _: self.expression(exp.Null), 551 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 552 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 553 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 554 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 555 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 556 TokenType.INTRODUCER: lambda self, token: 
self._parse_introducer(token), 557 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 558 exp.National, this=token.text 559 ), 560 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 561 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 562 } 563 564 PLACEHOLDER_PARSERS = { 565 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 566 TokenType.PARAMETER: lambda self: self._parse_parameter(), 567 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 568 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 569 else None, 570 } 571 572 RANGE_PARSERS = { 573 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 574 TokenType.GLOB: binary_range_parser(exp.Glob), 575 TokenType.ILIKE: binary_range_parser(exp.ILike), 576 TokenType.IN: lambda self, this: self._parse_in(this), 577 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 578 TokenType.IS: lambda self, this: self._parse_is(this), 579 TokenType.LIKE: binary_range_parser(exp.Like), 580 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 581 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 582 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 583 } 584 585 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 586 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 587 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 588 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 589 "CHARACTER SET": lambda self: self._parse_character_set(), 590 "CHECKSUM": lambda self: self._parse_checksum(), 591 "CLUSTER BY": lambda self: self._parse_cluster(), 592 "CLUSTERED": lambda self: self._parse_clustered_by(), 593 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 594 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 595 "COPY": lambda self: 
self._parse_copy_property(), 596 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 597 "DEFINER": lambda self: self._parse_definer(), 598 "DETERMINISTIC": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 600 ), 601 "DISTKEY": lambda self: self._parse_distkey(), 602 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 603 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 604 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 605 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 606 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 607 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 608 "FREESPACE": lambda self: self._parse_freespace(), 609 "HEAP": lambda self: self.expression(exp.HeapProperty), 610 "IMMUTABLE": lambda self: self.expression( 611 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 612 ), 613 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 614 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 615 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 616 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 617 "LIKE": lambda self: self._parse_create_like(), 618 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 619 "LOCK": lambda self: self._parse_locking(), 620 "LOCKING": lambda self: self._parse_locking(), 621 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 622 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 623 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 624 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 625 "NO": lambda self: self._parse_no_property(), 626 "ON": lambda self: self._parse_on_property(), 627 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 628 "PARTITION BY": lambda self: self._parse_partitioned_by(), 629 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 630 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 631 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 632 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 633 "RETURNS": lambda self: self._parse_returns(), 634 "ROW": lambda self: self._parse_row(), 635 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 636 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 637 "SETTINGS": lambda self: self.expression( 638 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 639 ), 640 "SORTKEY": lambda self: self._parse_sortkey(), 641 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 642 "STABLE": lambda self: self.expression( 643 exp.StabilityProperty, this=exp.Literal.string("STABLE") 644 ), 645 "STORED": lambda self: self._parse_stored(), 646 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 647 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 648 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 649 "TO": lambda self: self._parse_to_table(), 650 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 651 "TTL": lambda self: self._parse_ttl(), 652 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 653 "VOLATILE": lambda self: self._parse_volatile_property(), 654 "WITH": lambda self: self._parse_with_property(), 655 } 656 657 CONSTRAINT_PARSERS = { 658 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 659 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 660 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 661 "CHARACTER SET": lambda self: self.expression( 662 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 663 ), 
664 "CHECK": lambda self: self.expression( 665 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 666 ), 667 "COLLATE": lambda self: self.expression( 668 exp.CollateColumnConstraint, this=self._parse_var() 669 ), 670 "COMMENT": lambda self: self.expression( 671 exp.CommentColumnConstraint, this=self._parse_string() 672 ), 673 "COMPRESS": lambda self: self._parse_compress(), 674 "DEFAULT": lambda self: self.expression( 675 exp.DefaultColumnConstraint, this=self._parse_bitwise() 676 ), 677 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 678 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 679 "FORMAT": lambda self: self.expression( 680 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 681 ), 682 "GENERATED": lambda self: self._parse_generated_as_identity(), 683 "IDENTITY": lambda self: self._parse_auto_increment(), 684 "INLINE": lambda self: self._parse_inline(), 685 "LIKE": lambda self: self._parse_create_like(), 686 "NOT": lambda self: self._parse_not_constraint(), 687 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 688 "ON": lambda self: self._match(TokenType.UPDATE) 689 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 690 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 691 "PRIMARY KEY": lambda self: self._parse_primary_key(), 692 "REFERENCES": lambda self: self._parse_references(match=False), 693 "TITLE": lambda self: self.expression( 694 exp.TitleColumnConstraint, this=self._parse_var_or_string() 695 ), 696 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 697 "UNIQUE": lambda self: self._parse_unique(), 698 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 699 } 700 701 ALTER_PARSERS = { 702 "ADD": lambda self: self._parse_alter_table_add(), 703 "ALTER": lambda self: 
self._parse_alter_table_alter(), 704 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 705 "DROP": lambda self: self._parse_alter_table_drop(), 706 "RENAME": lambda self: self._parse_alter_table_rename(), 707 } 708 709 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 710 711 NO_PAREN_FUNCTION_PARSERS = { 712 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 713 TokenType.CASE: lambda self: self._parse_case(), 714 TokenType.IF: lambda self: self._parse_if(), 715 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 716 exp.NextValueFor, 717 this=self._parse_column(), 718 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 719 ), 720 } 721 722 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 723 724 FUNCTION_PARSERS = { 725 "ANY_VALUE": lambda self: self._parse_any_value(), 726 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 727 "CONCAT": lambda self: self._parse_concat(), 728 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 729 "DECODE": lambda self: self._parse_decode(), 730 "EXTRACT": lambda self: self._parse_extract(), 731 "JSON_OBJECT": lambda self: self._parse_json_object(), 732 "LOG": lambda self: self._parse_logarithm(), 733 "MATCH": lambda self: self._parse_match_against(), 734 "OPENJSON": lambda self: self._parse_open_json(), 735 "POSITION": lambda self: self._parse_position(), 736 "SAFE_CAST": lambda self: self._parse_cast(False), 737 "STRING_AGG": lambda self: self._parse_string_agg(), 738 "SUBSTRING": lambda self: self._parse_substring(), 739 "TRIM": lambda self: self._parse_trim(), 740 "TRY_CAST": lambda self: self._parse_cast(False), 741 "TRY_CONVERT": lambda self: self._parse_convert(False), 742 } 743 744 QUERY_MODIFIER_PARSERS = { 745 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 746 TokenType.WHERE: lambda self: ("where", self._parse_where()), 747 TokenType.GROUP_BY: lambda 
self: ("group", self._parse_group()), 748 TokenType.HAVING: lambda self: ("having", self._parse_having()), 749 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 750 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 751 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 752 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 753 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 754 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 755 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 756 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 757 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 758 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 759 TokenType.CLUSTER_BY: lambda self: ( 760 "cluster", 761 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 762 ), 763 TokenType.DISTRIBUTE_BY: lambda self: ( 764 "distribute", 765 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 766 ), 767 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 768 } 769 770 SET_PARSERS = { 771 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 772 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 773 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 774 "TRANSACTION": lambda self: self._parse_set_transaction(), 775 } 776 777 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 778 779 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 780 781 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 782 783 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 784 785 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 786 787 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 788 TRANSACTION_CHARACTERISTICS = { 789 "ISOLATION LEVEL REPEATABLE READ", 790 
"ISOLATION LEVEL READ COMMITTED", 791 "ISOLATION LEVEL READ UNCOMMITTED", 792 "ISOLATION LEVEL SERIALIZABLE", 793 "READ WRITE", 794 "READ ONLY", 795 } 796 797 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 798 799 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 800 801 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 802 803 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 804 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 805 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 806 807 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 808 809 STRICT_CAST = True 810 811 # A NULL arg in CONCAT yields NULL by default 812 CONCAT_NULL_OUTPUTS_STRING = False 813 814 PREFIXED_PIVOT_COLUMNS = False 815 IDENTIFY_PIVOT_STRINGS = False 816 817 LOG_BASE_FIRST = True 818 LOG_DEFAULTS_TO_LN = False 819 820 __slots__ = ( 821 "error_level", 822 "error_message_context", 823 "max_errors", 824 "sql", 825 "errors", 826 "_tokens", 827 "_index", 828 "_curr", 829 "_next", 830 "_prev", 831 "_prev_comments", 832 ) 833 834 # Autofilled 835 INDEX_OFFSET: int = 0 836 UNNEST_COLUMN_ONLY: bool = False 837 ALIAS_POST_TABLESAMPLE: bool = False 838 STRICT_STRING_CONCAT = False 839 NORMALIZE_FUNCTIONS = "upper" 840 NULL_ORDERING: str = "nulls_are_small" 841 SHOW_TRIE: t.Dict = {} 842 SET_TRIE: t.Dict = {} 843 FORMAT_MAPPING: t.Dict[str, str] = {} 844 FORMAT_TRIE: t.Dict = {} 845 TIME_MAPPING: t.Dict[str, str] = {} 846 TIME_TRIE: t.Dict = {} 847 848 def __init__( 849 self, 850 error_level: t.Optional[ErrorLevel] = None, 851 error_message_context: int = 100, 852 max_errors: int = 3, 853 ): 854 self.error_level = error_level or ErrorLevel.IMMEDIATE 855 self.error_message_context = error_message_context 856 self.max_errors = max_errors 857 self.reset() 858 859 def reset(self): 860 self.sql = "" 861 self.errors = [] 862 self._tokens = [] 863 self._index = 0 864 self._curr = None 865 self._next = None 866 
self._prev = None 867 self._prev_comments = None 868 869 def parse( 870 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 871 ) -> t.List[t.Optional[exp.Expression]]: 872 """ 873 Parses a list of tokens and returns a list of syntax trees, one tree 874 per parsed SQL statement. 875 876 Args: 877 raw_tokens: The list of tokens. 878 sql: The original SQL string, used to produce helpful debug messages. 879 880 Returns: 881 The list of the produced syntax trees. 882 """ 883 return self._parse( 884 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 885 ) 886 887 def parse_into( 888 self, 889 expression_types: exp.IntoType, 890 raw_tokens: t.List[Token], 891 sql: t.Optional[str] = None, 892 ) -> t.List[t.Optional[exp.Expression]]: 893 """ 894 Parses a list of tokens into a given Expression type. If a collection of Expression 895 types is given instead, this method will try to parse the token list into each one 896 of them, stopping at the first for which the parsing succeeds. 897 898 Args: 899 expression_types: The expression type(s) to try and parse the token list into. 900 raw_tokens: The list of tokens. 901 sql: The original SQL string, used to produce helpful debug messages. 902 903 Returns: 904 The target Expression. 
905 """ 906 errors = [] 907 for expression_type in ensure_list(expression_types): 908 parser = self.EXPRESSION_PARSERS.get(expression_type) 909 if not parser: 910 raise TypeError(f"No parser registered for {expression_type}") 911 912 try: 913 return self._parse(parser, raw_tokens, sql) 914 except ParseError as e: 915 e.errors[0]["into_expression"] = expression_type 916 errors.append(e) 917 918 raise ParseError( 919 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 920 errors=merge_errors(errors), 921 ) from errors[-1] 922 923 def _parse( 924 self, 925 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 926 raw_tokens: t.List[Token], 927 sql: t.Optional[str] = None, 928 ) -> t.List[t.Optional[exp.Expression]]: 929 self.reset() 930 self.sql = sql or "" 931 932 total = len(raw_tokens) 933 chunks: t.List[t.List[Token]] = [[]] 934 935 for i, token in enumerate(raw_tokens): 936 if token.token_type == TokenType.SEMICOLON: 937 if i < total - 1: 938 chunks.append([]) 939 else: 940 chunks[-1].append(token) 941 942 expressions = [] 943 944 for tokens in chunks: 945 self._index = -1 946 self._tokens = tokens 947 self._advance() 948 949 expressions.append(parse_method(self)) 950 951 if self._index < len(self._tokens): 952 self.raise_error("Invalid expression / Unexpected token") 953 954 self.check_errors() 955 956 return expressions 957 958 def check_errors(self) -> None: 959 """Logs or raises any found errors, depending on the chosen error level setting.""" 960 if self.error_level == ErrorLevel.WARN: 961 for error in self.errors: 962 logger.error(str(error)) 963 elif self.error_level == ErrorLevel.RAISE and self.errors: 964 raise ParseError( 965 concat_messages(self.errors, self.max_errors), 966 errors=merge_errors(self.errors), 967 ) 968 969 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 970 """ 971 Appends an error in the list of recorded errors or raises it, depending on the chosen 972 error level setting. 
973 """ 974 token = token or self._curr or self._prev or Token.string("") 975 start = token.start 976 end = token.end + 1 977 start_context = self.sql[max(start - self.error_message_context, 0) : start] 978 highlight = self.sql[start:end] 979 end_context = self.sql[end : end + self.error_message_context] 980 981 error = ParseError.new( 982 f"{message}. Line {token.line}, Col: {token.col}.\n" 983 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 984 description=message, 985 line=token.line, 986 col=token.col, 987 start_context=start_context, 988 highlight=highlight, 989 end_context=end_context, 990 ) 991 992 if self.error_level == ErrorLevel.IMMEDIATE: 993 raise error 994 995 self.errors.append(error) 996 997 def expression( 998 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 999 ) -> E: 1000 """ 1001 Creates a new, validated Expression. 1002 1003 Args: 1004 exp_class: The expression class to instantiate. 1005 comments: An optional list of comments to attach to the expression. 1006 kwargs: The arguments to set for the expression along with their respective values. 1007 1008 Returns: 1009 The target expression. 1010 """ 1011 instance = exp_class(**kwargs) 1012 instance.add_comments(comments) if comments else self._add_comments(instance) 1013 return self.validate_expression(instance) 1014 1015 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1016 if expression and self._prev_comments: 1017 expression.add_comments(self._prev_comments) 1018 self._prev_comments = None 1019 1020 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1021 """ 1022 Validates an Expression, making sure that all its mandatory arguments are set. 1023 1024 Args: 1025 expression: The expression to validate. 1026 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1027 1028 Returns: 1029 The validated expression. 
1030 """ 1031 if self.error_level != ErrorLevel.IGNORE: 1032 for error_message in expression.error_messages(args): 1033 self.raise_error(error_message) 1034 1035 return expression 1036 1037 def _find_sql(self, start: Token, end: Token) -> str: 1038 return self.sql[start.start : end.end + 1] 1039 1040 def _advance(self, times: int = 1) -> None: 1041 self._index += times 1042 self._curr = seq_get(self._tokens, self._index) 1043 self._next = seq_get(self._tokens, self._index + 1) 1044 1045 if self._index > 0: 1046 self._prev = self._tokens[self._index - 1] 1047 self._prev_comments = self._prev.comments 1048 else: 1049 self._prev = None 1050 self._prev_comments = None 1051 1052 def _retreat(self, index: int) -> None: 1053 if index != self._index: 1054 self._advance(index - self._index) 1055 1056 def _parse_command(self) -> exp.Command: 1057 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1058 1059 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1060 start = self._prev 1061 exists = self._parse_exists() if allow_exists else None 1062 1063 self._match(TokenType.ON) 1064 1065 kind = self._match_set(self.CREATABLES) and self._prev 1066 if not kind: 1067 return self._parse_as_command(start) 1068 1069 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1070 this = self._parse_user_defined_function(kind=kind.token_type) 1071 elif kind.token_type == TokenType.TABLE: 1072 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1073 elif kind.token_type == TokenType.COLUMN: 1074 this = self._parse_column() 1075 else: 1076 this = self._parse_id_var() 1077 1078 self._match(TokenType.IS) 1079 1080 return self.expression( 1081 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1082 ) 1083 1084 def _parse_to_table( 1085 self, 1086 ) -> exp.ToTableProperty: 1087 table = self._parse_table_parts(schema=True) 1088 return self.expression(exp.ToTableProperty, 
this=table) 1089 1090 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1091 def _parse_ttl(self) -> exp.Expression: 1092 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1093 this = self._parse_bitwise() 1094 1095 if self._match_text_seq("DELETE"): 1096 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1097 if self._match_text_seq("RECOMPRESS"): 1098 return self.expression( 1099 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1100 ) 1101 if self._match_text_seq("TO", "DISK"): 1102 return self.expression( 1103 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1104 ) 1105 if self._match_text_seq("TO", "VOLUME"): 1106 return self.expression( 1107 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1108 ) 1109 1110 return this 1111 1112 expressions = self._parse_csv(_parse_ttl_action) 1113 where = self._parse_where() 1114 group = self._parse_group() 1115 1116 aggregates = None 1117 if group and self._match(TokenType.SET): 1118 aggregates = self._parse_csv(self._parse_set_item) 1119 1120 return self.expression( 1121 exp.MergeTreeTTL, 1122 expressions=expressions, 1123 where=where, 1124 group=group, 1125 aggregates=aggregates, 1126 ) 1127 1128 def _parse_statement(self) -> t.Optional[exp.Expression]: 1129 if self._curr is None: 1130 return None 1131 1132 if self._match_set(self.STATEMENT_PARSERS): 1133 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1134 1135 if self._match_set(Tokenizer.COMMANDS): 1136 return self._parse_command() 1137 1138 expression = self._parse_expression() 1139 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1140 return self._parse_query_modifiers(expression) 1141 1142 def _parse_drop(self) -> exp.Drop | exp.Command: 1143 start = self._prev 1144 temporary = self._match(TokenType.TEMPORARY) 1145 materialized = self._match_text_seq("MATERIALIZED") 1146 1147 kind = 
self._match_set(self.CREATABLES) and self._prev.text 1148 if not kind: 1149 return self._parse_as_command(start) 1150 1151 return self.expression( 1152 exp.Drop, 1153 comments=start.comments, 1154 exists=self._parse_exists(), 1155 this=self._parse_table(schema=True), 1156 kind=kind, 1157 temporary=temporary, 1158 materialized=materialized, 1159 cascade=self._match_text_seq("CASCADE"), 1160 constraints=self._match_text_seq("CONSTRAINTS"), 1161 purge=self._match_text_seq("PURGE"), 1162 ) 1163 1164 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1165 return ( 1166 self._match(TokenType.IF) 1167 and (not not_ or self._match(TokenType.NOT)) 1168 and self._match(TokenType.EXISTS) 1169 ) 1170 1171 def _parse_create(self) -> exp.Create | exp.Command: 1172 # Note: this can't be None because we've matched a statement parser 1173 start = self._prev 1174 replace = start.text.upper() == "REPLACE" or self._match_pair( 1175 TokenType.OR, TokenType.REPLACE 1176 ) 1177 unique = self._match(TokenType.UNIQUE) 1178 1179 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1180 self._advance() 1181 1182 properties = None 1183 create_token = self._match_set(self.CREATABLES) and self._prev 1184 1185 if not create_token: 1186 # exp.Properties.Location.POST_CREATE 1187 properties = self._parse_properties() 1188 create_token = self._match_set(self.CREATABLES) and self._prev 1189 1190 if not properties or not create_token: 1191 return self._parse_as_command(start) 1192 1193 exists = self._parse_exists(not_=True) 1194 this = None 1195 expression: t.Optional[exp.Expression] = None 1196 indexes = None 1197 no_schema_binding = None 1198 begin = None 1199 clone = None 1200 1201 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1202 nonlocal properties 1203 if properties and temp_props: 1204 properties.expressions.extend(temp_props.expressions) 1205 elif temp_props: 1206 properties = temp_props 1207 1208 if create_token.token_type in 
(TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # Snowflake-style CLONE, optionally with AT/BEFORE time travel.
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse one DDL property, trying registered PROPERTY_PARSERS first
        and falling back to generic key = value assignments."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Lookahead for <var|string> = ... without consuming tokens yet.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED [AS] <format> or STORED AS INPUTFORMAT '...' OUTPUTFORMAT '...'
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # <KEYWORD> [=|AS] <field> -> exp_class(this=<field>)
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into one exp.Properties node;
        returns None when nothing matches."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) ->
exp.FallbackProperty:
        # Teradata FALLBACK [PROTECTION] property.
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is a table property when preceded by one of the
        # PRE_VOLATILE_TOKENS; otherwise it is a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the clause after WITH: a parenthesized property list or one
        of the Teradata WITH JOURNAL / WITH [NO] DATA / isolated-loading forms."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL [TABLE] [=] <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] ON|OFF [DEFAULT]; `on` stays None if neither matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY was already consumed by the dispatcher; give it back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE [=] <n> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # MERGEBLOCKRATIO [= <n> [PERCENT]] | NO/DEFAULT MERGEBLOCKRATIO
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # [DEFAULT|MIN|MAX] DATABLOCKSIZE [= <n> [BYTES|KBYTES|KILOBYTES]]
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a table reference; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # PARTITION BY <expr>, ...; empty list when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE|DELETE ROWS (temporary-table semantics).
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING|EXCLUDING <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> | RETURNS TABLE [<schema>] | RETURNS TABLE<...>
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and
        INSERT OR <alternative> variants."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if
conflict:
            # Conflict target: a named constraint or a key column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # RETURNING <expr>, ... [INTO <target>]
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT SERDE / ROW FORMAT DELIMITED clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Hive LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ...
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] ...
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # One row of a VALUES clause, with or without parentheses.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple,
expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH-prefixed statement, SELECT, parenthesized
        subquery (when nested/table), or VALUES."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # WITH [RECURSIVE] <cte>, <cte>, ...
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # <alias> [(cols)] AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, the ( wasn't a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered query modifiers (WHERE,
        GROUP BY, LIMIT, ...) to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT <offset>, <count>: hoist the offset.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Oracle-style /*+ ... */ hint comment.
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
2125 2126 return self.expression(exp.Hint, expressions=hints) 2127 2128 return None 2129 2130 def _parse_into(self) -> t.Optional[exp.Into]: 2131 if not self._match(TokenType.INTO): 2132 return None 2133 2134 temp = self._match(TokenType.TEMPORARY) 2135 unlogged = self._match_text_seq("UNLOGGED") 2136 self._match(TokenType.TABLE) 2137 2138 return self.expression( 2139 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2140 ) 2141 2142 def _parse_from( 2143 self, joins: bool = False, skip_from_token: bool = False 2144 ) -> t.Optional[exp.From]: 2145 if not skip_from_token and not self._match(TokenType.FROM): 2146 return None 2147 2148 return self.expression( 2149 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2150 ) 2151 2152 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2153 if not self._match(TokenType.MATCH_RECOGNIZE): 2154 return None 2155 2156 self._match_l_paren() 2157 2158 partition = self._parse_partition_by() 2159 order = self._parse_order() 2160 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2161 2162 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2163 rows = exp.var("ONE ROW PER MATCH") 2164 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2165 text = "ALL ROWS PER MATCH" 2166 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2167 text += f" SHOW EMPTY MATCHES" 2168 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2169 text += f" OMIT EMPTY MATCHES" 2170 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2171 text += f" WITH UNMATCHED ROWS" 2172 rows = exp.var(text) 2173 else: 2174 rows = None 2175 2176 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2177 text = "AFTER MATCH SKIP" 2178 if self._match_text_seq("PAST", "LAST", "ROW"): 2179 text += f" PAST LAST ROW" 2180 elif self._match_text_seq("TO", "NEXT", "ROW"): 2181 text += f" TO NEXT ROW" 2182 elif self._match_text_seq("TO", "FIRST"): 2183 text += f" 
TO FIRST {self._advance_any().text}" # type: ignore 2184 elif self._match_text_seq("TO", "LAST"): 2185 text += f" TO LAST {self._advance_any().text}" # type: ignore 2186 after = exp.var(text) 2187 else: 2188 after = None 2189 2190 if self._match_text_seq("PATTERN"): 2191 self._match_l_paren() 2192 2193 if not self._curr: 2194 self.raise_error("Expecting )", self._curr) 2195 2196 paren = 1 2197 start = self._curr 2198 2199 while self._curr and paren > 0: 2200 if self._curr.token_type == TokenType.L_PAREN: 2201 paren += 1 2202 if self._curr.token_type == TokenType.R_PAREN: 2203 paren -= 1 2204 2205 end = self._prev 2206 self._advance() 2207 2208 if paren > 0: 2209 self.raise_error("Expecting )", self._curr) 2210 2211 pattern = exp.var(self._find_sql(start, end)) 2212 else: 2213 pattern = None 2214 2215 define = ( 2216 self._parse_csv( 2217 lambda: self.expression( 2218 exp.Alias, 2219 alias=self._parse_id_var(any_token=True), 2220 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2221 ) 2222 ) 2223 if self._match_text_seq("DEFINE") 2224 else None 2225 ) 2226 2227 self._match_r_paren() 2228 2229 return self.expression( 2230 exp.MatchRecognize, 2231 partition_by=partition, 2232 order=order, 2233 measures=measures, 2234 rows=rows, 2235 after=after, 2236 pattern=pattern, 2237 define=define, 2238 alias=self._parse_table_alias(), 2239 ) 2240 2241 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2242 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2243 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2244 2245 if outer_apply or cross_apply: 2246 this = self._parse_select(table=True) 2247 view = None 2248 outer = not cross_apply 2249 elif self._match(TokenType.LATERAL): 2250 this = self._parse_select(table=True) 2251 view = self._match(TokenType.VIEW) 2252 outer = self._match(TokenType.OUTER) 2253 else: 2254 return None 2255 2256 if not this: 2257 this = ( 2258 self._parse_unnest() 2259 or self._parse_function() 2260 or 
                self._parse_id_var(any_token=False)
            )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            # LATERAL VIEW f(...) tableAlias AS col1, col2 (Hive/Spark style).
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, e.g.
        NATURAL LEFT OUTER; each element is None when absent."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join (comma join, JOIN ... ON/USING, or CROSS/OUTER APPLY).

        Returns None when the upcoming tokens do not start a join, restoring
        the token position in that case.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens were not followed by JOIN — rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is normalized as a LEFT join side.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Handle nested joins like "A JOIN (B JOIN C ON ...) ON ...":
            # speculatively parse the inner joins, then require ON/USING.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition/reference.

        When `index` is given (the index name was already parsed), parse the
        "ON <table>" part; otherwise parse the UNIQUE/PRIMARY/AMP prefix,
        the INDEX keyword and the index name.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        # advance=False: _parse_wrapped_csv consumes the "(" itself.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints; returns None when no hint is present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    # e.g. USE INDEX FOR JOIN / FOR ORDER BY / FOR GROUP BY.
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, quoted
        string, placeholder, or — outside schema context — a function call)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table, with
        extra leading parts folded into nested Dot expressions."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift previously-seen parts one level up on each extra dot.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db,
catalog=catalog, pivots=self._parse_pivots() 2447 ) 2448 2449 def _parse_table( 2450 self, 2451 schema: bool = False, 2452 joins: bool = False, 2453 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2454 parse_bracket: bool = False, 2455 ) -> t.Optional[exp.Expression]: 2456 lateral = self._parse_lateral() 2457 if lateral: 2458 return lateral 2459 2460 unnest = self._parse_unnest() 2461 if unnest: 2462 return unnest 2463 2464 values = self._parse_derived_table_values() 2465 if values: 2466 return values 2467 2468 subquery = self._parse_select(table=True) 2469 if subquery: 2470 if not subquery.args.get("pivots"): 2471 subquery.set("pivots", self._parse_pivots()) 2472 return subquery 2473 2474 bracket = parse_bracket and self._parse_bracket(None) 2475 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2476 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2477 2478 if schema: 2479 return self._parse_schema(this=this) 2480 2481 if self.ALIAS_POST_TABLESAMPLE: 2482 table_sample = self._parse_table_sample() 2483 2484 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2485 if alias: 2486 this.set("alias", alias) 2487 2488 if not this.args.get("pivots"): 2489 this.set("pivots", self._parse_pivots()) 2490 2491 this.set("hints", self._parse_table_hints()) 2492 2493 if not self.ALIAS_POST_TABLESAMPLE: 2494 table_sample = self._parse_table_sample() 2495 2496 if table_sample: 2497 table_sample.set("this", this) 2498 this = table_sample 2499 2500 if joins: 2501 for join in iter(self._parse_join, None): 2502 this.append("joins", join) 2503 2504 return this 2505 2506 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2507 if not self._match(TokenType.UNNEST): 2508 return None 2509 2510 expressions = self._parse_wrapped_csv(self._parse_type) 2511 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2512 2513 alias = self._parse_table_alias() if with_alias 
else None 2514 2515 if alias and self.UNNEST_COLUMN_ONLY: 2516 if alias.args.get("columns"): 2517 self.raise_error("Unexpected extra column alias in unnest.") 2518 2519 alias.set("columns", [alias.this]) 2520 alias.set("this", None) 2521 2522 offset = None 2523 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2524 self._match(TokenType.ALIAS) 2525 offset = self._parse_id_var() or exp.to_identifier("offset") 2526 2527 return self.expression( 2528 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2529 ) 2530 2531 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2532 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2533 if not is_derived and not self._match(TokenType.VALUES): 2534 return None 2535 2536 expressions = self._parse_csv(self._parse_value) 2537 alias = self._parse_table_alias() 2538 2539 if is_derived: 2540 self._match_r_paren() 2541 2542 return self.expression( 2543 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2544 ) 2545 2546 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2547 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2548 as_modifier and self._match_text_seq("USING", "SAMPLE") 2549 ): 2550 return None 2551 2552 bucket_numerator = None 2553 bucket_denominator = None 2554 bucket_field = None 2555 percent = None 2556 rows = None 2557 size = None 2558 seed = None 2559 2560 kind = ( 2561 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2562 ) 2563 method = self._parse_var(tokens=(TokenType.ROW,)) 2564 2565 self._match(TokenType.L_PAREN) 2566 2567 num = self._parse_number() 2568 2569 if self._match_text_seq("BUCKET"): 2570 bucket_numerator = self._parse_number() 2571 self._match_text_seq("OUT", "OF") 2572 bucket_denominator = bucket_denominator = self._parse_number() 2573 self._match(TokenType.ON) 2574 bucket_field = self._parse_field() 2575 elif 
self._match_set((TokenType.PERCENT, TokenType.MOD)): 2576 percent = num 2577 elif self._match(TokenType.ROWS): 2578 rows = num 2579 else: 2580 size = num 2581 2582 self._match(TokenType.R_PAREN) 2583 2584 if self._match(TokenType.L_PAREN): 2585 method = self._parse_var() 2586 seed = self._match(TokenType.COMMA) and self._parse_number() 2587 self._match_r_paren() 2588 elif self._match_texts(("SEED", "REPEATABLE")): 2589 seed = self._parse_wrapped(self._parse_number) 2590 2591 return self.expression( 2592 exp.TableSample, 2593 method=method, 2594 bucket_numerator=bucket_numerator, 2595 bucket_denominator=bucket_denominator, 2596 bucket_field=bucket_field, 2597 percent=percent, 2598 rows=rows, 2599 size=size, 2600 seed=seed, 2601 kind=kind, 2602 ) 2603 2604 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2605 return list(iter(self._parse_pivot, None)) or None 2606 2607 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2608 return list(iter(self._parse_join, None)) or None 2609 2610 # https://duckdb.org/docs/sql/statements/pivot 2611 def _parse_simplified_pivot(self) -> exp.Pivot: 2612 def _parse_on() -> t.Optional[exp.Expression]: 2613 this = self._parse_bitwise() 2614 return self._parse_in(this) if self._match(TokenType.IN) else this 2615 2616 this = self._parse_table() 2617 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2618 using = self._match(TokenType.USING) and self._parse_csv( 2619 lambda: self._parse_alias(self._parse_function()) 2620 ) 2621 group = self._parse_group() 2622 return self.expression( 2623 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2624 ) 2625 2626 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2627 index = self._index 2628 2629 if self._match(TokenType.PIVOT): 2630 unpivot = False 2631 elif self._match(TokenType.UNPIVOT): 2632 unpivot = True 2633 else: 2634 return None 2635 2636 expressions = [] 2637 field = None 2638 2639 if not self._match(TokenType.L_PAREN): 2640 
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last PIVOT/UNPIVOT in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names (<agg alias> x <IN value>),
            # ordered/prefixed per dialect settings.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` for callers that already
        consumed the keyword."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS, [WITH] ROLLUP/CUBE and
        WITH TOTALS; clause parts accumulate into `elements` per keyword."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" stores True; "ROLLUP (...)" stores the columns.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while grouping constructs keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( set, set, ... )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for ORDER BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST],
        normalizing implicit null ordering per the dialect's NULL_ORDERING."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # If null ordering wasn't spelled out, make the dialect's default
        # explicit so transpilation to other dialects preserves semantics.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`), including "LIMIT offset, count"
        and the ANSI FETCH FIRST/NEXT variant."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style "LIMIT offset, count".
                offset =
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection-level expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), a preceding
        NOT, Postgres ISNULL/NOTNULL, and trailing IS predicates."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right side of IS: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.
        Rewinds and returns None when nothing valid follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse IN's right side: UNNEST(...), a parenthesized subquery or
        value list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subqueryable goes into "query" rather than "expressions".
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse "<low> AND <high>" after BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node when an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split "'5 day'" into value '5' and unit day.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? (Coalesce), and << / >> shifts (which
        the tokenizer emits as LT LT / GT GT pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

3047 def _parse_term(self) -> t.Optional[exp.Expression]: 3048 return self._parse_tokens(self._parse_factor, self.TERM) 3049 3050 def _parse_factor(self) -> t.Optional[exp.Expression]: 3051 return self._parse_tokens(self._parse_unary, self.FACTOR) 3052 3053 def _parse_unary(self) -> t.Optional[exp.Expression]: 3054 if self._match_set(self.UNARY_PARSERS): 3055 return self.UNARY_PARSERS[self._prev.token_type](self) 3056 return self._parse_at_time_zone(self._parse_type()) 3057 3058 def _parse_type(self) -> t.Optional[exp.Expression]: 3059 interval = self._parse_interval() 3060 if interval: 3061 return interval 3062 3063 index = self._index 3064 data_type = self._parse_types(check_func=True) 3065 this = self._parse_column() 3066 3067 if data_type: 3068 if isinstance(this, exp.Literal): 3069 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3070 if parser: 3071 return parser(self, this, data_type) 3072 return self.expression(exp.Cast, this=this, to=data_type) 3073 if not data_type.expressions: 3074 self._retreat(index) 3075 return self._parse_column() 3076 return self._parse_column_ops(data_type) 3077 3078 return this 3079 3080 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3081 this = self._parse_type() 3082 if not this: 3083 return None 3084 3085 return self.expression( 3086 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3087 ) 3088 3089 def _parse_types( 3090 self, check_func: bool = False, schema: bool = False 3091 ) -> t.Optional[exp.Expression]: 3092 index = self._index 3093 3094 prefix = self._match_text_seq("SYSUDTLIB", ".") 3095 3096 if not self._match_set(self.TYPE_TOKENS): 3097 return None 3098 3099 type_token = self._prev.token_type 3100 3101 if type_token == TokenType.PSEUDO_TYPE: 3102 return self.expression(exp.PseudoType, this=self._prev.text) 3103 3104 nested = type_token in self.NESTED_TYPE_TOKENS 3105 is_struct = type_token == TokenType.STRUCT 3106 expressions = None 3107 maybe_func = False 3108 3109 if 
self._match(TokenType.L_PAREN): 3110 if is_struct: 3111 expressions = self._parse_csv(self._parse_struct_types) 3112 elif nested: 3113 expressions = self._parse_csv( 3114 lambda: self._parse_types(check_func=check_func, schema=schema) 3115 ) 3116 elif type_token in self.ENUM_TYPE_TOKENS: 3117 expressions = self._parse_csv(self._parse_primary) 3118 else: 3119 expressions = self._parse_csv(self._parse_type_size) 3120 3121 if not expressions or not self._match(TokenType.R_PAREN): 3122 self._retreat(index) 3123 return None 3124 3125 maybe_func = True 3126 3127 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3128 this = exp.DataType( 3129 this=exp.DataType.Type.ARRAY, 3130 expressions=[ 3131 exp.DataType( 3132 this=exp.DataType.Type[type_token.value], 3133 expressions=expressions, 3134 nested=nested, 3135 ) 3136 ], 3137 nested=True, 3138 ) 3139 3140 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3141 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3142 3143 return this 3144 3145 if self._match(TokenType.L_BRACKET): 3146 self._retreat(index) 3147 return None 3148 3149 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3150 if nested and self._match(TokenType.LT): 3151 if is_struct: 3152 expressions = self._parse_csv(self._parse_struct_types) 3153 else: 3154 expressions = self._parse_csv( 3155 lambda: self._parse_types(check_func=check_func, schema=schema) 3156 ) 3157 3158 if not self._match(TokenType.GT): 3159 self.raise_error("Expecting >") 3160 3161 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3162 values = self._parse_csv(self._parse_conjunction) 3163 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3164 3165 value: t.Optional[exp.Expression] = None 3166 if type_token in self.TIMESTAMPS: 3167 if self._match_text_seq("WITH", "TIME", "ZONE"): 3168 maybe_func = False 3169 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3170 elif 
self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3171 maybe_func = False 3172 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3173 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3174 maybe_func = False 3175 elif type_token == TokenType.INTERVAL: 3176 unit = self._parse_var() 3177 3178 if not unit: 3179 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3180 else: 3181 value = self.expression(exp.Interval, unit=unit) 3182 3183 if maybe_func and check_func: 3184 index2 = self._index 3185 peek = self._parse_string() 3186 3187 if not peek: 3188 self._retreat(index) 3189 return None 3190 3191 self._retreat(index2) 3192 3193 if value: 3194 return value 3195 3196 return exp.DataType( 3197 this=exp.DataType.Type[type_token.value], 3198 expressions=expressions, 3199 nested=nested, 3200 values=values, 3201 prefix=prefix, 3202 ) 3203 3204 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3205 this = self._parse_type() or self._parse_id_var() 3206 self._match(TokenType.COLON) 3207 return self._parse_column_def(this) 3208 3209 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3210 if not self._match_text_seq("AT", "TIME", "ZONE"): 3211 return this 3212 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3213 3214 def _parse_column(self) -> t.Optional[exp.Expression]: 3215 this = self._parse_field() 3216 if isinstance(this, exp.Identifier): 3217 this = self.expression(exp.Column, this=this) 3218 elif not this: 3219 return self._parse_bracket(this) 3220 return self._parse_column_ops(this) 3221 3222 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3223 this = self._parse_bracket(this) 3224 3225 while self._match_set(self.COLUMN_OPERATORS): 3226 op_token = self._prev.token_type 3227 op = self.COLUMN_OPERATORS.get(op_token) 3228 3229 if op_token == TokenType.DCOLON: 3230 field = 
self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Single-token operand for other registered column operators
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: previous column parts become table/db/catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals (with implicit adjacent-string
        concatenation), leading-dot numbers (e.g. `.25`), and parenthesized
        expressions / subqueries / tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate into a single Concat node
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                # Parenthesized query: allow trailing set operations on the subquery
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call: no-paren functions, dialect-specific function
        parsers, subquery predicates, known functions, and anonymous fallbacks."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No ( follows — only no-paren functions like CURRENT_DATE qualify
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this =
self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original (unnormalized) spelling for generation
                    func.meta["name"] = this
                this = func
            else:
                # Unknown function: keep it as an Anonymous node
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name with optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to an identifier
        when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`); otherwise rewind and parse a
        DISTINCT list or a select-or-expression with optional ORDER BY / LIMIT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs and/or constraints); return
        `this` unchanged when a nested SELECT parses or no paren follows."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT; discard any errors it produced
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type plus any number of constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) args or
        START ... INCREMENT ... keywords."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with one value or a wrapped list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY plus sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Without IDENTITY the parens hold a generation expression instead
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint that follows NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint; fall back to an unnamed one when the
        CONSTRAINT keyword is absent."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this,
expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint via the matching CONSTRAINT_PARSERS entry."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ... actions, DEFERRABLE,
        MATCH FULL, etc.) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # e.g. DELETE or UPDATE

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table plus key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause, and
        ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracket/brace suffix: array or struct literals, subscripts, slices."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if
self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:2]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize the index by the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a colon (slice separator) follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression, covering both simple and searched forms."""
        ifs = []
        default = None

        # Simple CASE has an operand before the first WHEN; searched CASE leaves it None
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in function form IF(...) or keyword form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(...), including the HAVING MAX/MIN variant."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(<expr> AS <type> [FORMAT <fmt>]); `strict`
        chooses Cast over TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type') variant — the target type given as a string
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite CAST(... AS <temporal> FORMAT ...) into StrToDate/StrToTime
                # with the format translated through the dialect's time mapping
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, normalizing NULL handling per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Dialects where CONCAT treats NULL as '' get explicit COALESCE wrapping
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates across dialect variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3921 """ 3922 args = self._parse_csv(self._parse_conjunction) 3923 3924 if len(args) < 3: 3925 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3926 3927 expression, *expressions = args 3928 if not expression: 3929 return None 3930 3931 ifs = [] 3932 for search, result in zip(expressions[::2], expressions[1::2]): 3933 if not search or not result: 3934 return None 3935 3936 if isinstance(search, exp.Literal): 3937 ifs.append( 3938 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3939 ) 3940 elif isinstance(search, exp.Null): 3941 ifs.append( 3942 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3943 ) 3944 else: 3945 cond = exp.or_( 3946 exp.EQ(this=expression.copy(), expression=search), 3947 exp.and_( 3948 exp.Is(this=expression.copy(), expression=exp.Null()), 3949 exp.Is(this=search.copy(), expression=exp.Null()), 3950 copy=False, 3951 ), 3952 copy=False, 3953 ) 3954 ifs.append(exp.If(this=cond, true=result)) 3955 3956 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3957 3958 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3959 self._match_text_seq("KEY") 3960 key = self._parse_field() 3961 self._match(TokenType.COLON) 3962 self._match_text_seq("VALUE") 3963 value = self._parse_field() 3964 3965 if not key and not value: 3966 return None 3967 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3968 3969 def _parse_json_object(self) -> exp.JSONObject: 3970 star = self._parse_star() 3971 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3972 3973 null_handling = None 3974 if self._match_text_seq("NULL", "ON", "NULL"): 3975 null_handling = "NULL ON NULL" 3976 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3977 null_handling = "ABSENT ON NULL" 3978 3979 unique_keys = None 3980 if self._match_text_seq("WITH", "UNIQUE"): 3981 unique_keys = True 3982 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3983 unique_keys = False 3984 3985 self._match_text_seq("KEYS") 3986 3987 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3988 format_json = self._match_text_seq("FORMAT", "JSON") 3989 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3990 3991 return self.expression( 3992 exp.JSONObject, 3993 expressions=expressions, 3994 null_handling=null_handling, 3995 unique_keys=unique_keys, 3996 return_type=return_type, 3997 format_json=format_json, 3998 encoding=encoding, 3999 ) 4000 4001 def _parse_logarithm(self) -> exp.Func: 4002 # Default argument order is base, expression 4003 args = self._parse_csv(self._parse_range) 4004 4005 if len(args) > 1: 4006 if not self.LOG_BASE_FIRST: 4007 args.reverse() 4008 return exp.Log.from_arg_list(args) 4009 4010 return self.expression( 4011 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4012 ) 4013 4014 def _parse_match_against(self) -> exp.MatchAgainst: 4015 expressions = self._parse_csv(self._parse_column) 4016 4017 self._match_text_seq(")", "AGAINST", "(") 4018 4019 this = self._parse_string() 4020 4021 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4022 modifier = "IN NATURAL LANGUAGE MODE" 4023 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4024 modifier = f"{modifier} WITH QUERY EXPANSION" 4025 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4026 modifier = "IN BOOLEAN MODE" 4027 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4028 modifier = "WITH QUERY EXPANSION" 4029 else: 4030 modifier = None 4031 4032 return self.expression( 4033 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4034 ) 4035 4036 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4037 def _parse_open_json(self) -> exp.OpenJSON: 4038 this = self._parse_bitwise() 4039 path = self._match(TokenType.COMMA) and self._parse_string() 4040 4041 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec inside the WITH (...) clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` flips the
        argument order of the comma form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([position] [chars FROM] string [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression is the trim characters
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the matching clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax: FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS,
        and OVER (...) specifications; `alias` handles named WINDOW definitions."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
# https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name> with no parens: a reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: UNBOUNDED / CURRENT ROW / an
        expression, plus the optional side keyword matched from WINDOW_SIDES."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias or parenthesized alias list; with `explicit`,
        only an AS-introduced alias is accepted."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier; failing that, treat any token (or one of `tokens`)
        as an identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4284 4285 def _parse_number(self) -> t.Optional[exp.Expression]: 4286 if self._match(TokenType.NUMBER): 4287 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4288 return self._parse_placeholder() 4289 4290 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4291 if self._match(TokenType.IDENTIFIER): 4292 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4293 return self._parse_placeholder() 4294 4295 def _parse_var( 4296 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4297 ) -> t.Optional[exp.Expression]: 4298 if ( 4299 (any_token and self._advance_any()) 4300 or self._match(TokenType.VAR) 4301 or (self._match_set(tokens) if tokens else False) 4302 ): 4303 return self.expression(exp.Var, this=self._prev.text) 4304 return self._parse_placeholder() 4305 4306 def _advance_any(self) -> t.Optional[Token]: 4307 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4308 self._advance() 4309 return self._prev 4310 return None 4311 4312 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4313 return self._parse_var() or self._parse_string() 4314 4315 def _parse_null(self) -> t.Optional[exp.Expression]: 4316 if self._match(TokenType.NULL): 4317 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4318 return self._parse_placeholder() 4319 4320 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4321 if self._match(TokenType.TRUE): 4322 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4323 if self._match(TokenType.FALSE): 4324 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4325 return self._parse_placeholder() 4326 4327 def _parse_star(self) -> t.Optional[exp.Expression]: 4328 if self._match(TokenType.STAR): 4329 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4330 return self._parse_placeholder() 4331 4332 def _parse_parameter(self) 
-> exp.Parameter: 4333 wrapped = self._match(TokenType.L_BRACE) 4334 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4335 self._match(TokenType.R_BRACE) 4336 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4337 4338 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4339 if self._match_set(self.PLACEHOLDER_PARSERS): 4340 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4341 if placeholder: 4342 return placeholder 4343 self._advance(-1) 4344 return None 4345 4346 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4347 if not self._match(TokenType.EXCEPT): 4348 return None 4349 if self._match(TokenType.L_PAREN, advance=False): 4350 return self._parse_wrapped_csv(self._parse_column) 4351 return self._parse_csv(self._parse_column) 4352 4353 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4354 if not self._match(TokenType.REPLACE): 4355 return None 4356 if self._match(TokenType.L_PAREN, advance=False): 4357 return self._parse_wrapped_csv(self._parse_expression) 4358 return self._parse_expressions() 4359 4360 def _parse_csv( 4361 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4362 ) -> t.List[t.Optional[exp.Expression]]: 4363 parse_result = parse_method() 4364 items = [parse_result] if parse_result is not None else [] 4365 4366 while self._match(sep): 4367 self._add_comments(parse_result) 4368 parse_result = parse_method() 4369 if parse_result is not None: 4370 items.append(parse_result) 4371 4372 return items 4373 4374 def _parse_tokens( 4375 self, parse_method: t.Callable, expressions: t.Dict 4376 ) -> t.Optional[exp.Expression]: 4377 this = parse_method() 4378 4379 while self._match_set(expressions): 4380 this = self.expression( 4381 expressions[self._prev.token_type], 4382 this=this, 4383 comments=self._prev_comments, 4384 expression=parse_method(), 4385 ) 4386 4387 return this 4388 4389 def _parse_wrapped_id_vars(self, 
optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4390 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4391 4392 def _parse_wrapped_csv( 4393 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4394 ) -> t.List[t.Optional[exp.Expression]]: 4395 return self._parse_wrapped( 4396 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4397 ) 4398 4399 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4400 wrapped = self._match(TokenType.L_PAREN) 4401 if not wrapped and not optional: 4402 self.raise_error("Expecting (") 4403 parse_result = parse_method() 4404 if wrapped: 4405 self._match_r_paren() 4406 return parse_result 4407 4408 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4409 return self._parse_csv(self._parse_expression) 4410 4411 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4412 return self._parse_select() or self._parse_set_operations( 4413 self._parse_expression() if alias else self._parse_conjunction() 4414 ) 4415 4416 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4417 return self._parse_query_modifiers( 4418 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4419 ) 4420 4421 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4422 this = None 4423 if self._match_texts(self.TRANSACTION_KIND): 4424 this = self._prev.text 4425 4426 self._match_texts({"TRANSACTION", "WORK"}) 4427 4428 modes = [] 4429 while True: 4430 mode = [] 4431 while self._match(TokenType.VAR): 4432 mode.append(self._prev.text) 4433 4434 if mode: 4435 modes.append(" ".join(mode)) 4436 if not self._match(TokenType.COMMA): 4437 break 4438 4439 return self.expression(exp.Transaction, this=this, modes=modes) 4440 4441 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4442 chain = None 4443 savepoint = None 4444 is_rollback = 
self._prev.token_type == TokenType.ROLLBACK 4445 4446 self._match_texts({"TRANSACTION", "WORK"}) 4447 4448 if self._match_text_seq("TO"): 4449 self._match_text_seq("SAVEPOINT") 4450 savepoint = self._parse_id_var() 4451 4452 if self._match(TokenType.AND): 4453 chain = not self._match_text_seq("NO") 4454 self._match_text_seq("CHAIN") 4455 4456 if is_rollback: 4457 return self.expression(exp.Rollback, savepoint=savepoint) 4458 4459 return self.expression(exp.Commit, chain=chain) 4460 4461 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4462 if not self._match_text_seq("ADD"): 4463 return None 4464 4465 self._match(TokenType.COLUMN) 4466 exists_column = self._parse_exists(not_=True) 4467 expression = self._parse_column_def(self._parse_field(any_token=True)) 4468 4469 if expression: 4470 expression.set("exists", exists_column) 4471 4472 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4473 if self._match_texts(("FIRST", "AFTER")): 4474 position = self._prev.text 4475 column_position = self.expression( 4476 exp.ColumnPosition, this=self._parse_column(), position=position 4477 ) 4478 expression.set("position", column_position) 4479 4480 return expression 4481 4482 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4483 drop = self._match(TokenType.DROP) and self._parse_drop() 4484 if drop and not isinstance(drop, exp.Command): 4485 drop.set("kind", drop.args.get("kind", "COLUMN")) 4486 return drop 4487 4488 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4489 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4490 return self.expression( 4491 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4492 ) 4493 4494 def _parse_add_constraint(self) -> exp.AddConstraint: 4495 this = None 4496 kind = self._prev.token_type 4497 4498 if kind == TokenType.CONSTRAINT: 4499 this = self._parse_id_var() 4500 4501 if 
self._match_text_seq("CHECK"): 4502 expression = self._parse_wrapped(self._parse_conjunction) 4503 enforced = self._match_text_seq("ENFORCED") 4504 4505 return self.expression( 4506 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4507 ) 4508 4509 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4510 expression = self._parse_foreign_key() 4511 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4512 expression = self._parse_primary_key() 4513 else: 4514 expression = None 4515 4516 return self.expression(exp.AddConstraint, this=this, expression=expression) 4517 4518 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4519 index = self._index - 1 4520 4521 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4522 return self._parse_csv(self._parse_add_constraint) 4523 4524 self._retreat(index) 4525 return self._parse_csv(self._parse_add_column) 4526 4527 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4528 self._match(TokenType.COLUMN) 4529 column = self._parse_field(any_token=True) 4530 4531 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4532 return self.expression(exp.AlterColumn, this=column, drop=True) 4533 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4534 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4535 4536 self._match_text_seq("SET", "DATA") 4537 return self.expression( 4538 exp.AlterColumn, 4539 this=column, 4540 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4541 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4542 using=self._match(TokenType.USING) and self._parse_conjunction(), 4543 ) 4544 4545 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4546 index = self._index - 1 4547 4548 partition_exists = self._parse_exists() 4549 if self._match(TokenType.PARTITION, advance=False): 4550 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 4551 4552 self._retreat(index) 4553 return self._parse_csv(self._parse_drop_column) 4554 4555 def _parse_alter_table_rename(self) -> exp.RenameTable: 4556 self._match_text_seq("TO") 4557 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4558 4559 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4560 start = self._prev 4561 4562 if not self._match(TokenType.TABLE): 4563 return self._parse_as_command(start) 4564 4565 exists = self._parse_exists() 4566 this = self._parse_table(schema=True) 4567 4568 if self._next: 4569 self._advance() 4570 4571 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4572 if parser: 4573 actions = ensure_list(parser(self)) 4574 4575 if not self._curr: 4576 return self.expression( 4577 exp.AlterTable, 4578 this=this, 4579 exists=exists, 4580 actions=actions, 4581 ) 4582 return self._parse_as_command(start) 4583 4584 def _parse_merge(self) -> exp.Merge: 4585 self._match(TokenType.INTO) 4586 target = self._parse_table() 4587 4588 self._match(TokenType.USING) 4589 using = self._parse_table() 4590 4591 self._match(TokenType.ON) 4592 on = self._parse_conjunction() 4593 4594 whens = [] 4595 while self._match(TokenType.WHEN): 4596 matched = not self._match(TokenType.NOT) 4597 self._match_text_seq("MATCHED") 4598 source = ( 4599 False 4600 if self._match_text_seq("BY", "TARGET") 4601 else self._match_text_seq("BY", "SOURCE") 4602 ) 4603 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4604 4605 self._match(TokenType.THEN) 4606 4607 if self._match(TokenType.INSERT): 4608 _this = self._parse_star() 4609 if _this: 4610 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4611 else: 4612 then = self.expression( 4613 exp.Insert, 4614 this=self._parse_value(), 4615 expression=self._match(TokenType.VALUES) and self._parse_value(), 4616 ) 4617 elif self._match(TokenType.UPDATE): 4618 
expressions = self._parse_star() 4619 if expressions: 4620 then = self.expression(exp.Update, expressions=expressions) 4621 else: 4622 then = self.expression( 4623 exp.Update, 4624 expressions=self._match(TokenType.SET) 4625 and self._parse_csv(self._parse_equality), 4626 ) 4627 elif self._match(TokenType.DELETE): 4628 then = self.expression(exp.Var, this=self._prev.text) 4629 else: 4630 then = None 4631 4632 whens.append( 4633 self.expression( 4634 exp.When, 4635 matched=matched, 4636 source=source, 4637 condition=condition, 4638 then=then, 4639 ) 4640 ) 4641 4642 return self.expression( 4643 exp.Merge, 4644 this=target, 4645 using=using, 4646 on=on, 4647 expressions=whens, 4648 ) 4649 4650 def _parse_show(self) -> t.Optional[exp.Expression]: 4651 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4652 if parser: 4653 return parser(self) 4654 self._advance() 4655 return self.expression(exp.Show, this=self._prev.text.upper()) 4656 4657 def _parse_set_item_assignment( 4658 self, kind: t.Optional[str] = None 4659 ) -> t.Optional[exp.Expression]: 4660 index = self._index 4661 4662 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4663 return self._parse_set_transaction(global_=kind == "GLOBAL") 4664 4665 left = self._parse_primary() or self._parse_id_var() 4666 4667 if not self._match_texts(("=", "TO")): 4668 self._retreat(index) 4669 return None 4670 4671 right = self._parse_statement() or self._parse_id_var() 4672 this = self.expression(exp.EQ, this=left, expression=right) 4673 4674 return self.expression(exp.SetItem, this=this, kind=kind) 4675 4676 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4677 self._match_text_seq("TRANSACTION") 4678 characteristics = self._parse_csv( 4679 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4680 ) 4681 return self.expression( 4682 exp.SetItem, 4683 expressions=characteristics, 4684 kind="TRANSACTION", 4685 **{"global": global_}, # type: ignore 
4686 ) 4687 4688 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4689 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4690 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4691 4692 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4693 index = self._index 4694 set_ = self.expression( 4695 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4696 ) 4697 4698 if self._curr: 4699 self._retreat(index) 4700 return self._parse_as_command(self._prev) 4701 4702 return set_ 4703 4704 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4705 for option in options: 4706 if self._match_text_seq(*option.split(" ")): 4707 return exp.var(option) 4708 return None 4709 4710 def _parse_as_command(self, start: Token) -> exp.Command: 4711 while self._curr: 4712 self._advance() 4713 text = self._find_sql(start, self._prev) 4714 size = len(start.text) 4715 return exp.Command(this=text[:size], expression=text[size:]) 4716 4717 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4718 settings = [] 4719 4720 self._match_l_paren() 4721 kind = self._parse_id_var() 4722 4723 if self._match(TokenType.L_PAREN): 4724 while True: 4725 key = self._parse_id_var() 4726 value = self._parse_primary() 4727 4728 if not key and value is None: 4729 break 4730 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4731 self._match(TokenType.R_PAREN) 4732 4733 self._match_r_paren() 4734 4735 return self.expression( 4736 exp.DictProperty, 4737 this=this, 4738 kind=kind.this if kind else None, 4739 settings=settings, 4740 ) 4741 4742 def _parse_dict_range(self, this: str) -> exp.DictRange: 4743 self._match_l_paren() 4744 has_min = self._match_text_seq("MIN") 4745 if has_min: 4746 min = self._parse_var() or self._parse_primary() 4747 self._match_text_seq("MAX") 4748 max = self._parse_var() or self._parse_primary() 4749 else: 4750 
max = self._parse_var() or self._parse_primary() 4751 min = exp.Literal.number(0) 4752 self._match_r_paren() 4753 return self.expression(exp.DictRange, this=this, min=min, max=max) 4754 4755 def _find_parser( 4756 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4757 ) -> t.Optional[t.Callable]: 4758 if not self._curr: 4759 return None 4760 4761 index = self._index 4762 this = [] 4763 while True: 4764 # The current token might be multiple words 4765 curr = self._curr.text.upper() 4766 key = curr.split(" ") 4767 this.append(curr) 4768 4769 self._advance() 4770 result, trie = in_trie(trie, key) 4771 if result == TrieResult.FAILED: 4772 break 4773 4774 if result == TrieResult.EXISTS: 4775 subparser = parsers[" ".join(this)] 4776 return subparser 4777 4778 self._retreat(index) 4779 return None 4780 4781 def _match(self, token_type, advance=True, expression=None): 4782 if not self._curr: 4783 return None 4784 4785 if self._curr.token_type == token_type: 4786 if advance: 4787 self._advance() 4788 self._add_comments(expression) 4789 return True 4790 4791 return None 4792 4793 def _match_set(self, types, advance=True): 4794 if not self._curr: 4795 return None 4796 4797 if self._curr.token_type in types: 4798 if advance: 4799 self._advance() 4800 return True 4801 4802 return None 4803 4804 def _match_pair(self, token_type_a, token_type_b, advance=True): 4805 if not self._curr or not self._next: 4806 return None 4807 4808 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4809 if advance: 4810 self._advance(2) 4811 return True 4812 4813 return None 4814 4815 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4816 if not self._match(TokenType.L_PAREN, expression=expression): 4817 self.raise_error("Expecting (") 4818 4819 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4820 if not self._match(TokenType.R_PAREN, expression=expression): 4821 self.raise_error("Expecting )") 4822 
    def _match_texts(self, texts, advance=True):
        """Return True and (optionally) consume the current token if its upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewind fully on any mismatch.

        With ``advance=False`` the stream position is restored even on success.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes into Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns whose root name is a lambda variable into identifiers/dots.

        Columns like `x.y` where `x` is a lambda parameter must not be treated as
        table-qualified columns; the outermost enclosing Dot (if any) is replaced.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column itself (or the root node).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
848 def __init__( 849 self, 850 error_level: t.Optional[ErrorLevel] = None, 851 error_message_context: int = 100, 852 max_errors: int = 3, 853 ): 854 self.error_level = error_level or ErrorLevel.IMMEDIATE 855 self.error_message_context = error_message_context 856 self.max_errors = max_errors 857 self.reset()
869 def parse( 870 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 871 ) -> t.List[t.Optional[exp.Expression]]: 872 """ 873 Parses a list of tokens and returns a list of syntax trees, one tree 874 per parsed SQL statement. 875 876 Args: 877 raw_tokens: The list of tokens. 878 sql: The original SQL string, used to produce helpful debug messages. 879 880 Returns: 881 The list of the produced syntax trees. 882 """ 883 return self._parse( 884 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 885 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
887 def parse_into( 888 self, 889 expression_types: exp.IntoType, 890 raw_tokens: t.List[Token], 891 sql: t.Optional[str] = None, 892 ) -> t.List[t.Optional[exp.Expression]]: 893 """ 894 Parses a list of tokens into a given Expression type. If a collection of Expression 895 types is given instead, this method will try to parse the token list into each one 896 of them, stopping at the first for which the parsing succeeds. 897 898 Args: 899 expression_types: The expression type(s) to try and parse the token list into. 900 raw_tokens: The list of tokens. 901 sql: The original SQL string, used to produce helpful debug messages. 902 903 Returns: 904 The target Expression. 905 """ 906 errors = [] 907 for expression_type in ensure_list(expression_types): 908 parser = self.EXPRESSION_PARSERS.get(expression_type) 909 if not parser: 910 raise TypeError(f"No parser registered for {expression_type}") 911 912 try: 913 return self._parse(parser, raw_tokens, sql) 914 except ParseError as e: 915 e.errors[0]["into_expression"] = expression_type 916 errors.append(e) 917 918 raise ParseError( 919 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 920 errors=merge_errors(errors), 921 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
958 def check_errors(self) -> None: 959 """Logs or raises any found errors, depending on the chosen error level setting.""" 960 if self.error_level == ErrorLevel.WARN: 961 for error in self.errors: 962 logger.error(str(error)) 963 elif self.error_level == ErrorLevel.RAISE and self.errors: 964 raise ParseError( 965 concat_messages(self.errors, self.max_errors), 966 errors=merge_errors(self.errors), 967 )
Logs or raises any found errors, depending on the chosen error level setting.
969 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 970 """ 971 Appends an error in the list of recorded errors or raises it, depending on the chosen 972 error level setting. 973 """ 974 token = token or self._curr or self._prev or Token.string("") 975 start = token.start 976 end = token.end + 1 977 start_context = self.sql[max(start - self.error_message_context, 0) : start] 978 highlight = self.sql[start:end] 979 end_context = self.sql[end : end + self.error_message_context] 980 981 error = ParseError.new( 982 f"{message}. Line {token.line}, Col: {token.col}.\n" 983 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 984 description=message, 985 line=token.line, 986 col=token.col, 987 start_context=start_context, 988 highlight=highlight, 989 end_context=end_context, 990 ) 991 992 if self.error_level == ErrorLevel.IMMEDIATE: 993 raise error 994 995 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
997 def expression( 998 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 999 ) -> E: 1000 """ 1001 Creates a new, validated Expression. 1002 1003 Args: 1004 exp_class: The expression class to instantiate. 1005 comments: An optional list of comments to attach to the expression. 1006 kwargs: The arguments to set for the expression along with their respective values. 1007 1008 Returns: 1009 The target expression. 1010 """ 1011 instance = exp_class(**kwargs) 1012 instance.add_comments(comments) if comments else self._add_comments(instance) 1013 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1020 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1021 """ 1022 Validates an Expression, making sure that all its mandatory arguments are set. 1023 1024 Args: 1025 expression: The expression to validate. 1026 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1027 1028 Returns: 1029 The validated expression. 1030 """ 1031 if self.error_level != ErrorLevel.IGNORE: 1032 for error_message in expression.error_messages(args): 1033 self.raise_error(error_message) 1034 1035 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.