sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
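

# Example (illustrative doctest, not part of the module logic): `parse_like`
# receives the argument list of the functional LIKE(...) form and swaps it
# into a standard LIKE node, so the subject lands in `this` and the pattern
# in `expression`:
#
#     >>> parse_like([exp.Literal.string("a%"), exp.column("x")]).sql()
#     "x LIKE 'a%'"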


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }
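
    # Illustrative note (a sketch, not authoritative): FUNCTIONS maps an
    # uppercase SQL function name to a builder that turns a positional
    # argument list into an expression node; unregistered names fall back to
    # exp.Anonymous at parse time.
    #
    #     >>> Parser.FUNCTIONS["VAR_MAP"] is parse_var_map
    #     True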

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
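
    # Illustrative note: CONJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM and
    # FACTOR feed a chain of binary-parsing methods defined further down
    # (_parse_conjunction -> ... -> _parse_term -> _parse_factor), so a table
    # lower in the chain binds tighter. A sketch via the top-level API:
    #
    #     >>> import sqlglot
    #     >>> expr = sqlglot.parse_one("1 + 2 * 3")
    #     >>> type(expr).__name__, type(expr.expression).__name__
    #     ('Add', 'Mul')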

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
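
    # Illustrative note: COLUMN_OPERATORS drives postfix operators applied to
    # a column, e.g. `::` builds a Cast (or TryCast when STRICT_CAST is False)
    # and the arrow operators build JSON extraction nodes. Sketch via the
    # default dialect:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x::INT")).__name__
    #     'Cast'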

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
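
    # Illustrative note: _parse_statement (defined below) dispatches on the
    # first token of a statement through STATEMENT_PARSERS; anything that
    # does not match falls through to expression/SELECT parsing. For example:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("USE db")).__name__
    #     'Use'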

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
701 "ALTER": lambda self: self._parse_alter_table_alter(), 702 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 703 "DROP": lambda self: self._parse_alter_table_drop(), 704 "RENAME": lambda self: self._parse_alter_table_rename(), 705 } 706 707 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 708 709 NO_PAREN_FUNCTION_PARSERS = { 710 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 711 "CASE": lambda self: self._parse_case(), 712 "IF": lambda self: self._parse_if(), 713 "NEXT": lambda self: self._parse_next_value_for(), 714 } 715 716 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 717 718 FUNCTION_PARSERS = { 719 "ANY_VALUE": lambda self: self._parse_any_value(), 720 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 721 "CONCAT": lambda self: self._parse_concat(), 722 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 723 "DECODE": lambda self: self._parse_decode(), 724 "EXTRACT": lambda self: self._parse_extract(), 725 "JSON_OBJECT": lambda self: self._parse_json_object(), 726 "LOG": lambda self: self._parse_logarithm(), 727 "MATCH": lambda self: self._parse_match_against(), 728 "OPENJSON": lambda self: self._parse_open_json(), 729 "POSITION": lambda self: self._parse_position(), 730 "SAFE_CAST": lambda self: self._parse_cast(False), 731 "STRING_AGG": lambda self: self._parse_string_agg(), 732 "SUBSTRING": lambda self: self._parse_substring(), 733 "TRIM": lambda self: self._parse_trim(), 734 "TRY_CAST": lambda self: self._parse_cast(False), 735 "TRY_CONVERT": lambda self: self._parse_convert(False), 736 } 737 738 QUERY_MODIFIER_PARSERS = { 739 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 740 TokenType.WHERE: lambda self: ("where", self._parse_where()), 741 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 742 TokenType.HAVING: lambda self: ("having", self._parse_having()), 743 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 744 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 745 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 746 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 747 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 748 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 749 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 750 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 751 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 752 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.CLUSTER_BY: lambda self: ( 754 "cluster", 755 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 756 ), 757 TokenType.DISTRIBUTE_BY: lambda self: ( 758 "distribute", 759 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 760 ), 761 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 762 } 763 764 SET_PARSERS = { 765 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 766 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 767 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 768 "TRANSACTION": lambda self: self._parse_set_transaction(), 769 } 770 771 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 772 773 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 774 775 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 776 777 

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
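
    # Example usage (illustrative doctest, assuming the Tokenizer imported at
    # the top of this module):
    #
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     >>> [e.sql() for e in Parser().parse(tokens)]
    #     ['SELECT 1', 'SELECT 2']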

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
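
    # Example usage (illustrative doctest): parse a fragment directly into a
    # specific expression type registered in EXPRESSION_PARSERS.
    #
    #     >>> tokens = Tokenizer().tokenize("x > 1")
    #     >>> Parser().parse_into(exp.Condition, tokens)[0].sql()
    #     'x > 1'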
1004 """ 1005 instance = exp_class(**kwargs) 1006 instance.add_comments(comments) if comments else self._add_comments(instance) 1007 return self.validate_expression(instance) 1008 1009 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1010 if expression and self._prev_comments: 1011 expression.add_comments(self._prev_comments) 1012 self._prev_comments = None 1013 1014 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1015 """ 1016 Validates an Expression, making sure that all its mandatory arguments are set. 1017 1018 Args: 1019 expression: The expression to validate. 1020 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1021 1022 Returns: 1023 The validated expression. 1024 """ 1025 if self.error_level != ErrorLevel.IGNORE: 1026 for error_message in expression.error_messages(args): 1027 self.raise_error(error_message) 1028 1029 return expression 1030 1031 def _find_sql(self, start: Token, end: Token) -> str: 1032 return self.sql[start.start : end.end + 1] 1033 1034 def _advance(self, times: int = 1) -> None: 1035 self._index += times 1036 self._curr = seq_get(self._tokens, self._index) 1037 self._next = seq_get(self._tokens, self._index + 1) 1038 1039 if self._index > 0: 1040 self._prev = self._tokens[self._index - 1] 1041 self._prev_comments = self._prev.comments 1042 else: 1043 self._prev = None 1044 self._prev_comments = None 1045 1046 def _retreat(self, index: int) -> None: 1047 if index != self._index: 1048 self._advance(index - self._index) 1049 1050 def _parse_command(self) -> exp.Command: 1051 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1052 1053 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1054 start = self._prev 1055 exists = self._parse_exists() if allow_exists else None 1056 1057 self._match(TokenType.ON) 1058 1059 kind = self._match_set(self.CREATABLES) and self._prev 1060 if not kind: 1061 return self._parse_as_command(start) 1062 1063 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1064 this = self._parse_user_defined_function(kind=kind.token_type) 1065 elif kind.token_type == TokenType.TABLE: 1066 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1067 elif kind.token_type == TokenType.COLUMN: 1068 this = self._parse_column() 1069 else: 1070 this = self._parse_id_var() 1071 1072 self._match(TokenType.IS) 1073 1074 return self.expression( 1075 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1076 ) 1077 1078 def _parse_to_table( 1079 self, 1080 ) -> exp.ToTableProperty: 1081 table = self._parse_table_parts(schema=True) 1082 return self.expression(exp.ToTableProperty, this=table) 1083 1084 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1085 def _parse_ttl(self) -> exp.Expression: 1086 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1087 this = self._parse_bitwise() 1088 1089 if self._match_text_seq("DELETE"): 1090 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1091 if self._match_text_seq("RECOMPRESS"): 1092 return self.expression( 1093 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1094 ) 1095 if self._match_text_seq("TO", "DISK"): 1096 return self.expression( 1097 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1098 ) 1099 if self._match_text_seq("TO", "VOLUME"): 1100 return self.expression( 1101 

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
1757 if self._match_text_seq("NOTHING"): 1758 nothing = True 1759 else: 1760 self._match(TokenType.UPDATE) 1761 self._match(TokenType.SET) 1762 expressions = self._parse_csv(self._parse_equality) 1763 1764 return self.expression( 1765 exp.OnConflict, 1766 duplicate=duplicate, 1767 expressions=expressions, 1768 nothing=nothing, 1769 key=key, 1770 constraint=constraint, 1771 ) 1772 1773 def _parse_returning(self) -> t.Optional[exp.Returning]: 1774 if not self._match(TokenType.RETURNING): 1775 return None 1776 return self.expression( 1777 exp.Returning, 1778 expressions=self._parse_csv(self._parse_expression), 1779 into=self._match(TokenType.INTO) and self._parse_table_part(), 1780 ) 1781 1782 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1783 if not self._match(TokenType.FORMAT): 1784 return None 1785 return self._parse_row_format() 1786 1787 def _parse_row_format( 1788 self, match_row: bool = False 1789 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1790 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1791 return None 1792 1793 if self._match_text_seq("SERDE"): 1794 this = self._parse_string() 1795 1796 serde_properties = None 1797 if self._match(TokenType.SERDE_PROPERTIES): 1798 serde_properties = self.expression( 1799 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1800 ) 1801 1802 return self.expression( 1803 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1804 ) 1805 1806 self._match_text_seq("DELIMITED") 1807 1808 kwargs = {} 1809 1810 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1811 kwargs["fields"] = self._parse_string() 1812 if self._match_text_seq("ESCAPED", "BY"): 1813 kwargs["escaped"] = self._parse_string() 1814 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1815 kwargs["collection_items"] = self._parse_string() 1816 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1817 kwargs["map_keys"] = self._parse_string() 1818 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1819 kwargs["lines"] = self._parse_string() 1820 if self._match_text_seq("NULL", "DEFINED", "AS"): 1821 kwargs["null"] = self._parse_string() 1822 1823 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1824 1825 def _parse_load(self) -> exp.LoadData | exp.Command: 1826 if self._match_text_seq("DATA"): 1827 local = self._match_text_seq("LOCAL") 1828 self._match_text_seq("INPATH") 1829 inpath = self._parse_string() 1830 overwrite = self._match(TokenType.OVERWRITE) 1831 self._match_pair(TokenType.INTO, TokenType.TABLE) 1832 1833 return self.expression( 1834 exp.LoadData, 1835 this=self._parse_table(schema=True), 1836 local=local, 1837 overwrite=overwrite, 1838 inpath=inpath, 1839 partition=self._parse_partition(), 1840 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1841 serde=self._match_text_seq("SERDE") and self._parse_string(), 1842 ) 1843 return self._parse_as_command(self._prev) 1844 1845 def _parse_delete(self) -> exp.Delete: 1846 # This handles MySQL's "Multiple-Table Syntax" 1847 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1848 tables = None 1849 comments = self._prev_comments 1850 if not self._match(TokenType.FROM, advance=False): 1851 tables = self._parse_csv(self._parse_table) or None 1852 1853 returning = self._parse_returning() 1854 1855 return self.expression( 1856 exp.Delete, 1857 comments=comments, 1858 tables=tables, 1859 
this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1860 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1861 where=self._parse_where(), 1862 returning=returning or self._parse_returning(), 1863 limit=self._parse_limit(), 1864 ) 1865 1866 def _parse_update(self) -> exp.Update: 1867 comments = self._prev_comments 1868 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1869 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1870 returning = self._parse_returning() 1871 return self.expression( 1872 exp.Update, 1873 comments=comments, 1874 **{ # type: ignore 1875 "this": this, 1876 "expressions": expressions, 1877 "from": self._parse_from(joins=True), 1878 "where": self._parse_where(), 1879 "returning": returning or self._parse_returning(), 1880 "limit": self._parse_limit(), 1881 }, 1882 ) 1883 1884 def _parse_uncache(self) -> exp.Uncache: 1885 if not self._match(TokenType.TABLE): 1886 self.raise_error("Expecting TABLE after UNCACHE") 1887 1888 return self.expression( 1889 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1890 ) 1891 1892 def _parse_cache(self) -> exp.Cache: 1893 lazy = self._match_text_seq("LAZY") 1894 self._match(TokenType.TABLE) 1895 table = self._parse_table(schema=True) 1896 1897 options = [] 1898 if self._match_text_seq("OPTIONS"): 1899 self._match_l_paren() 1900 k = self._parse_string() 1901 self._match(TokenType.EQ) 1902 v = self._parse_string() 1903 options = [k, v] 1904 self._match_r_paren() 1905 1906 self._match(TokenType.ALIAS) 1907 return self.expression( 1908 exp.Cache, 1909 this=table, 1910 lazy=lazy, 1911 options=options, 1912 expression=self._parse_select(nested=True), 1913 ) 1914 1915 def _parse_partition(self) -> t.Optional[exp.Partition]: 1916 if not self._match(TokenType.PARTITION): 1917 return None 1918 1919 return self.expression( 1920 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1921 ) 1922 1923 def _parse_value(self) -> exp.Tuple: 1924 if self._match(TokenType.L_PAREN): 1925 expressions = self._parse_csv(self._parse_conjunction) 1926 self._match_r_paren() 1927 return self.expression(exp.Tuple, expressions=expressions) 1928 1929 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1930 # https://prestodb.io/docs/current/sql/values.html 1931 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1932 1933 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1934 return self._parse_expressions() 1935 1936 def _parse_select( 1937 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1938 ) -> t.Optional[exp.Expression]: 1939 cte = self._parse_with() 1940 if cte: 1941 this = self._parse_statement() 1942 1943 if not this: 1944 self.raise_error("Failed to parse any statement following CTE") 1945 return cte 1946 1947 if "with" in this.arg_types: 1948 this.set("with", cte) 1949 else: 1950 self.raise_error(f"{this.key} does not support CTE") 1951 this = cte 1952 elif self._match(TokenType.SELECT): 1953 comments = self._prev_comments 1954 1955 hint = self._parse_hint() 1956 all_ = self._match(TokenType.ALL) 1957 distinct = self._match(TokenType.DISTINCT) 1958 1959 kind = ( 1960 self._match(TokenType.ALIAS) 1961 and self._match_texts(("STRUCT", "VALUE")) 1962 and self._prev.text 1963 ) 1964 1965 if distinct: 1966 distinct = self.expression( 1967 exp.Distinct, 1968 on=self._parse_value() if self._match(TokenType.ON) else None, 1969 ) 1970 1971 if all_ and distinct: 1972 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1973 1974 limit = self._parse_limit(top=True) 1975 projections = self._parse_projections() 1976 1977 this = self.expression( 1978 exp.Select, 1979 kind=kind, 1980 hint=hint, 1981 distinct=distinct, 1982 expressions=projections, 1983 limit=limit, 1984 ) 1985 this.comments = comments 1986 1987 into = self._parse_into() 1988 if into: 1989 this.set("into", into) 1990 1991 from_ = self._parse_from() 1992 if from_: 1993 this.set("from", from_) 1994 1995 this = self._parse_query_modifiers(this) 1996 elif (table or nested) and self._match(TokenType.L_PAREN): 1997 if self._match(TokenType.PIVOT): 1998 this = self._parse_simplified_pivot() 1999 elif self._match(TokenType.FROM): 2000 this = exp.select("*").from_( 2001 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2002 ) 2003 else: 2004 this = self._parse_table() if table else self._parse_select(nested=True) 2005 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2006 2007 self._match_r_paren() 2008 2009 # We return early here so that the UNION isn't attached to the subquery by the 2010 # following call to _parse_set_operations, but instead becomes the parent node 2011 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2012 elif self._match(TokenType.VALUES): 2013 this = self.expression( 2014 exp.Values, 2015 expressions=self._parse_csv(self._parse_value), 2016 alias=self._parse_table_alias(), 2017 ) 2018 else: 2019 this = None 2020 2021 return self._parse_set_operations(this) 2022 2023 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2024 if not skip_with_token and not self._match(TokenType.WITH): 2025 return None 2026 2027 comments = self._prev_comments 2028 recursive = self._match(TokenType.RECURSIVE) 2029 2030 expressions = [] 2031 while True: 2032 expressions.append(self._parse_cte()) 2033 2034 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2035 break 2036 else: 2037 self._match(TokenType.WITH) 2038 2039 return self.expression( 2040 exp.With, comments=comments, expressions=expressions, recursive=recursive 2041 ) 2042 2043 def _parse_cte(self) -> exp.CTE: 2044 alias = self._parse_table_alias() 2045 if not alias or not alias.this: 2046 
self.raise_error("Expected CTE to have alias") 2047 2048 self._match(TokenType.ALIAS) 2049 return self.expression( 2050 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2051 ) 2052 2053 def _parse_table_alias( 2054 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2055 ) -> t.Optional[exp.TableAlias]: 2056 any_token = self._match(TokenType.ALIAS) 2057 alias = ( 2058 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2059 or self._parse_string_as_identifier() 2060 ) 2061 2062 index = self._index 2063 if self._match(TokenType.L_PAREN): 2064 columns = self._parse_csv(self._parse_function_parameter) 2065 self._match_r_paren() if columns else self._retreat(index) 2066 else: 2067 columns = None 2068 2069 if not alias and not columns: 2070 return None 2071 2072 return self.expression(exp.TableAlias, this=alias, columns=columns) 2073 2074 def _parse_subquery( 2075 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2076 ) -> t.Optional[exp.Subquery]: 2077 if not this: 2078 return None 2079 2080 return self.expression( 2081 exp.Subquery, 2082 this=this, 2083 pivots=self._parse_pivots(), 2084 alias=self._parse_table_alias() if parse_alias else None, 2085 ) 2086 2087 def _parse_query_modifiers( 2088 self, this: t.Optional[exp.Expression] 2089 ) -> t.Optional[exp.Expression]: 2090 if isinstance(this, self.MODIFIABLES): 2091 for join in iter(self._parse_join, None): 2092 this.append("joins", join) 2093 for lateral in iter(self._parse_lateral, None): 2094 this.append("laterals", lateral) 2095 2096 while True: 2097 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2098 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2099 key, expression = parser(self) 2100 2101 if expression: 2102 this.set(key, expression) 2103 if key == "limit": 2104 offset = expression.args.pop("offset", None) 2105 if offset: 2106 this.set("offset", exp.Offset(expression=offset)) 2107 continue 2108 break 2109 return this 2110 2111 def _parse_hint(self) -> t.Optional[exp.Hint]: 2112 if self._match(TokenType.HINT): 2113 hints = [] 2114 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2115 hints.extend(hint) 2116 2117 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2118 self.raise_error("Expected */ after HINT") 2119 2120 return self.expression(exp.Hint, expressions=hints) 2121 2122 return None 2123 2124 def _parse_into(self) -> t.Optional[exp.Into]: 2125 if not self._match(TokenType.INTO): 2126 return None 2127 2128 temp = self._match(TokenType.TEMPORARY) 2129 unlogged = self._match_text_seq("UNLOGGED") 2130 self._match(TokenType.TABLE) 2131 2132 return self.expression( 2133 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2134 ) 2135 2136 def _parse_from( 2137 self, joins: bool = False, skip_from_token: bool = False 2138 ) -> t.Optional[exp.From]: 2139 if not skip_from_token and not self._match(TokenType.FROM): 2140 return None 2141 2142 return self.expression( 2143 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2144 ) 2145 2146 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2147 if not self._match(TokenType.MATCH_RECOGNIZE): 2148 return None 2149 2150 self._match_l_paren() 2151 2152 partition = self._parse_partition_by() 2153 order = self._parse_order() 2154 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2155 2156 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2157 rows = 
exp.var("ONE ROW PER MATCH") 2158 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2159 text = "ALL ROWS PER MATCH" 2160 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2161 text += f" SHOW EMPTY MATCHES" 2162 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2163 text += f" OMIT EMPTY MATCHES" 2164 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2165 text += f" WITH UNMATCHED ROWS" 2166 rows = exp.var(text) 2167 else: 2168 rows = None 2169 2170 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2171 text = "AFTER MATCH SKIP" 2172 if self._match_text_seq("PAST", "LAST", "ROW"): 2173 text += f" PAST LAST ROW" 2174 elif self._match_text_seq("TO", "NEXT", "ROW"): 2175 text += f" TO NEXT ROW" 2176 elif self._match_text_seq("TO", "FIRST"): 2177 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2178 elif self._match_text_seq("TO", "LAST"): 2179 text += f" TO LAST {self._advance_any().text}" # type: ignore 2180 after = exp.var(text) 2181 else: 2182 after = None 2183 2184 if self._match_text_seq("PATTERN"): 2185 self._match_l_paren() 2186 2187 if not self._curr: 2188 self.raise_error("Expecting )", self._curr) 2189 2190 paren = 1 2191 start = self._curr 2192 2193 while self._curr and paren > 0: 2194 if self._curr.token_type == TokenType.L_PAREN: 2195 paren += 1 2196 if self._curr.token_type == TokenType.R_PAREN: 2197 paren -= 1 2198 2199 end = self._prev 2200 self._advance() 2201 2202 if paren > 0: 2203 self.raise_error("Expecting )", self._curr) 2204 2205 pattern = exp.var(self._find_sql(start, end)) 2206 else: 2207 pattern = None 2208 2209 define = ( 2210 self._parse_csv( 2211 lambda: self.expression( 2212 exp.Alias, 2213 alias=self._parse_id_var(any_token=True), 2214 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2215 ) 2216 ) 2217 if self._match_text_seq("DEFINE") 2218 else None 2219 ) 2220 2221 self._match_r_paren() 2222 2223 return self.expression( 2224 exp.MatchRecognize, 2225 partition_by=partition, 2226 order=order, 2227 measures=measures, 2228 rows=rows, 2229 after=after, 2230 pattern=pattern, 2231 define=define, 2232 alias=self._parse_table_alias(), 2233 ) 2234 2235 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2236 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2237 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2238 2239 if outer_apply or cross_apply: 2240 this = self._parse_select(table=True) 2241 view = None 2242 outer = not cross_apply 2243 elif self._match(TokenType.LATERAL): 2244 this = self._parse_select(table=True) 2245 view = self._match(TokenType.VIEW) 2246 outer = self._match(TokenType.OUTER) 2247 else: 2248 return None 2249 2250 if not this: 2251 this = ( 2252 self._parse_unnest() 2253 or self._parse_function() 2254 or self._parse_id_var(any_token=False) 2255 ) 2256 2257 while self._match(TokenType.DOT): 2258 this = exp.Dot( 2259 this=this, 2260 expression=self._parse_function() or self._parse_id_var(any_token=False), 2261 ) 2262 2263 if view: 2264 table = self._parse_id_var(any_token=False) 2265 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2266 table_alias: t.Optional[exp.TableAlias] = self.expression( 2267 exp.TableAlias, this=table, columns=columns 2268 ) 2269 elif isinstance(this, exp.Subquery) and this.alias: 2270 # Ensures parity between the Subquery's and the Lateral's "alias" args 2271 table_alias = this.args["alias"].copy() 2272 else: 2273 table_alias = self._parse_table_alias() 2274 2275 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2276 2277 def _parse_join_parts( 2278 self, 2279 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2280 return ( 2281 self._match_set(self.JOIN_METHODS) and self._prev, 2282 self._match_set(self.JOIN_SIDES) and self._prev, 2283 self._match_set(self.JOIN_KINDS) and self._prev, 2284 ) 2285 2286 def _parse_join( 2287 self, skip_join_token: bool = False, parse_bracket: bool = False 2288 ) -> t.Optional[exp.Join]: 2289 if self._match(TokenType.COMMA): 2290 return self.expression(exp.Join, this=self._parse_table()) 2291 2292 index = self._index 2293 method, side, kind = self._parse_join_parts() 2294 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2295 join = self._match(TokenType.JOIN) 2296 2297 if not skip_join_token and not join: 2298 self._retreat(index) 2299 kind = None 2300 method = None 2301 side = None 2302 2303 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2304 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2305 2306 if not skip_join_token and not join and not outer_apply and not cross_apply: 2307 return None 2308 2309 if outer_apply: 2310 side = Token(TokenType.LEFT, "LEFT") 2311 2312 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2313 2314 if method: 2315 kwargs["method"] = method.text 2316 if side: 2317 kwargs["side"] = side.text 2318 if kind: 2319 kwargs["kind"] = kind.text 2320 if hint: 2321 kwargs["hint"] = hint 2322 2323 if self._match(TokenType.ON): 2324 kwargs["on"] = self._parse_conjunction() 2325 elif self._match(TokenType.USING): 2326 kwargs["using"] = self._parse_wrapped_id_vars() 2327 elif not (kind and kind.token_type == TokenType.CROSS): 2328 index = self._index 2329 joins = self._parse_joins() 2330 2331 if joins and self._match(TokenType.ON): 2332 kwargs["on"] = self._parse_conjunction() 2333 elif joins and self._match(TokenType.USING): 2334 kwargs["using"] = self._parse_wrapped_id_vars() 2335 else: 2336 joins = None 2337 self._retreat(index) 2338 2339 kwargs["this"].set("joins", joins) 2340 2341 return self.expression(exp.Join, **kwargs) 2342 2343 def _parse_index( 2344 self, 2345 index: t.Optional[exp.Expression] = None, 2346 ) -> t.Optional[exp.Index]: 2347 if index: 2348 unique = None 2349 primary = None 2350 amp = None 2351 2352 self._match(TokenType.ON) 2353 self._match(TokenType.TABLE) # hive 2354 table = self._parse_table_parts(schema=True) 2355 else: 2356 unique = self._match(TokenType.UNIQUE) 2357 primary = self._match_text_seq("PRIMARY") 2358 amp = self._match_text_seq("AMP") 2359 2360 if not self._match(TokenType.INDEX): 2361 return None 2362 2363 index = self._parse_id_var() 2364 table = None 2365 2366 using = self._parse_field() if self._match(TokenType.USING) else None 2367 2368 if self._match(TokenType.L_PAREN, advance=False): 2369 columns = self._parse_wrapped_csv(self._parse_ordered) 2370 else: 2371 columns = None 2372 2373 return self.expression( 2374 exp.Index, 2375 this=index, 2376 table=table, 2377 using=using, 2378 columns=columns, 2379 unique=unique, 2380 primary=primary, 2381 amp=amp, 2382 partition_by=self._parse_partition_by(), 2383 ) 2384 2385 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2386 hints: t.List[exp.Expression] = [] 2387 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2388 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2389 hints.append( 2390 self.expression( 2391 
exp.WithTableHint, 2392 expressions=self._parse_csv( 2393 lambda: self._parse_function() or self._parse_var(any_token=True) 2394 ), 2395 ) 2396 ) 2397 self._match_r_paren() 2398 else: 2399 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2400 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2401 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2402 2403 self._match_texts({"INDEX", "KEY"}) 2404 if self._match(TokenType.FOR): 2405 hint.set("target", self._advance_any() and self._prev.text.upper()) 2406 2407 hint.set("expressions", self._parse_wrapped_id_vars()) 2408 hints.append(hint) 2409 2410 return hints or None 2411 2412 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2413 return ( 2414 (not schema and self._parse_function(optional_parens=False)) 2415 or self._parse_id_var(any_token=False) 2416 or self._parse_string_as_identifier() 2417 or self._parse_placeholder() 2418 ) 2419 2420 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2421 catalog = None 2422 db = None 2423 table = self._parse_table_part(schema=schema) 2424 2425 while self._match(TokenType.DOT): 2426 if catalog: 2427 # This allows nesting the table in arbitrarily many dot expressions if needed 2428 table = self.expression( 2429 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2430 ) 2431 else: 2432 catalog = db 2433 db = table 2434 table = self._parse_table_part(schema=schema) 2435 2436 if not table: 2437 self.raise_error(f"Expected table name but got {self._curr}") 2438 2439 return self.expression( 2440 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2441 ) 2442 2443 def _parse_table( 2444 self, 2445 schema: bool = False, 2446 joins: bool = False, 2447 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2448 parse_bracket: bool = False, 2449 ) -> t.Optional[exp.Expression]: 2450 lateral = self._parse_lateral() 2451 if lateral: 2452 return lateral 2453 2454 unnest = self._parse_unnest() 2455 if unnest: 2456 return unnest 2457 2458 values = self._parse_derived_table_values() 2459 if values: 2460 return values 2461 2462 subquery = self._parse_select(table=True) 2463 if subquery: 2464 if not subquery.args.get("pivots"): 2465 subquery.set("pivots", self._parse_pivots()) 2466 return subquery 2467 2468 bracket = parse_bracket and self._parse_bracket(None) 2469 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2470 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2471 2472 if schema: 2473 return self._parse_schema(this=this) 2474 2475 if self.ALIAS_POST_TABLESAMPLE: 2476 table_sample = self._parse_table_sample() 2477 2478 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2479 if alias: 2480 this.set("alias", alias) 2481 2482 if not this.args.get("pivots"): 2483 this.set("pivots", self._parse_pivots()) 2484 2485 this.set("hints", self._parse_table_hints()) 2486 2487 if not self.ALIAS_POST_TABLESAMPLE: 2488 table_sample = self._parse_table_sample() 2489 2490 if table_sample: 2491 table_sample.set("this", this) 2492 this = table_sample 2493 2494 if joins: 2495 for join in iter(self._parse_join, None): 2496 this.append("joins", join) 2497 2498 return this 2499 2500 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2501 if not self._match(TokenType.UNNEST): 2502 return None 2503 2504 expressions = self._parse_wrapped_csv(self._parse_type) 2505 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 
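# NOTE (illustrative sketch, not part of the original source): the UNNEST
# handling in this method, including the WITH OFFSET branch below, can be seen
# through the public API. A minimal sketch, assuming sqlglot.parse_one:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT * FROM UNNEST(x) AS t WITH OFFSET AS i")
#     >>> isinstance(q.args["from"].this, sqlglot.exp.Unnest)
#     True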
2506 2507 alias = self._parse_table_alias() if with_alias else None 2508 2509 if alias and self.UNNEST_COLUMN_ONLY: 2510 if alias.args.get("columns"): 2511 self.raise_error("Unexpected extra column alias in unnest.") 2512 2513 alias.set("columns", [alias.this]) 2514 alias.set("this", None) 2515 2516 offset = None 2517 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2518 self._match(TokenType.ALIAS) 2519 offset = self._parse_id_var() or exp.to_identifier("offset") 2520 2521 return self.expression( 2522 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2523 ) 2524 2525 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2526 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2527 if not is_derived and not self._match(TokenType.VALUES): 2528 return None 2529 2530 expressions = self._parse_csv(self._parse_value) 2531 alias = self._parse_table_alias() 2532 2533 if is_derived: 2534 self._match_r_paren() 2535 2536 return self.expression( 2537 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2538 ) 2539 2540 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2541 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2542 as_modifier and self._match_text_seq("USING", "SAMPLE") 2543 ): 2544 return None 2545 2546 bucket_numerator = None 2547 bucket_denominator = None 2548 bucket_field = None 2549 percent = None 2550 rows = None 2551 size = None 2552 seed = None 2553 2554 kind = ( 2555 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2556 ) 2557 method = self._parse_var(tokens=(TokenType.ROW,)) 2558 2559 self._match(TokenType.L_PAREN) 2560 2561 num = self._parse_number() 2562 2563 if self._match_text_seq("BUCKET"): 2564 bucket_numerator = self._parse_number() 2565 self._match_text_seq("OUT", "OF") 2566 bucket_denominator = self._parse_number() 2567 self._match(TokenType.ON) 2568 bucket_field = self._parse_field() 2569 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2570 percent = num 2571 elif self._match(TokenType.ROWS): 2572 rows = num 2573 else: 2574 size = num 2575 2576 self._match(TokenType.R_PAREN) 2577 2578 if self._match(TokenType.L_PAREN): 2579 method = self._parse_var() 2580 seed = self._match(TokenType.COMMA) and self._parse_number() 2581 self._match_r_paren() 2582 elif self._match_texts(("SEED", "REPEATABLE")): 2583 seed = self._parse_wrapped(self._parse_number) 2584 2585 return self.expression( 2586 exp.TableSample, 2587 method=method, 2588 bucket_numerator=bucket_numerator, 2589 bucket_denominator=bucket_denominator, 2590 bucket_field=bucket_field, 2591 percent=percent, 2592 rows=rows, 2593 size=size, 2594 seed=seed, 2595 kind=kind, 2596 ) 2597 2598 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2599 return list(iter(self._parse_pivot, None)) or None 2600 2601 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2602 return list(iter(self._parse_join, None)) or None 2603 2604 # https://duckdb.org/docs/sql/statements/pivot 2605 def _parse_simplified_pivot(self) -> exp.Pivot: 2606 def _parse_on() -> t.Optional[exp.Expression]: 2607 this = self._parse_bitwise() 2608 return self._parse_in(this) if self._match(TokenType.IN) else this 2609 2610 this = self._parse_table() 2611 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2612 using = self._match(TokenType.USING) and self._parse_csv( 2613 lambda: self._parse_alias(self._parse_function()) 2614 ) 2615 
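# NOTE (illustrative sketch, not part of the original source): a minimal example
# of the simplified DuckDB-style PIVOT this method targets, assuming the
# "duckdb" dialect name accepted by sqlglot.parse_one:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one(
#     ...     "SELECT * FROM (PIVOT cities ON year USING SUM(population))",
#     ...     read="duckdb",
#     ... )
#     >>> q.find(sqlglot.exp.Pivot) is not None
#     True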
group = self._parse_group() 2616 return self.expression( 2617 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2618 ) 2619 2620 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2621 index = self._index 2622 2623 if self._match(TokenType.PIVOT): 2624 unpivot = False 2625 elif self._match(TokenType.UNPIVOT): 2626 unpivot = True 2627 else: 2628 return None 2629 2630 expressions = [] 2631 field = None 2632 2633 if not self._match(TokenType.L_PAREN): 2634 self._retreat(index) 2635 return None 2636 2637 if unpivot: 2638 expressions = self._parse_csv(self._parse_column) 2639 else: 2640 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2641 2642 if not expressions: 2643 self.raise_error("Failed to parse PIVOT's aggregation list") 2644 2645 if not self._match(TokenType.FOR): 2646 self.raise_error("Expecting FOR") 2647 2648 value = self._parse_column() 2649 2650 if not self._match(TokenType.IN): 2651 self.raise_error("Expecting IN") 2652 2653 field = self._parse_in(value, alias=True) 2654 2655 self._match_r_paren() 2656 2657 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2658 2659 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2660 pivot.set("alias", self._parse_table_alias()) 2661 2662 if not unpivot: 2663 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2664 2665 columns: t.List[exp.Expression] = [] 2666 for fld in pivot.args["field"].expressions: 2667 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2668 for name in names: 2669 if self.PREFIXED_PIVOT_COLUMNS: 2670 name = f"{name}_{field_name}" if name else field_name 2671 else: 2672 name = f"{field_name}_{name}" if name else field_name 2673 2674 columns.append(exp.to_identifier(name)) 2675 2676 pivot.set("columns", columns) 2677 2678 return pivot 2679 2680 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2681 return [agg.alias for agg in aggregations] 2682 2683 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2684 if not skip_where_token and not self._match(TokenType.WHERE): 2685 return None 2686 2687 return self.expression( 2688 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2689 ) 2690 2691 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2692 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2693 return None 2694 2695 elements = defaultdict(list) 2696 2697 if self._match(TokenType.ALL): 2698 return self.expression(exp.Group, all=True) 2699 2700 while True: 2701 expressions = self._parse_csv(self._parse_conjunction) 2702 if expressions: 2703 elements["expressions"].extend(expressions) 2704 2705 grouping_sets = self._parse_grouping_sets() 2706 if grouping_sets: 2707 elements["grouping_sets"].extend(grouping_sets) 2708 2709 rollup = None 2710 cube = None 2711 totals = None 2712 2713 with_ = self._match(TokenType.WITH) 2714 if self._match(TokenType.ROLLUP): 2715 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2716 elements["rollup"].extend(ensure_list(rollup)) 2717 2718 if self._match(TokenType.CUBE): 2719 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2720 elements["cube"].extend(ensure_list(cube)) 2721 2722 if self._match_text_seq("TOTALS"): 2723 totals = True 2724 elements["totals"] = True # type: ignore 2725 2726 if not (grouping_sets or rollup or cube or totals): 2727 break 2728 2729 return 
self.expression(exp.Group, **elements) # type: ignore 2730 2731 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2732 if not self._match(TokenType.GROUPING_SETS): 2733 return None 2734 2735 return self._parse_wrapped_csv(self._parse_grouping_set) 2736 2737 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2738 if self._match(TokenType.L_PAREN): 2739 grouping_set = self._parse_csv(self._parse_column) 2740 self._match_r_paren() 2741 return self.expression(exp.Tuple, expressions=grouping_set) 2742 2743 return self._parse_column() 2744 2745 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2746 if not skip_having_token and not self._match(TokenType.HAVING): 2747 return None 2748 return self.expression(exp.Having, this=self._parse_conjunction()) 2749 2750 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2751 if not self._match(TokenType.QUALIFY): 2752 return None 2753 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2754 2755 def _parse_order( 2756 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2757 ) -> t.Optional[exp.Expression]: 2758 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2759 return this 2760 2761 return self.expression( 2762 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2763 ) 2764 2765 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2766 if not self._match(token): 2767 return None 2768 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2769 2770 def _parse_ordered(self) -> exp.Ordered: 2771 this = self._parse_conjunction() 2772 self._match(TokenType.ASC) 2773 2774 is_desc = self._match(TokenType.DESC) 2775 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2776 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2777 desc = is_desc or False 2778 asc = not desc 2779 nulls_first = is_nulls_first or False 2780 explicitly_null_ordered = is_nulls_first or is_nulls_last 2781 2782 if ( 2783 not explicitly_null_ordered 2784 and ( 2785 (asc and self.NULL_ORDERING == "nulls_are_small") 2786 or (desc and self.NULL_ORDERING != "nulls_are_small") 2787 ) 2788 and self.NULL_ORDERING != "nulls_are_last" 2789 ): 2790 nulls_first = True 2791 2792 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2793 2794 def _parse_limit( 2795 self, this: t.Optional[exp.Expression] = None, top: bool = False 2796 ) -> t.Optional[exp.Expression]: 2797 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2798 comments = self._prev_comments 2799 if top: 2800 limit_paren = self._match(TokenType.L_PAREN) 2801 expression = self._parse_number() 2802 2803 if limit_paren: 2804 self._match_r_paren() 2805 else: 2806 expression = self._parse_term() 2807 2808 if self._match(TokenType.COMMA): 2809 offset = expression 2810 expression = self._parse_term() 2811 else: 2812 offset = None 2813 2814 limit_exp = self.expression( 2815 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2816 ) 2817 2818 return limit_exp 2819 2820 if self._match(TokenType.FETCH): 2821 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2822 direction = self._prev.text if direction else "FIRST" 2823 2824 count = self._parse_number() 2825 percent = self._match(TokenType.PERCENT) 2826 2827 self._match_set((TokenType.ROW, TokenType.ROWS)) 2828 2829 only = self._match_text_seq("ONLY") 2830 with_ties = self._match_text_seq("WITH", "TIES") 2831 
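# NOTE (illustrative sketch, not part of the original source): the FETCH branch
# above handles ANSI "FETCH {FIRST | NEXT} <count> {ROW | ROWS} {ONLY | WITH TIES}",
# assuming the query-modifier table routes TokenType.FETCH to this method:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT a FROM t FETCH FIRST 5 ROWS ONLY")
#     >>> isinstance(q.args["limit"], sqlglot.exp.Fetch)
#     True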
2832 if only and with_ties: 2833 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2834 2835 return self.expression( 2836 exp.Fetch, 2837 direction=direction, 2838 count=count, 2839 percent=percent, 2840 with_ties=with_ties, 2841 ) 2842 2843 return this 2844 2845 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2846 if not self._match(TokenType.OFFSET): 2847 return this 2848 2849 count = self._parse_term() 2850 self._match_set((TokenType.ROW, TokenType.ROWS)) 2851 return self.expression(exp.Offset, this=this, expression=count) 2852 2853 def _parse_locks(self) -> t.List[exp.Lock]: 2854 locks = [] 2855 while True: 2856 if self._match_text_seq("FOR", "UPDATE"): 2857 update = True 2858 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2859 "LOCK", "IN", "SHARE", "MODE" 2860 ): 2861 update = False 2862 else: 2863 break 2864 2865 expressions = None 2866 if self._match_text_seq("OF"): 2867 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2868 2869 wait: t.Optional[bool | exp.Expression] = None 2870 if self._match_text_seq("NOWAIT"): 2871 wait = True 2872 elif self._match_text_seq("WAIT"): 2873 wait = self._parse_primary() 2874 elif self._match_text_seq("SKIP", "LOCKED"): 2875 wait = False 2876 2877 locks.append( 2878 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2879 ) 2880 2881 return locks 2882 2883 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2884 if not self._match_set(self.SET_OPERATIONS): 2885 return this 2886 2887 token_type = self._prev.token_type 2888 2889 if token_type == TokenType.UNION: 2890 expression = exp.Union 2891 elif token_type == TokenType.EXCEPT: 2892 expression = exp.Except 2893 else: 2894 expression = exp.Intersect 2895 2896 return self.expression( 2897 expression, 2898 this=this, 2899 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2900 expression=self._parse_set_operations(self._parse_select(nested=True)), 2901 ) 2902 2903 def _parse_expression(self) -> t.Optional[exp.Expression]: 2904 return self._parse_alias(self._parse_conjunction()) 2905 2906 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2907 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2908 2909 def _parse_equality(self) -> t.Optional[exp.Expression]: 2910 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2911 2912 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2913 return self._parse_tokens(self._parse_range, self.COMPARISON) 2914 2915 def _parse_range(self) -> t.Optional[exp.Expression]: 2916 this = self._parse_bitwise() 2917 negate = self._match(TokenType.NOT) 2918 2919 if self._match_set(self.RANGE_PARSERS): 2920 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2921 if not expression: 2922 return this 2923 2924 this = expression 2925 elif self._match(TokenType.ISNULL): 2926 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2927 2928 # Postgres supports ISNULL and NOTNULL for conditions. 
2929 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2930 if self._match(TokenType.NOTNULL): 2931 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2932 this = self.expression(exp.Not, this=this) 2933 2934 if negate: 2935 this = self.expression(exp.Not, this=this) 2936 2937 if self._match(TokenType.IS): 2938 this = self._parse_is(this) 2939 2940 return this 2941 2942 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2943 index = self._index - 1 2944 negate = self._match(TokenType.NOT) 2945 2946 if self._match_text_seq("DISTINCT", "FROM"): 2947 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2948 return self.expression(klass, this=this, expression=self._parse_expression()) 2949 2950 expression = self._parse_null() or self._parse_boolean() 2951 if not expression: 2952 self._retreat(index) 2953 return None 2954 2955 this = self.expression(exp.Is, this=this, expression=expression) 2956 return self.expression(exp.Not, this=this) if negate else this 2957 2958 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2959 unnest = self._parse_unnest(with_alias=False) 2960 if unnest: 2961 this = self.expression(exp.In, this=this, unnest=unnest) 2962 elif self._match(TokenType.L_PAREN): 2963 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2964 2965 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2966 this = self.expression(exp.In, this=this, query=expressions[0]) 2967 else: 2968 this = self.expression(exp.In, this=this, expressions=expressions) 2969 2970 self._match_r_paren(this) 2971 else: 2972 this = self.expression(exp.In, this=this, field=self._parse_field()) 2973 2974 return this 2975 2976 def _parse_between(self, this: exp.Expression) -> exp.Between: 2977 low = self._parse_bitwise() 2978 self._match(TokenType.AND) 2979 high = self._parse_bitwise() 2980 return self.expression(exp.Between, this=this, low=low, high=high) 2981 2982 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2983 if not self._match(TokenType.ESCAPE): 2984 return this 2985 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2986 2987 def _parse_interval(self) -> t.Optional[exp.Interval]: 2988 if not self._match(TokenType.INTERVAL): 2989 return None 2990 2991 if self._match(TokenType.STRING, advance=False): 2992 this = self._parse_primary() 2993 else: 2994 this = self._parse_term() 2995 2996 unit = self._parse_function() or self._parse_var() 2997 2998 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2999 # each INTERVAL expression into this canonical form so it's easy to transpile 3000 if this and this.is_number: 3001 this = exp.Literal.string(this.name) 3002 elif this and this.is_string: 3003 parts = this.name.split() 3004 3005 if len(parts) == 2: 3006 if unit: 3007 # this is not actually a unit, it's something else 3008 unit = None 3009 self._retreat(self._index - 1) 3010 else: 3011 this = exp.Literal.string(parts[0]) 3012 unit = self.expression(exp.Var, this=parts[1]) 3013 3014 return self.expression(exp.Interval, this=this, unit=unit) 3015 3016 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3017 this = self._parse_term() 3018 3019 while True: 3020 if self._match_set(self.BITWISE): 3021 this = self.expression( 3022 self.BITWISE[self._prev.token_type], 3023 this=this, 3024 expression=self._parse_term(), 3025 ) 3026 elif self._match(TokenType.DQMARK): 3027 this = 
self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3028 elif self._match_pair(TokenType.LT, TokenType.LT): 3029 this = self.expression( 3030 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3031 ) 3032 elif self._match_pair(TokenType.GT, TokenType.GT): 3033 this = self.expression( 3034 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3035 ) 3036 else: 3037 break 3038 3039 return this 3040 3041 def _parse_term(self) -> t.Optional[exp.Expression]: 3042 return self._parse_tokens(self._parse_factor, self.TERM) 3043 3044 def _parse_factor(self) -> t.Optional[exp.Expression]: 3045 return self._parse_tokens(self._parse_unary, self.FACTOR) 3046 3047 def _parse_unary(self) -> t.Optional[exp.Expression]: 3048 if self._match_set(self.UNARY_PARSERS): 3049 return self.UNARY_PARSERS[self._prev.token_type](self) 3050 return self._parse_at_time_zone(self._parse_type()) 3051 3052 def _parse_type(self) -> t.Optional[exp.Expression]: 3053 interval = self._parse_interval() 3054 if interval: 3055 return interval 3056 3057 index = self._index 3058 data_type = self._parse_types(check_func=True) 3059 this = self._parse_column() 3060 3061 if data_type: 3062 if isinstance(this, exp.Literal): 3063 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3064 if parser: 3065 return parser(self, this, data_type) 3066 return self.expression(exp.Cast, this=this, to=data_type) 3067 if not data_type.expressions: 3068 self._retreat(index) 3069 return self._parse_column() 3070 return self._parse_column_ops(data_type) 3071 3072 return this 3073 3074 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3075 this = self._parse_type() 3076 if not this: 3077 return None 3078 3079 return self.expression( 3080 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3081 ) 3082 3083 def _parse_types( 3084 self, check_func: bool = False, schema: bool = False 3085 ) -> t.Optional[exp.Expression]: 3086 index = self._index 3087 3088 prefix = self._match_text_seq("SYSUDTLIB", ".") 3089 3090 if not self._match_set(self.TYPE_TOKENS): 3091 return None 3092 3093 type_token = self._prev.token_type 3094 3095 if type_token == TokenType.PSEUDO_TYPE: 3096 return self.expression(exp.PseudoType, this=self._prev.text) 3097 3098 nested = type_token in self.NESTED_TYPE_TOKENS 3099 is_struct = type_token == TokenType.STRUCT 3100 expressions = None 3101 maybe_func = False 3102 3103 if self._match(TokenType.L_PAREN): 3104 if is_struct: 3105 expressions = self._parse_csv(self._parse_struct_types) 3106 elif nested: 3107 expressions = self._parse_csv( 3108 lambda: self._parse_types(check_func=check_func, schema=schema) 3109 ) 3110 elif type_token in self.ENUM_TYPE_TOKENS: 3111 expressions = self._parse_csv(self._parse_primary) 3112 else: 3113 expressions = self._parse_csv(self._parse_type_size) 3114 3115 if not expressions or not self._match(TokenType.R_PAREN): 3116 self._retreat(index) 3117 return None 3118 3119 maybe_func = True 3120 3121 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3122 this = exp.DataType( 3123 this=exp.DataType.Type.ARRAY, 3124 expressions=[ 3125 exp.DataType( 3126 this=exp.DataType.Type[type_token.value], 3127 expressions=expressions, 3128 nested=nested, 3129 ) 3130 ], 3131 nested=True, 3132 ) 3133 3134 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3135 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3136 3137 return this 3138 3139 if self._match(TokenType.L_BRACKET): 3140 
self._retreat(index) 3141 return None 3142 3143 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3144 if nested and self._match(TokenType.LT): 3145 if is_struct: 3146 expressions = self._parse_csv(self._parse_struct_types) 3147 else: 3148 expressions = self._parse_csv( 3149 lambda: self._parse_types(check_func=check_func, schema=schema) 3150 ) 3151 3152 if not self._match(TokenType.GT): 3153 self.raise_error("Expecting >") 3154 3155 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3156 values = self._parse_csv(self._parse_conjunction) 3157 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3158 3159 value: t.Optional[exp.Expression] = None 3160 if type_token in self.TIMESTAMPS: 3161 if self._match_text_seq("WITH", "TIME", "ZONE"): 3162 maybe_func = False 3163 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3164 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3165 maybe_func = False 3166 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3167 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3168 maybe_func = False 3169 elif type_token == TokenType.INTERVAL: 3170 unit = self._parse_var() 3171 3172 if not unit: 3173 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3174 else: 3175 value = self.expression(exp.Interval, unit=unit) 3176 3177 if maybe_func and check_func: 3178 index2 = self._index 3179 peek = self._parse_string() 3180 3181 if not peek: 3182 self._retreat(index) 3183 return None 3184 3185 self._retreat(index2) 3186 3187 if value: 3188 return value 3189 3190 return exp.DataType( 3191 this=exp.DataType.Type[type_token.value], 3192 expressions=expressions, 3193 nested=nested, 3194 values=values, 3195 prefix=prefix, 3196 ) 3197 3198 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3199 this = self._parse_type() or self._parse_id_var() 3200 self._match(TokenType.COLON) 3201 return self._parse_column_def(this) 3202 3203 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3204 if not self._match_text_seq("AT", "TIME", "ZONE"): 3205 return this 3206 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3207 3208 def _parse_column(self) -> t.Optional[exp.Expression]: 3209 this = self._parse_field() 3210 if isinstance(this, exp.Identifier): 3211 this = self.expression(exp.Column, this=this) 3212 elif not this: 3213 return self._parse_bracket(this) 3214 return self._parse_column_ops(this) 3215 3216 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3217 this = self._parse_bracket(this) 3218 3219 while self._match_set(self.COLUMN_OPERATORS): 3220 op_token = self._prev.token_type 3221 op = self.COLUMN_OPERATORS.get(op_token) 3222 3223 if op_token == TokenType.DCOLON: 3224 field = self._parse_types() 3225 if not field: 3226 self.raise_error("Expected type") 3227 elif op and self._curr: 3228 self._advance() 3229 value = self._prev.text 3230 field = ( 3231 exp.Literal.number(value) 3232 if self._prev.token_type == TokenType.NUMBER 3233 else exp.Literal.string(value) 3234 ) 3235 else: 3236 field = self._parse_field(anonymous_func=True, any_token=True) 3237 3238 if isinstance(field, exp.Func): 3239 # bigquery allows function calls like x.y.count(...) 3240 # SAFE.SUBSTR(...) 
3241 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3242 this = self._replace_columns_with_dots(this) 3243 3244 if op: 3245 this = op(self, this, field) 3246 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3247 this = self.expression( 3248 exp.Column, 3249 this=field, 3250 table=this.this, 3251 db=this.args.get("table"), 3252 catalog=this.args.get("db"), 3253 ) 3254 else: 3255 this = self.expression(exp.Dot, this=this, expression=field) 3256 this = self._parse_bracket(this) 3257 return this 3258 3259 def _parse_primary(self) -> t.Optional[exp.Expression]: 3260 if self._match_set(self.PRIMARY_PARSERS): 3261 token_type = self._prev.token_type 3262 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3263 3264 if token_type == TokenType.STRING: 3265 expressions = [primary] 3266 while self._match(TokenType.STRING): 3267 expressions.append(exp.Literal.string(self._prev.text)) 3268 3269 if len(expressions) > 1: 3270 return self.expression(exp.Concat, expressions=expressions) 3271 3272 return primary 3273 3274 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3275 return exp.Literal.number(f"0.{self._prev.text}") 3276 3277 if self._match(TokenType.L_PAREN): 3278 comments = self._prev_comments 3279 query = self._parse_select() 3280 3281 if query: 3282 expressions = [query] 3283 else: 3284 expressions = self._parse_expressions() 3285 3286 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3287 3288 if isinstance(this, exp.Subqueryable): 3289 this = self._parse_set_operations( 3290 self._parse_subquery(this=this, parse_alias=False) 3291 ) 3292 elif len(expressions) > 1: 3293 this = self.expression(exp.Tuple, expressions=expressions) 3294 else: 3295 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3296 3297 if this: 3298 this.add_comments(comments) 3299 3300 self._match_r_paren(expression=this) 3301 return this 3302 3303 return None 3304 3305 def _parse_field( 3306 self, 3307 any_token: bool = False, 3308 tokens: t.Optional[t.Collection[TokenType]] = None, 3309 anonymous_func: bool = False, 3310 ) -> t.Optional[exp.Expression]: 3311 return ( 3312 self._parse_primary() 3313 or self._parse_function(anonymous=anonymous_func) 3314 or self._parse_id_var(any_token=any_token, tokens=tokens) 3315 ) 3316 3317 def _parse_function( 3318 self, 3319 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3320 anonymous: bool = False, 3321 optional_parens: bool = True, 3322 ) -> t.Optional[exp.Expression]: 3323 if not self._curr: 3324 return None 3325 3326 token_type = self._curr.token_type 3327 this = self._curr.text 3328 upper = this.upper() 3329 3330 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3331 if optional_parens and parser: 3332 self._advance() 3333 return parser(self) 3334 3335 if not self._next or self._next.token_type != TokenType.L_PAREN: 3336 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3337 self._advance() 3338 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3339 3340 return None 3341 3342 if token_type not in self.FUNC_TOKENS: 3343 return None 3344 3345 self._advance(2) 3346 3347 parser = self.FUNCTION_PARSERS.get(upper) 3348 if parser and not anonymous: 3349 this = parser(self) 3350 else: 3351 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3352 3353 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3354 this = self.expression(subquery_predicate, this=self._parse_select()) 3355 
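# NOTE (illustrative sketch, not part of the original source): EXISTS / ANY / ALL
# followed by a parenthesized SELECT take this path and are wrapped as subquery
# predicates rather than function calls. A minimal sketch, assuming sqlglot.parse_one:
#
#     >>> import sqlglot
#     >>> q = sqlglot.parse_one("SELECT * FROM t WHERE EXISTS (SELECT 1 FROM u)")
#     >>> q.find(sqlglot.exp.Exists) is not None
#     True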
self._match_r_paren() 3356 return this 3357 3358 if functions is None: 3359 functions = self.FUNCTIONS 3360 3361 function = functions.get(upper) 3362 3363 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3364 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3365 3366 if function and not anonymous: 3367 func = self.validate_expression(function(args), args) 3368 if not self.NORMALIZE_FUNCTIONS: 3369 func.meta["name"] = this 3370 this = func 3371 else: 3372 this = self.expression(exp.Anonymous, this=this, expressions=args) 3373 3374 self._match_r_paren(this) 3375 return self._parse_window(this) 3376 3377 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3378 return self._parse_column_def(self._parse_id_var()) 3379 3380 def _parse_user_defined_function( 3381 self, kind: t.Optional[TokenType] = None 3382 ) -> t.Optional[exp.Expression]: 3383 this = self._parse_id_var() 3384 3385 while self._match(TokenType.DOT): 3386 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3387 3388 if not self._match(TokenType.L_PAREN): 3389 return this 3390 3391 expressions = self._parse_csv(self._parse_function_parameter) 3392 self._match_r_paren() 3393 return self.expression( 3394 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3395 ) 3396 3397 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3398 literal = self._parse_primary() 3399 if literal: 3400 return self.expression(exp.Introducer, this=token.text, expression=literal) 3401 3402 return self.expression(exp.Identifier, this=token.text) 3403 3404 def _parse_session_parameter(self) -> exp.SessionParameter: 3405 kind = None 3406 this = self._parse_id_var() or self._parse_primary() 3407 3408 if this and self._match(TokenType.DOT): 3409 kind = this.name 3410 this = self._parse_var() or self._parse_primary() 3411 3412 return self.expression(exp.SessionParameter, this=this, kind=kind) 3413 3414 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3415 index = self._index 3416 3417 if self._match(TokenType.L_PAREN): 3418 expressions = self._parse_csv(self._parse_id_var) 3419 3420 if not self._match(TokenType.R_PAREN): 3421 self._retreat(index) 3422 else: 3423 expressions = [self._parse_id_var()] 3424 3425 if self._match_set(self.LAMBDAS): 3426 return self.LAMBDAS[self._prev.token_type](self, expressions) 3427 3428 self._retreat(index) 3429 3430 this: t.Optional[exp.Expression] 3431 3432 if self._match(TokenType.DISTINCT): 3433 this = self.expression( 3434 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3435 ) 3436 else: 3437 this = self._parse_select_or_expression(alias=alias) 3438 3439 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3440 3441 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3442 index = self._index 3443 3444 if not self.errors: 3445 try: 3446 if self._parse_select(nested=True): 3447 return this 3448 except ParseError: 3449 pass 3450 finally: 3451 self.errors.clear() 3452 self._retreat(index) 3453 3454 if not self._match(TokenType.L_PAREN): 3455 return this 3456 3457 args = self._parse_csv( 3458 lambda: self._parse_constraint() 3459 or self._parse_column_def(self._parse_field(any_token=True)) 3460 ) 3461 3462 self._match_r_paren() 3463 return self.expression(exp.Schema, this=this, expressions=args) 3464 3465 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3466 # column 
defs are not really columns, they're identifiers 3467 if isinstance(this, exp.Column): 3468 this = this.this 3469 3470 kind = self._parse_types(schema=True) 3471 3472 if self._match_text_seq("FOR", "ORDINALITY"): 3473 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3474 3475 constraints = [] 3476 while True: 3477 constraint = self._parse_column_constraint() 3478 if not constraint: 3479 break 3480 constraints.append(constraint) 3481 3482 if not kind and not constraints: 3483 return this 3484 3485 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3486 3487 def _parse_auto_increment( 3488 self, 3489 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3490 start = None 3491 increment = None 3492 3493 if self._match(TokenType.L_PAREN, advance=False): 3494 args = self._parse_wrapped_csv(self._parse_bitwise) 3495 start = seq_get(args, 0) 3496 increment = seq_get(args, 1) 3497 elif self._match_text_seq("START"): 3498 start = self._parse_bitwise() 3499 self._match_text_seq("INCREMENT") 3500 increment = self._parse_bitwise() 3501 3502 if start and increment: 3503 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3504 3505 return exp.AutoIncrementColumnConstraint() 3506 3507 def _parse_compress(self) -> exp.CompressColumnConstraint: 3508 if self._match(TokenType.L_PAREN, advance=False): 3509 return self.expression( 3510 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3511 ) 3512 3513 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3514 3515 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3516 if self._match_text_seq("BY", "DEFAULT"): 3517 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3518 this = self.expression( 3519 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3520 ) 3521 else: 3522 self._match_text_seq("ALWAYS") 3523 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3524 3525 self._match(TokenType.ALIAS) 3526 identity = self._match_text_seq("IDENTITY") 3527 3528 if self._match(TokenType.L_PAREN): 3529 if self._match_text_seq("START", "WITH"): 3530 this.set("start", self._parse_bitwise()) 3531 if self._match_text_seq("INCREMENT", "BY"): 3532 this.set("increment", self._parse_bitwise()) 3533 if self._match_text_seq("MINVALUE"): 3534 this.set("minvalue", self._parse_bitwise()) 3535 if self._match_text_seq("MAXVALUE"): 3536 this.set("maxvalue", self._parse_bitwise()) 3537 3538 if self._match_text_seq("CYCLE"): 3539 this.set("cycle", True) 3540 elif self._match_text_seq("NO", "CYCLE"): 3541 this.set("cycle", False) 3542 3543 if not identity: 3544 this.set("expression", self._parse_bitwise()) 3545 3546 self._match_r_paren() 3547 3548 return this 3549 3550 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3551 self._match_text_seq("LENGTH") 3552 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3553 3554 def _parse_not_constraint( 3555 self, 3556 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3557 if self._match_text_seq("NULL"): 3558 return self.expression(exp.NotNullColumnConstraint) 3559 if self._match_text_seq("CASESPECIFIC"): 3560 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3561 return None 3562 3563 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3564 if self._match(TokenType.CONSTRAINT): 3565 this = 
self._parse_id_var() 3566 else: 3567 this = None 3568 3569 if self._match_texts(self.CONSTRAINT_PARSERS): 3570 return self.expression( 3571 exp.ColumnConstraint, 3572 this=this, 3573 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3574 ) 3575 3576 return this 3577 3578 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3579 if not self._match(TokenType.CONSTRAINT): 3580 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3581 3582 this = self._parse_id_var() 3583 expressions = [] 3584 3585 while True: 3586 constraint = self._parse_unnamed_constraint() or self._parse_function() 3587 if not constraint: 3588 break 3589 expressions.append(constraint) 3590 3591 return self.expression(exp.Constraint, this=this, expressions=expressions) 3592 3593 def _parse_unnamed_constraint( 3594 self, constraints: t.Optional[t.Collection[str]] = None 3595 ) -> t.Optional[exp.Expression]: 3596 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3597 return None 3598 3599 constraint = self._prev.text.upper() 3600 if constraint not in self.CONSTRAINT_PARSERS: 3601 self.raise_error(f"No parser found for schema constraint {constraint}.") 3602 3603 return self.CONSTRAINT_PARSERS[constraint](self) 3604 3605 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3606 self._match_text_seq("KEY") 3607 return self.expression( 3608 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3609 ) 3610 3611 def _parse_key_constraint_options(self) -> t.List[str]: 3612 options = [] 3613 while True: 3614 if not self._curr: 3615 break 3616 3617 if self._match(TokenType.ON): 3618 action = None 3619 on = self._advance_any() and self._prev.text 3620 3621 if self._match_text_seq("NO", "ACTION"): 3622 action = "NO ACTION" 3623 elif self._match_text_seq("CASCADE"): 3624 action = "CASCADE" 3625 elif self._match_pair(TokenType.SET, TokenType.NULL): 3626 action = "SET NULL" 3627 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3628 action = "SET DEFAULT" 3629 else: 3630 self.raise_error("Invalid key constraint") 3631 3632 options.append(f"ON {on} {action}") 3633 elif self._match_text_seq("NOT", "ENFORCED"): 3634 options.append("NOT ENFORCED") 3635 elif self._match_text_seq("DEFERRABLE"): 3636 options.append("DEFERRABLE") 3637 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3638 options.append("INITIALLY DEFERRED") 3639 elif self._match_text_seq("NORELY"): 3640 options.append("NORELY") 3641 elif self._match_text_seq("MATCH", "FULL"): 3642 options.append("MATCH FULL") 3643 else: 3644 break 3645 3646 return options 3647 3648 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3649 if match and not self._match(TokenType.REFERENCES): 3650 return None 3651 3652 expressions = None 3653 this = self._parse_table(schema=True) 3654 options = self._parse_key_constraint_options() 3655 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3656 3657 def _parse_foreign_key(self) -> exp.ForeignKey: 3658 expressions = self._parse_wrapped_id_vars() 3659 reference = self._parse_references() 3660 options = {} 3661 3662 while self._match(TokenType.ON): 3663 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3664 self.raise_error("Expected DELETE or UPDATE") 3665 3666 kind = self._prev.text.lower() 3667 3668 if self._match_text_seq("NO", "ACTION"): 3669 action = "NO ACTION" 3670 elif self._match(TokenType.SET): 3671 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3672 action 
= "SET " + self._prev.text.upper() 3673 else: 3674 self._advance() 3675 action = self._prev.text.upper() 3676 3677 options[kind] = action 3678 3679 return self.expression( 3680 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3681 ) 3682 3683 def _parse_primary_key( 3684 self, wrapped_optional: bool = False, in_props: bool = False 3685 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3686 desc = ( 3687 self._match_set((TokenType.ASC, TokenType.DESC)) 3688 and self._prev.token_type == TokenType.DESC 3689 ) 3690 3691 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3692 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3693 3694 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3695 options = self._parse_key_constraint_options() 3696 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3697 3698 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3699 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3700 return this 3701 3702 bracket_kind = self._prev.token_type 3703 3704 if self._match(TokenType.COLON): 3705 expressions: t.List[t.Optional[exp.Expression]] = [ 3706 self.expression(exp.Slice, expression=self._parse_conjunction()) 3707 ] 3708 else: 3709 expressions = self._parse_csv( 3710 lambda: self._parse_slice( 3711 self._parse_alias(self._parse_conjunction(), explicit=True) 3712 ) 3713 ) 3714 3715 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3716 if bracket_kind == TokenType.L_BRACE: 3717 this = self.expression(exp.Struct, expressions=expressions) 3718 elif not this or this.name.upper() == "ARRAY": 3719 this = self.expression(exp.Array, expressions=expressions) 3720 else: 3721 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3722 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3723 3724 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3725 self.raise_error("Expected ]") 3726 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3727 self.raise_error("Expected }") 3728 3729 self._add_comments(this) 3730 return self._parse_bracket(this) 3731 3732 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3733 if self._match(TokenType.COLON): 3734 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3735 return this 3736 3737 def _parse_case(self) -> t.Optional[exp.Expression]: 3738 ifs = [] 3739 default = None 3740 3741 expression = self._parse_conjunction() 3742 3743 while self._match(TokenType.WHEN): 3744 this = self._parse_conjunction() 3745 self._match(TokenType.THEN) 3746 then = self._parse_conjunction() 3747 ifs.append(self.expression(exp.If, this=this, true=then)) 3748 3749 if self._match(TokenType.ELSE): 3750 default = self._parse_conjunction() 3751 3752 if not self._match(TokenType.END): 3753 self.raise_error("Expected END after CASE", self._prev) 3754 3755 return self._parse_window( 3756 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3757 ) 3758 3759 def _parse_if(self) -> t.Optional[exp.Expression]: 3760 if self._match(TokenType.L_PAREN): 3761 args = self._parse_csv(self._parse_conjunction) 3762 this = self.validate_expression(exp.If.from_arg_list(args), args) 3763 self._match_r_paren() 3764 else: 3765 index = self._index - 1 3766 condition = self._parse_conjunction() 3767 3768 
if not condition: 3769 self._retreat(index) 3770 return None 3771 3772 self._match(TokenType.THEN) 3773 true = self._parse_conjunction() 3774 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3775 self._match(TokenType.END) 3776 this = self.expression(exp.If, this=condition, true=true, false=false) 3777 3778 return self._parse_window(this) 3779 3780 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3781 if not self._match_text_seq("VALUE", "FOR"): 3782 self._retreat(self._index - 1) 3783 return None 3784 3785 return self.expression( 3786 exp.NextValueFor, 3787 this=self._parse_column(), 3788 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3789 ) 3790 3791 def _parse_extract(self) -> exp.Extract: 3792 this = self._parse_function() or self._parse_var() or self._parse_type() 3793 3794 if self._match(TokenType.FROM): 3795 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3796 3797 if not self._match(TokenType.COMMA): 3798 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3799 3800 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3801 3802 def _parse_any_value(self) -> exp.AnyValue: 3803 this = self._parse_lambda() 3804 is_max = None 3805 having = None 3806 3807 if self._match(TokenType.HAVING): 3808 self._match_texts(("MAX", "MIN")) 3809 is_max = self._prev.text == "MAX" 3810 having = self._parse_column() 3811 3812 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3813 3814 def _parse_cast(self, strict: bool) -> exp.Expression: 3815 this = self._parse_conjunction() 3816 3817 if not self._match(TokenType.ALIAS): 3818 if self._match(TokenType.COMMA): 3819 return self.expression( 3820 exp.CastToStrType, this=this, expression=self._parse_string() 3821 ) 3822 else: 3823 self.raise_error("Expected AS after CAST") 3824 3825 fmt = None 3826 to = self._parse_types() 3827 3828 if not to: 3829 self.raise_error("Expected TYPE after CAST") 3830 elif to.this == exp.DataType.Type.CHAR: 3831 if self._match(TokenType.CHARACTER_SET): 3832 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3833 elif self._match(TokenType.FORMAT): 3834 fmt_string = self._parse_string() 3835 fmt = self._parse_at_time_zone(fmt_string) 3836 3837 if to.this in exp.DataType.TEMPORAL_TYPES: 3838 this = self.expression( 3839 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3840 this=this, 3841 format=exp.Literal.string( 3842 format_time( 3843 fmt_string.this if fmt_string else "", 3844 self.FORMAT_MAPPING or self.TIME_MAPPING, 3845 self.FORMAT_TRIE or self.TIME_TRIE, 3846 ) 3847 ), 3848 ) 3849 3850 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3851 this.set("zone", fmt.args["zone"]) 3852 3853 return this 3854 3855 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3856 3857 def _parse_concat(self) -> t.Optional[exp.Expression]: 3858 args = self._parse_csv(self._parse_conjunction) 3859 if self.CONCAT_NULL_OUTPUTS_STRING: 3860 args = [ 3861 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3862 for arg in args 3863 if arg 3864 ] 3865 3866 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3867 # we find such a call we replace it with its argument. 
3868 if len(args) == 1: 3869 return args[0] 3870 3871 return self.expression( 3872 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3873 ) 3874 3875 def _parse_string_agg(self) -> exp.Expression: 3876 if self._match(TokenType.DISTINCT): 3877 args: t.List[t.Optional[exp.Expression]] = [ 3878 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3879 ] 3880 if self._match(TokenType.COMMA): 3881 args.extend(self._parse_csv(self._parse_conjunction)) 3882 else: 3883 args = self._parse_csv(self._parse_conjunction) 3884 3885 index = self._index 3886 if not self._match(TokenType.R_PAREN) and args: 3887 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3888 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3889 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3890 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3891 3892 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3893 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3894 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3895 if not self._match_text_seq("WITHIN", "GROUP"): 3896 self._retreat(index) 3897 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3898 3899 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3900 order = self._parse_order(this=seq_get(args, 0)) 3901 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3902 3903 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3904 this = self._parse_bitwise() 3905 3906 if self._match(TokenType.USING): 3907 to: t.Optional[exp.Expression] = self.expression( 3908 exp.CharacterSet, this=self._parse_var() 3909 ) 3910 elif self._match(TokenType.COMMA): 3911 to = self._parse_types() 3912 else: 3913 to = None 3914 3915 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3916 3917 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3918 """ 3919 There are generally two variants of the DECODE function: 3920 3921 - DECODE(bin, charset) 3922 - DECODE(expression, search, result [, search, result] ... [, default]) 3923 3924 The second variant will always be parsed into a CASE expression. Note that NULL 3925 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3926 instead of relying on pattern matching. 
3927 """ 3928 args = self._parse_csv(self._parse_conjunction) 3929 3930 if len(args) < 3: 3931 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3932 3933 expression, *expressions = args 3934 if not expression: 3935 return None 3936 3937 ifs = [] 3938 for search, result in zip(expressions[::2], expressions[1::2]): 3939 if not search or not result: 3940 return None 3941 3942 if isinstance(search, exp.Literal): 3943 ifs.append( 3944 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3945 ) 3946 elif isinstance(search, exp.Null): 3947 ifs.append( 3948 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3949 ) 3950 else: 3951 cond = exp.or_( 3952 exp.EQ(this=expression.copy(), expression=search), 3953 exp.and_( 3954 exp.Is(this=expression.copy(), expression=exp.Null()), 3955 exp.Is(this=search.copy(), expression=exp.Null()), 3956 copy=False, 3957 ), 3958 copy=False, 3959 ) 3960 ifs.append(exp.If(this=cond, true=result)) 3961 3962 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3963 3964 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3965 self._match_text_seq("KEY") 3966 key = self._parse_field() 3967 self._match(TokenType.COLON) 3968 self._match_text_seq("VALUE") 3969 value = self._parse_field() 3970 3971 if not key and not value: 3972 return None 3973 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3974 3975 def _parse_json_object(self) -> exp.JSONObject: 3976 star = self._parse_star() 3977 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3978 3979 null_handling = None 3980 if self._match_text_seq("NULL", "ON", "NULL"): 3981 null_handling = "NULL ON NULL" 3982 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3983 null_handling = "ABSENT ON NULL" 3984 3985 unique_keys = None 3986 if self._match_text_seq("WITH", "UNIQUE"): 3987 unique_keys = True 3988 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3989 unique_keys = False 3990 3991 self._match_text_seq("KEYS") 3992 3993 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3994 format_json = self._match_text_seq("FORMAT", "JSON") 3995 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3996 3997 return self.expression( 3998 exp.JSONObject, 3999 expressions=expressions, 4000 null_handling=null_handling, 4001 unique_keys=unique_keys, 4002 return_type=return_type, 4003 format_json=format_json, 4004 encoding=encoding, 4005 ) 4006 4007 def _parse_logarithm(self) -> exp.Func: 4008 # Default argument order is base, expression 4009 args = self._parse_csv(self._parse_range) 4010 4011 if len(args) > 1: 4012 if not self.LOG_BASE_FIRST: 4013 args.reverse() 4014 return exp.Log.from_arg_list(args) 4015 4016 return self.expression( 4017 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4018 ) 4019 4020 def _parse_match_against(self) -> exp.MatchAgainst: 4021 expressions = self._parse_csv(self._parse_column) 4022 4023 self._match_text_seq(")", "AGAINST", "(") 4024 4025 this = self._parse_string() 4026 4027 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4028 modifier = "IN NATURAL LANGUAGE MODE" 4029 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4030 modifier = f"{modifier} WITH QUERY EXPANSION" 4031 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4032 modifier = "IN BOOLEAN MODE" 4033 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4034 modifier = "WITH QUERY EXPANSION" 4035 
else: 4036 modifier = None 4037 4038 return self.expression( 4039 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4040 ) 4041 4042 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4043 def _parse_open_json(self) -> exp.OpenJSON: 4044 this = self._parse_bitwise() 4045 path = self._match(TokenType.COMMA) and self._parse_string() 4046 4047 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4048 this = self._parse_field(any_token=True) 4049 kind = self._parse_types() 4050 path = self._parse_string() 4051 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4052 4053 return self.expression( 4054 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4055 ) 4056 4057 expressions = None 4058 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4059 self._match_l_paren() 4060 expressions = self._parse_csv(_parse_open_json_column_def) 4061 4062 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4063 4064 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4065 args = self._parse_csv(self._parse_bitwise) 4066 4067 if self._match(TokenType.IN): 4068 return self.expression( 4069 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4070 ) 4071 4072 if haystack_first: 4073 haystack = seq_get(args, 0) 4074 needle = seq_get(args, 1) 4075 else: 4076 needle = seq_get(args, 0) 4077 haystack = seq_get(args, 1) 4078 4079 return self.expression( 4080 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4081 ) 4082 4083 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4084 args = self._parse_csv(self._parse_table) 4085 return exp.JoinHint(this=func_name.upper(), expressions=args) 4086 4087 def _parse_substring(self) -> exp.Substring: 4088 # Postgres supports the form: substring(string [from int] [for int]) 4089 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4090 4091 args = self._parse_csv(self._parse_bitwise) 4092 4093 if self._match(TokenType.FROM): 4094 args.append(self._parse_bitwise()) 4095 if self._match(TokenType.FOR): 4096 args.append(self._parse_bitwise()) 4097 4098 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4099 4100 def _parse_trim(self) -> exp.Trim: 4101 # https://www.w3resource.com/sql/character-functions/trim.php 4102 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4103 4104 position = None 4105 collation = None 4106 4107 if self._match_texts(self.TRIM_TYPES): 4108 position = self._prev.text.upper() 4109 4110 expression = self._parse_bitwise() 4111 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4112 this = self._parse_bitwise() 4113 else: 4114 this = expression 4115 expression = None 4116 4117 if self._match(TokenType.COLLATE): 4118 collation = self._parse_bitwise() 4119 4120 return self.expression( 4121 exp.Trim, this=this, position=position, expression=expression, collation=collation 4122 ) 4123 4124 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4125 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4126 4127 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4128 return self._parse_window(self._parse_id_var(), alias=True) 4129 4130 def _parse_respect_or_ignore_nulls( 4131 self, this: t.Optional[exp.Expression] 4132 ) -> t.Optional[exp.Expression]: 4133 if self._match_text_seq("IGNORE", "NULLS"): 4134 return 
self.expression(exp.IgnoreNulls, this=this) 4135 if self._match_text_seq("RESPECT", "NULLS"): 4136 return self.expression(exp.RespectNulls, this=this) 4137 return this 4138 4139 def _parse_window( 4140 self, this: t.Optional[exp.Expression], alias: bool = False 4141 ) -> t.Optional[exp.Expression]: 4142 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4143 self._match(TokenType.WHERE) 4144 this = self.expression( 4145 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4146 ) 4147 self._match_r_paren() 4148 4149 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4150 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4151 if self._match_text_seq("WITHIN", "GROUP"): 4152 order = self._parse_wrapped(self._parse_order) 4153 this = self.expression(exp.WithinGroup, this=this, expression=order) 4154 4155 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] clause before OVER. 4156 # Some dialects choose to implement it and some do not. 4157 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4158 4159 # There is some code above in _parse_lambda that handles 4160 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4161 4162 # The code below handles 4163 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4164 4165 # Oracle allows both formats 4166 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4167 # and Snowflake chose to do the same for familiarity. 4168 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4169 this = self._parse_respect_or_ignore_nulls(this) 4170 4171 # BigQuery named windows: SELECT ... WINDOW x AS (PARTITION BY ...) 4172 if alias: 4173 over = None 4174 self._match(TokenType.ALIAS) 4175 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4176 return this 4177 else: 4178 over = self._prev.text.upper() 4179 4180 if not self._match(TokenType.L_PAREN): 4181 return self.expression( 4182 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4183 ) 4184 4185 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4186 4187 first = self._match(TokenType.FIRST) 4188 if self._match_text_seq("LAST"): 4189 first = False 4190 4191 partition = self._parse_partition_by() 4192 order = self._parse_order() 4193 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4194 4195 if kind: 4196 self._match(TokenType.BETWEEN) 4197 start = self._parse_window_spec() 4198 self._match(TokenType.AND) 4199 end = self._parse_window_spec() 4200 4201 spec = self.expression( 4202 exp.WindowSpec, 4203 kind=kind, 4204 start=start["value"], 4205 start_side=start["side"], 4206 end=end["value"], 4207 end_side=end["side"], 4208 ) 4209 else: 4210 spec = None 4211 4212 self._match_r_paren() 4213 4214 window = self.expression( 4215 exp.Window, 4216 this=this, 4217 partition_by=partition, 4218 order=order, 4219 spec=spec, 4220 alias=window_alias, 4221 over=over, 4222 first=first, 4223 ) 4224 4225 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4226 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4227 return self._parse_window(window, alias=alias) 4228 4229 return window 4230 4231 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4232 self._match(TokenType.BETWEEN) 4233 4234 return { 4235 "value": ( 4236 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4237 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4238 or self._parse_bitwise() 4239 ), 4240 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4241 } 4242 4243 def _parse_alias( 4244 self, this: t.Optional[exp.Expression], explicit: bool = False 4245 ) -> t.Optional[exp.Expression]: 4246 any_token = self._match(TokenType.ALIAS) 4247 4248 if explicit and not any_token: 4249 return this 4250 4251 if self._match(TokenType.L_PAREN): 4252 aliases = self.expression( 4253 exp.Aliases, 4254 this=this, 4255 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4256 ) 4257 self._match_r_paren(aliases) 4258 return aliases 4259 4260 alias = self._parse_id_var(any_token) 4261 4262 if alias: 4263 return self.expression(exp.Alias, this=this, alias=alias) 4264 4265 return this 4266 4267 def _parse_id_var( 4268 self, 4269 any_token: bool = True, 4270 tokens: t.Optional[t.Collection[TokenType]] = None, 4271 ) -> t.Optional[exp.Expression]: 4272 identifier = self._parse_identifier() 4273 4274 if identifier: 4275 return identifier 4276 4277 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4278 quoted = self._prev.token_type == TokenType.STRING 4279 return exp.Identifier(this=self._prev.text, quoted=quoted) 4280 4281 return None 4282 4283 def _parse_string(self) -> t.Optional[exp.Expression]: 4284 if self._match(TokenType.STRING): 4285 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4286 return self._parse_placeholder() 4287 4288 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4289 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4290 4291 def _parse_number(self) -> t.Optional[exp.Expression]: 4292 if self._match(TokenType.NUMBER): 4293 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4294 return self._parse_placeholder() 4295 4296 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4297 if self._match(TokenType.IDENTIFIER): 4298 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4299 return self._parse_placeholder() 4300 4301 def _parse_var( 4302 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4303 ) -> t.Optional[exp.Expression]: 4304 if ( 4305 (any_token and self._advance_any()) 4306 or self._match(TokenType.VAR) 4307 or (self._match_set(tokens) if tokens else False) 4308 ): 4309 return self.expression(exp.Var, this=self._prev.text) 4310 return self._parse_placeholder() 4311 4312 def _advance_any(self) -> t.Optional[Token]: 4313 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4314 self._advance() 4315 return self._prev 4316 return None 4317 4318 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4319 return self._parse_var() or self._parse_string() 4320 4321 def _parse_null(self) -> t.Optional[exp.Expression]: 4322 if self._match(TokenType.NULL): 4323 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4324 return self._parse_placeholder() 4325 4326 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4327 if self._match(TokenType.TRUE): 4328 return 
self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4329 if self._match(TokenType.FALSE): 4330 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4331 return self._parse_placeholder() 4332 4333 def _parse_star(self) -> t.Optional[exp.Expression]: 4334 if self._match(TokenType.STAR): 4335 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4336 return self._parse_placeholder() 4337 4338 def _parse_parameter(self) -> exp.Parameter: 4339 wrapped = self._match(TokenType.L_BRACE) 4340 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4341 self._match(TokenType.R_BRACE) 4342 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4343 4344 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4345 if self._match_set(self.PLACEHOLDER_PARSERS): 4346 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4347 if placeholder: 4348 return placeholder 4349 self._advance(-1) 4350 return None 4351 4352 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4353 if not self._match(TokenType.EXCEPT): 4354 return None 4355 if self._match(TokenType.L_PAREN, advance=False): 4356 return self._parse_wrapped_csv(self._parse_column) 4357 return self._parse_csv(self._parse_column) 4358 4359 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4360 if not self._match(TokenType.REPLACE): 4361 return None 4362 if self._match(TokenType.L_PAREN, advance=False): 4363 return self._parse_wrapped_csv(self._parse_expression) 4364 return self._parse_expressions() 4365 4366 def _parse_csv( 4367 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4368 ) -> t.List[t.Optional[exp.Expression]]: 4369 parse_result = parse_method() 4370 items = [parse_result] if parse_result is not None else [] 4371 4372 while self._match(sep): 4373 self._add_comments(parse_result) 4374 parse_result = parse_method() 4375 if parse_result is not None: 4376 items.append(parse_result) 4377 4378 return items 4379 4380 def _parse_tokens( 4381 self, parse_method: t.Callable, expressions: t.Dict 4382 ) -> t.Optional[exp.Expression]: 4383 this = parse_method() 4384 4385 while self._match_set(expressions): 4386 this = self.expression( 4387 expressions[self._prev.token_type], 4388 this=this, 4389 comments=self._prev_comments, 4390 expression=parse_method(), 4391 ) 4392 4393 return this 4394 4395 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4396 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4397 4398 def _parse_wrapped_csv( 4399 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4400 ) -> t.List[t.Optional[exp.Expression]]: 4401 return self._parse_wrapped( 4402 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4403 ) 4404 4405 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4406 wrapped = self._match(TokenType.L_PAREN) 4407 if not wrapped and not optional: 4408 self.raise_error("Expecting (") 4409 parse_result = parse_method() 4410 if wrapped: 4411 self._match_r_paren() 4412 return parse_result 4413 4414 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4415 return self._parse_csv(self._parse_expression) 4416 4417 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4418 return self._parse_select() or self._parse_set_operations( 4419 self._parse_expression() if alias else self._parse_conjunction() 4420 ) 
4421 4422 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4423 return self._parse_query_modifiers( 4424 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4425 ) 4426 4427 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4428 this = None 4429 if self._match_texts(self.TRANSACTION_KIND): 4430 this = self._prev.text 4431 4432 self._match_texts({"TRANSACTION", "WORK"}) 4433 4434 modes = [] 4435 while True: 4436 mode = [] 4437 while self._match(TokenType.VAR): 4438 mode.append(self._prev.text) 4439 4440 if mode: 4441 modes.append(" ".join(mode)) 4442 if not self._match(TokenType.COMMA): 4443 break 4444 4445 return self.expression(exp.Transaction, this=this, modes=modes) 4446 4447 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4448 chain = None 4449 savepoint = None 4450 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4451 4452 self._match_texts({"TRANSACTION", "WORK"}) 4453 4454 if self._match_text_seq("TO"): 4455 self._match_text_seq("SAVEPOINT") 4456 savepoint = self._parse_id_var() 4457 4458 if self._match(TokenType.AND): 4459 chain = not self._match_text_seq("NO") 4460 self._match_text_seq("CHAIN") 4461 4462 if is_rollback: 4463 return self.expression(exp.Rollback, savepoint=savepoint) 4464 4465 return self.expression(exp.Commit, chain=chain) 4466 4467 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4468 if not self._match_text_seq("ADD"): 4469 return None 4470 4471 self._match(TokenType.COLUMN) 4472 exists_column = self._parse_exists(not_=True) 4473 expression = self._parse_column_def(self._parse_field(any_token=True)) 4474 4475 if expression: 4476 expression.set("exists", exists_column) 4477 4478 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4479 if self._match_texts(("FIRST", "AFTER")): 4480 position = self._prev.text 4481 column_position = self.expression( 4482 exp.ColumnPosition, this=self._parse_column(), position=position 4483 ) 4484 expression.set("position", column_position) 4485 4486 return expression 4487 4488 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4489 drop = self._match(TokenType.DROP) and self._parse_drop() 4490 if drop and not isinstance(drop, exp.Command): 4491 drop.set("kind", drop.args.get("kind", "COLUMN")) 4492 return drop 4493 4494 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4495 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4496 return self.expression( 4497 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4498 ) 4499 4500 def _parse_add_constraint(self) -> exp.AddConstraint: 4501 this = None 4502 kind = self._prev.token_type 4503 4504 if kind == TokenType.CONSTRAINT: 4505 this = self._parse_id_var() 4506 4507 if self._match_text_seq("CHECK"): 4508 expression = self._parse_wrapped(self._parse_conjunction) 4509 enforced = self._match_text_seq("ENFORCED") 4510 4511 return self.expression( 4512 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4513 ) 4514 4515 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4516 expression = self._parse_foreign_key() 4517 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4518 expression = self._parse_primary_key() 4519 else: 4520 expression = None 4521 4522 return self.expression(exp.AddConstraint, this=this, expression=expression) 4523 4524 def _parse_alter_table_add(self) -> 
t.List[t.Optional[exp.Expression]]: 4525 index = self._index - 1 4526 4527 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4528 return self._parse_csv(self._parse_add_constraint) 4529 4530 self._retreat(index) 4531 return self._parse_csv(self._parse_add_column) 4532 4533 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4534 self._match(TokenType.COLUMN) 4535 column = self._parse_field(any_token=True) 4536 4537 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4538 return self.expression(exp.AlterColumn, this=column, drop=True) 4539 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4540 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4541 4542 self._match_text_seq("SET", "DATA") 4543 return self.expression( 4544 exp.AlterColumn, 4545 this=column, 4546 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4547 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4548 using=self._match(TokenType.USING) and self._parse_conjunction(), 4549 ) 4550 4551 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4552 index = self._index - 1 4553 4554 partition_exists = self._parse_exists() 4555 if self._match(TokenType.PARTITION, advance=False): 4556 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4557 4558 self._retreat(index) 4559 return self._parse_csv(self._parse_drop_column) 4560 4561 def _parse_alter_table_rename(self) -> exp.RenameTable: 4562 self._match_text_seq("TO") 4563 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4564 4565 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4566 start = self._prev 4567 4568 if not self._match(TokenType.TABLE): 4569 return self._parse_as_command(start) 4570 4571 exists = self._parse_exists() 4572 this = self._parse_table(schema=True) 4573 4574 if self._next: 4575 self._advance() 4576 4577 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4578 if parser: 4579 actions = ensure_list(parser(self)) 4580 4581 if not self._curr: 4582 return self.expression( 4583 exp.AlterTable, 4584 this=this, 4585 exists=exists, 4586 actions=actions, 4587 ) 4588 return self._parse_as_command(start) 4589 4590 def _parse_merge(self) -> exp.Merge: 4591 self._match(TokenType.INTO) 4592 target = self._parse_table() 4593 4594 self._match(TokenType.USING) 4595 using = self._parse_table() 4596 4597 self._match(TokenType.ON) 4598 on = self._parse_conjunction() 4599 4600 whens = [] 4601 while self._match(TokenType.WHEN): 4602 matched = not self._match(TokenType.NOT) 4603 self._match_text_seq("MATCHED") 4604 source = ( 4605 False 4606 if self._match_text_seq("BY", "TARGET") 4607 else self._match_text_seq("BY", "SOURCE") 4608 ) 4609 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4610 4611 self._match(TokenType.THEN) 4612 4613 if self._match(TokenType.INSERT): 4614 _this = self._parse_star() 4615 if _this: 4616 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4617 else: 4618 then = self.expression( 4619 exp.Insert, 4620 this=self._parse_value(), 4621 expression=self._match(TokenType.VALUES) and self._parse_value(), 4622 ) 4623 elif self._match(TokenType.UPDATE): 4624 expressions = self._parse_star() 4625 if expressions: 4626 then = self.expression(exp.Update, expressions=expressions) 4627 else: 4628 then = self.expression( 4629 exp.Update, 4630 expressions=self._match(TokenType.SET) 4631 and self._parse_csv(self._parse_equality), 4632 ) 4633 elif 
self._match(TokenType.DELETE): 4634 then = self.expression(exp.Var, this=self._prev.text) 4635 else: 4636 then = None 4637 4638 whens.append( 4639 self.expression( 4640 exp.When, 4641 matched=matched, 4642 source=source, 4643 condition=condition, 4644 then=then, 4645 ) 4646 ) 4647 4648 return self.expression( 4649 exp.Merge, 4650 this=target, 4651 using=using, 4652 on=on, 4653 expressions=whens, 4654 ) 4655 4656 def _parse_show(self) -> t.Optional[exp.Expression]: 4657 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4658 if parser: 4659 return parser(self) 4660 self._advance() 4661 return self.expression(exp.Show, this=self._prev.text.upper()) 4662 4663 def _parse_set_item_assignment( 4664 self, kind: t.Optional[str] = None 4665 ) -> t.Optional[exp.Expression]: 4666 index = self._index 4667 4668 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4669 return self._parse_set_transaction(global_=kind == "GLOBAL") 4670 4671 left = self._parse_primary() or self._parse_id_var() 4672 4673 if not self._match_texts(("=", "TO")): 4674 self._retreat(index) 4675 return None 4676 4677 right = self._parse_statement() or self._parse_id_var() 4678 this = self.expression(exp.EQ, this=left, expression=right) 4679 4680 return self.expression(exp.SetItem, this=this, kind=kind) 4681 4682 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4683 self._match_text_seq("TRANSACTION") 4684 characteristics = self._parse_csv( 4685 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4686 ) 4687 return self.expression( 4688 exp.SetItem, 4689 expressions=characteristics, 4690 kind="TRANSACTION", 4691 **{"global": global_}, # type: ignore 4692 ) 4693 4694 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4695 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4696 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4697 4698 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4699 index = self._index 4700 set_ = self.expression( 4701 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4702 ) 4703 4704 if self._curr: 4705 self._retreat(index) 4706 return self._parse_as_command(self._prev) 4707 4708 return set_ 4709 4710 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4711 for option in options: 4712 if self._match_text_seq(*option.split(" ")): 4713 return exp.var(option) 4714 return None 4715 4716 def _parse_as_command(self, start: Token) -> exp.Command: 4717 while self._curr: 4718 self._advance() 4719 text = self._find_sql(start, self._prev) 4720 size = len(start.text) 4721 return exp.Command(this=text[:size], expression=text[size:]) 4722 4723 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4724 settings = [] 4725 4726 self._match_l_paren() 4727 kind = self._parse_id_var() 4728 4729 if self._match(TokenType.L_PAREN): 4730 while True: 4731 key = self._parse_id_var() 4732 value = self._parse_primary() 4733 4734 if not key and value is None: 4735 break 4736 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4737 self._match(TokenType.R_PAREN) 4738 4739 self._match_r_paren() 4740 4741 return self.expression( 4742 exp.DictProperty, 4743 this=this, 4744 kind=kind.this if kind else None, 4745 settings=settings, 4746 ) 4747 4748 def _parse_dict_range(self, this: str) -> exp.DictRange: 4749 self._match_l_paren() 4750 has_min = self._match_text_seq("MIN") 4751 if has_min: 
4752 min = self._parse_var() or self._parse_primary() 4753 self._match_text_seq("MAX") 4754 max = self._parse_var() or self._parse_primary() 4755 else: 4756 max = self._parse_var() or self._parse_primary() 4757 min = exp.Literal.number(0) 4758 self._match_r_paren() 4759 return self.expression(exp.DictRange, this=this, min=min, max=max) 4760 4761 def _find_parser( 4762 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4763 ) -> t.Optional[t.Callable]: 4764 if not self._curr: 4765 return None 4766 4767 index = self._index 4768 this = [] 4769 while True: 4770 # The current token might be multiple words 4771 curr = self._curr.text.upper() 4772 key = curr.split(" ") 4773 this.append(curr) 4774 4775 self._advance() 4776 result, trie = in_trie(trie, key) 4777 if result == TrieResult.FAILED: 4778 break 4779 4780 if result == TrieResult.EXISTS: 4781 subparser = parsers[" ".join(this)] 4782 return subparser 4783 4784 self._retreat(index) 4785 return None 4786 4787 def _match(self, token_type, advance=True, expression=None): 4788 if not self._curr: 4789 return None 4790 4791 if self._curr.token_type == token_type: 4792 if advance: 4793 self._advance() 4794 self._add_comments(expression) 4795 return True 4796 4797 return None 4798 4799 def _match_set(self, types, advance=True): 4800 if not self._curr: 4801 return None 4802 4803 if self._curr.token_type in types: 4804 if advance: 4805 self._advance() 4806 return True 4807 4808 return None 4809 4810 def _match_pair(self, token_type_a, token_type_b, advance=True): 4811 if not self._curr or not self._next: 4812 return None 4813 4814 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4815 if advance: 4816 self._advance(2) 4817 return True 4818 4819 return None 4820 4821 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4822 if not self._match(TokenType.L_PAREN, expression=expression): 4823 self.raise_error("Expecting (") 4824 4825 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4826 if not self._match(TokenType.R_PAREN, expression=expression): 4827 self.raise_error("Expecting )") 4828 4829 def _match_texts(self, texts, advance=True): 4830 if self._curr and self._curr.text.upper() in texts: 4831 if advance: 4832 self._advance() 4833 return True 4834 return False 4835 4836 def _match_text_seq(self, *texts, advance=True): 4837 index = self._index 4838 for text in texts: 4839 if self._curr and self._curr.text.upper() == text: 4840 self._advance() 4841 else: 4842 self._retreat(index) 4843 return False 4844 4845 if not advance: 4846 self._retreat(index) 4847 4848 return True 4849 4850 @t.overload 4851 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4852 ... 4853 4854 @t.overload 4855 def _replace_columns_with_dots( 4856 self, this: t.Optional[exp.Expression] 4857 ) -> t.Optional[exp.Expression]: 4858 ... 
4859 4860 def _replace_columns_with_dots(self, this): 4861 if isinstance(this, exp.Dot): 4862 exp.replace_children(this, self._replace_columns_with_dots) 4863 elif isinstance(this, exp.Column): 4864 exp.replace_children(this, self._replace_columns_with_dots) 4865 table = this.args.get("table") 4866 this = ( 4867 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4868 ) 4869 4870 return this 4871 4872 def _replace_lambda( 4873 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4874 ) -> t.Optional[exp.Expression]: 4875 if not node: 4876 return node 4877 4878 for column in node.find_all(exp.Column): 4879 if column.parts[0].name in lambda_variables: 4880 dot_or_id = column.to_dot() if column.table else column.this 4881 parent = column.parent 4882 4883 while isinstance(parent, exp.Dot): 4884 if not isinstance(parent.parent, exp.Dot): 4885 parent.replace(dot_or_id) 4886 break 4887 parent = parent.parent 4888 else: 4889 if column is node: 4890 node = dot_or_id 4891 else: 4892 column.replace(dot_or_id) 4893 return node
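The remainder of this page collects a few usage sketches that exercise the parsing paths above through sqlglot's public API. They are illustrative only; the exact SQL printed by the generator can vary across versions and dialects.

As the comment in _parse_concat notes, a single-argument CONCAT call is collapsed into its argument at parse time, since dialects such as Trino reject it. A minimal sketch, assuming the default dialect routes CONCAT through _parse_concat:

import sqlglot

# With one argument there is nothing to concatenate, so the parser
# returns the argument itself instead of building a Concat node.
print(sqlglot.parse_one("SELECT CONCAT(x) FROM t").sql())
# e.g. SELECT x FROM t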
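_parse_string_agg folds the Postgres argument layout, the BigQuery one and the WITHIN GROUP form into a single exp.GroupConcat node, which is what makes the spellings transpilable. A sketch; the MySQL output shown is indicative, not guaranteed:

import sqlglot

# Postgres packs the separator and the ORDER BY inside the call ...
sql = "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t"

# ... while MySQL renders the same GroupConcat node as GROUP_CONCAT.
print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
# e.g. SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t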
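As the _parse_decode docstring explains, the search/result variant of DECODE never survives as an exp.Decode node: it is parsed directly into a CASE expression, with IS NULL checks generated for NULL searches. A quick illustration (output is indicative):

import sqlglot

sql = "SELECT DECODE(a, 1, 'one', 2, 'two', 'other') FROM t"

# Literal searches become plain equality tests inside exp.If nodes, and
# the trailing unpaired argument becomes the CASE default.
print(sqlglot.parse_one(sql, read="oracle").sql())
# e.g. SELECT CASE WHEN a = 1 THEN 'one' WHEN a = 2 THEN 'two' ELSE 'other' END FROM t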
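_parse_substring folds the Postgres FROM/FOR keywords into the ordinary argument list, so the ANSI keyword form and the comma form build the same exp.Substring node. A sketch under the default generator:

import sqlglot

# Keyword form in, comma form out.
print(sqlglot.parse_one("SELECT SUBSTRING(x FROM 2 FOR 3)", read="postgres").sql())
# e.g. SELECT SUBSTRING(x, 2, 3)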
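As the comments in _parse_window explain, IGNORE | RESPECT NULLS is accepted both inside the function call (handled on the way through _parse_lambda) and between the call and OVER; in both cases it becomes an exp.IgnoreNulls or exp.RespectNulls wrapper. A quick check that both placements parse:

import sqlglot

inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

# The wrapper sits around the argument in the first form and around the
# whole function call in the second; both round-trip through .sql().
print(inside.sql())
print(outside.sql())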
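Much of the grammar above is built from two small combinators: _parse_csv collects separator-delimited items, and _parse_tokens folds a flat run of binary operators into a left-deep tree, so a - b + c parses as (a - b) + c. A sketch of the resulting shape:

import sqlglot

tree = sqlglot.parse_one("a - b + c")

# The earlier operator ends up deeper in the tree: an Add node whose
# left-hand side is the Sub built on the previous loop iteration.
print(type(tree).__name__, type(tree.this).__name__)
# e.g. Add Sub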
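_parse_show and _parse_set dispatch through _find_parser, which matches the upcoming tokens word by word against a trie of multi-word keys (SHOW_TRIE / SET_TRIE). A toy version using the same sqlglot.trie helpers; the keys here are hypothetical stand-ins for a dialect's SHOW_PARSERS:

from sqlglot.trie import TrieResult, in_trie, new_trie

# Keys are split into words, mirroring how the parser tries are built.
trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW DATABASES"))

result, _ = in_trie(trie, ["SHOW"])
print(result)  # TrieResult.PREFIX: a longer key may still match, keep consuming tokens

result, _ = in_trie(trie, ["SHOW", "TABLES"])
print(result)  # TrieResult.EXISTS: a complete key matched, so its parser is invoked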
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.IPADDRESS, 190 TokenType.IPPREFIX, 191 TokenType.ENUM, 192 
*NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = { 203 *Tokenizer.SINGLE_TOKENS.values(), 204 TokenType.SELECT, 205 } 206 207 DB_CREATABLES = { 208 TokenType.DATABASE, 209 TokenType.SCHEMA, 210 TokenType.TABLE, 211 TokenType.VIEW, 212 TokenType.DICTIONARY, 213 } 214 215 CREATABLES = { 216 TokenType.COLUMN, 217 TokenType.FUNCTION, 218 TokenType.INDEX, 219 TokenType.PROCEDURE, 220 *DB_CREATABLES, 221 } 222 223 # Tokens that can represent identifiers 224 ID_VAR_TOKENS = { 225 TokenType.VAR, 226 TokenType.ANTI, 227 TokenType.APPLY, 228 TokenType.ASC, 229 TokenType.AUTO_INCREMENT, 230 TokenType.BEGIN, 231 TokenType.CACHE, 232 TokenType.CASE, 233 TokenType.COLLATE, 234 TokenType.COMMAND, 235 TokenType.COMMENT, 236 TokenType.COMMIT, 237 TokenType.CONSTRAINT, 238 TokenType.DEFAULT, 239 TokenType.DELETE, 240 TokenType.DESC, 241 TokenType.DESCRIBE, 242 TokenType.DICTIONARY, 243 TokenType.DIV, 244 TokenType.END, 245 TokenType.EXECUTE, 246 TokenType.ESCAPE, 247 TokenType.FALSE, 248 TokenType.FIRST, 249 TokenType.FILTER, 250 TokenType.FORMAT, 251 TokenType.FULL, 252 TokenType.IS, 253 TokenType.ISNULL, 254 TokenType.INTERVAL, 255 TokenType.KEEP, 256 TokenType.LEFT, 257 TokenType.LOAD, 258 TokenType.MERGE, 259 TokenType.NATURAL, 260 TokenType.NEXT, 261 TokenType.OFFSET, 262 TokenType.ORDINALITY, 263 TokenType.OVERWRITE, 264 TokenType.PARTITION, 265 TokenType.PERCENT, 266 TokenType.PIVOT, 267 TokenType.PRAGMA, 268 TokenType.RANGE, 269 TokenType.REFERENCES, 270 TokenType.RIGHT, 271 TokenType.ROW, 272 TokenType.ROWS, 273 TokenType.SEMI, 274 TokenType.SET, 275 TokenType.SETTINGS, 276 TokenType.SHOW, 277 TokenType.TEMPORARY, 278 TokenType.TOP, 279 TokenType.TRUE, 280 TokenType.UNIQUE, 281 TokenType.UNPIVOT, 282 TokenType.UPDATE, 283 TokenType.VOLATILE, 284 TokenType.WINDOW, 285 *CREATABLES, 286 *SUBQUERY_PREDICATES, 287 *TYPE_TOKENS, 288 *NO_PAREN_FUNCTIONS, 289 } 290 291 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 292 293 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 294 TokenType.APPLY, 295 TokenType.ASOF, 296 TokenType.FULL, 297 TokenType.LEFT, 298 TokenType.LOCK, 299 TokenType.NATURAL, 300 TokenType.OFFSET, 301 TokenType.RIGHT, 302 TokenType.WINDOW, 303 } 304 305 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 306 307 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 308 309 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 310 311 FUNC_TOKENS = { 312 TokenType.COMMAND, 313 TokenType.CURRENT_DATE, 314 TokenType.CURRENT_DATETIME, 315 TokenType.CURRENT_TIMESTAMP, 316 TokenType.CURRENT_TIME, 317 TokenType.CURRENT_USER, 318 TokenType.FILTER, 319 TokenType.FIRST, 320 TokenType.FORMAT, 321 TokenType.GLOB, 322 TokenType.IDENTIFIER, 323 TokenType.INDEX, 324 TokenType.ISNULL, 325 TokenType.ILIKE, 326 TokenType.LIKE, 327 TokenType.MERGE, 328 TokenType.OFFSET, 329 TokenType.PRIMARY_KEY, 330 TokenType.RANGE, 331 TokenType.REPLACE, 332 TokenType.RLIKE, 333 TokenType.ROW, 334 TokenType.UNNEST, 335 TokenType.VAR, 336 TokenType.LEFT, 337 TokenType.RIGHT, 338 TokenType.DATE, 339 TokenType.DATETIME, 340 TokenType.TABLE, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMPTZ, 343 TokenType.WINDOW, 344 TokenType.XOR, 345 *TYPE_TOKENS, 346 *SUBQUERY_PREDICATES, 347 } 348 349 CONJUNCTION = { 350 TokenType.AND: exp.And, 351 TokenType.OR: exp.Or, 352 } 353 354 EQUALITY = { 355 TokenType.EQ: exp.EQ, 356 TokenType.NEQ: exp.NEQ, 357 TokenType.NULLSAFE_EQ: 
exp.NullSafeEQ, 358 } 359 360 COMPARISON = { 361 TokenType.GT: exp.GT, 362 TokenType.GTE: exp.GTE, 363 TokenType.LT: exp.LT, 364 TokenType.LTE: exp.LTE, 365 } 366 367 BITWISE = { 368 TokenType.AMP: exp.BitwiseAnd, 369 TokenType.CARET: exp.BitwiseXor, 370 TokenType.PIPE: exp.BitwiseOr, 371 TokenType.DPIPE: exp.DPipe, 372 } 373 374 TERM = { 375 TokenType.DASH: exp.Sub, 376 TokenType.PLUS: exp.Add, 377 TokenType.MOD: exp.Mod, 378 TokenType.COLLATE: exp.Collate, 379 } 380 381 FACTOR = { 382 TokenType.DIV: exp.IntDiv, 383 TokenType.LR_ARROW: exp.Distance, 384 TokenType.SLASH: exp.Div, 385 TokenType.STAR: exp.Mul, 386 } 387 388 TIMESTAMPS = { 389 TokenType.TIME, 390 TokenType.TIMESTAMP, 391 TokenType.TIMESTAMPTZ, 392 TokenType.TIMESTAMPLTZ, 393 } 394 395 SET_OPERATIONS = { 396 TokenType.UNION, 397 TokenType.INTERSECT, 398 TokenType.EXCEPT, 399 } 400 401 JOIN_METHODS = { 402 TokenType.NATURAL, 403 TokenType.ASOF, 404 } 405 406 JOIN_SIDES = { 407 TokenType.LEFT, 408 TokenType.RIGHT, 409 TokenType.FULL, 410 } 411 412 JOIN_KINDS = { 413 TokenType.INNER, 414 TokenType.OUTER, 415 TokenType.CROSS, 416 TokenType.SEMI, 417 TokenType.ANTI, 418 } 419 420 JOIN_HINTS: t.Set[str] = set() 421 422 LAMBDAS = { 423 TokenType.ARROW: lambda self, expressions: self.expression( 424 exp.Lambda, 425 this=self._replace_lambda( 426 self._parse_conjunction(), 427 {node.name for node in expressions}, 428 ), 429 expressions=expressions, 430 ), 431 TokenType.FARROW: lambda self, expressions: self.expression( 432 exp.Kwarg, 433 this=exp.var(expressions[0].name), 434 expression=self._parse_conjunction(), 435 ), 436 } 437 438 COLUMN_OPERATORS = { 439 TokenType.DOT: None, 440 TokenType.DCOLON: lambda self, this, to: self.expression( 441 exp.Cast if self.STRICT_CAST else exp.TryCast, 442 this=this, 443 to=to, 444 ), 445 TokenType.ARROW: lambda self, this, path: self.expression( 446 exp.JSONExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DARROW: lambda self, this, path: self.expression( 451 exp.JSONExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 456 exp.JSONBExtract, 457 this=this, 458 expression=path, 459 ), 460 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 461 exp.JSONBExtractScalar, 462 this=this, 463 expression=path, 464 ), 465 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 466 exp.JSONBContains, 467 this=this, 468 expression=key, 469 ), 470 } 471 472 EXPRESSION_PARSERS = { 473 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 474 exp.Column: lambda self: self._parse_column(), 475 exp.Condition: lambda self: self._parse_conjunction(), 476 exp.DataType: lambda self: self._parse_types(), 477 exp.Expression: lambda self: self._parse_statement(), 478 exp.From: lambda self: self._parse_from(), 479 exp.Group: lambda self: self._parse_group(), 480 exp.Having: lambda self: self._parse_having(), 481 exp.Identifier: lambda self: self._parse_id_var(), 482 exp.Join: lambda self: self._parse_join(), 483 exp.Lambda: lambda self: self._parse_lambda(), 484 exp.Lateral: lambda self: self._parse_lateral(), 485 exp.Limit: lambda self: self._parse_limit(), 486 exp.Offset: lambda self: self._parse_offset(), 487 exp.Order: lambda self: self._parse_order(), 488 exp.Ordered: lambda self: self._parse_ordered(), 489 exp.Properties: lambda self: self._parse_properties(), 490 exp.Qualify: lambda self: self._parse_qualify(), 491 exp.Returning: lambda self: self._parse_returning(), 492 exp.Sort: 
lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 493 exp.Table: lambda self: self._parse_table_parts(), 494 exp.TableAlias: lambda self: self._parse_table_alias(), 495 exp.Where: lambda self: self._parse_where(), 496 exp.Window: lambda self: self._parse_named_window(), 497 exp.With: lambda self: self._parse_with(), 498 "JOIN_TYPE": lambda self: self._parse_join_parts(), 499 } 500 501 STATEMENT_PARSERS = { 502 TokenType.ALTER: lambda self: self._parse_alter(), 503 TokenType.BEGIN: lambda self: self._parse_transaction(), 504 TokenType.CACHE: lambda self: self._parse_cache(), 505 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 506 TokenType.COMMENT: lambda self: self._parse_comment(), 507 TokenType.CREATE: lambda self: self._parse_create(), 508 TokenType.DELETE: lambda self: self._parse_delete(), 509 TokenType.DESC: lambda self: self._parse_describe(), 510 TokenType.DESCRIBE: lambda self: self._parse_describe(), 511 TokenType.DROP: lambda self: self._parse_drop(), 512 TokenType.FROM: lambda self: exp.select("*").from_( 513 t.cast(exp.From, self._parse_from(skip_from_token=True)) 514 ), 515 TokenType.INSERT: lambda self: self._parse_insert(), 516 TokenType.LOAD: lambda self: self._parse_load(), 517 TokenType.MERGE: lambda self: self._parse_merge(), 518 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 519 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 520 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 521 TokenType.SET: lambda self: self._parse_set(), 522 TokenType.UNCACHE: lambda self: self._parse_uncache(), 523 TokenType.UPDATE: lambda self: self._parse_update(), 524 TokenType.USE: lambda self: self.expression( 525 exp.Use, 526 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 527 and exp.var(self._prev.text), 528 this=self._parse_table(schema=False), 529 ), 530 } 531 532 UNARY_PARSERS = { 533 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 534 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 535 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 536 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 537 } 538 539 PRIMARY_PARSERS = { 540 TokenType.STRING: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=True 542 ), 543 TokenType.NUMBER: lambda self, token: self.expression( 544 exp.Literal, this=token.text, is_string=False 545 ), 546 TokenType.STAR: lambda self, _: self.expression( 547 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 548 ), 549 TokenType.NULL: lambda self, _: self.expression(exp.Null), 550 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 551 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 552 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 553 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 554 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 555 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 556 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 557 exp.National, this=token.text 558 ), 559 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 560 TokenType.SESSION_PARAMETER: lambda self, _: 

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }
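
    # Editorial note, not part of the original source: every
    # QUERY_MODIFIER_PARSERS entry returns a (key, expression) pair, and
    # _parse_query_modifiers (defined further below) sets that key on the
    # enclosing query node, so a WHERE clause after a SELECT ends up as
    # this.set("where", exp.Where(...)).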
lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NORMALIZE_FUNCTIONS = "upper" 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 
877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. 

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)
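
    # Editorial sketch, not part of the original source: _advance/_retreat
    # maintain the _curr/_next/_prev cursor and enable the backtracking used
    # throughout this class:
    #
    #   index = self._index      # remember the position before speculating
    #   ...                      # try to match an optional construct
    #   self._retreat(index)     # restore the cursor if the match failed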

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
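
    # Editorial note, not part of the original source: for "CREATE TABLE t (a INT)"
    # the method above returns exp.Create with kind="TABLE" and `this` set to an
    # exp.Schema wrapping the table name and its column definitions; CREATE
    # variants it cannot fully parse fall back to a generic exp.Command via
    # _parse_as_command.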
self._match_text_seq("BEFORE"), 1298 "default": self._match_text_seq("DEFAULT"), 1299 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1300 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1301 "after": self._match_text_seq("AFTER"), 1302 "minimum": self._match_texts(("MIN", "MINIMUM")), 1303 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1304 } 1305 1306 if self._match_texts(self.PROPERTY_PARSERS): 1307 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1308 try: 1309 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1310 except TypeError: 1311 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1312 1313 return None 1314 1315 def _parse_property(self) -> t.Optional[exp.Expression]: 1316 if self._match_texts(self.PROPERTY_PARSERS): 1317 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1318 1319 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1320 return self._parse_character_set(default=True) 1321 1322 if self._match_text_seq("COMPOUND", "SORTKEY"): 1323 return self._parse_sortkey(compound=True) 1324 1325 if self._match_text_seq("SQL", "SECURITY"): 1326 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1327 1328 assignment = self._match_pair( 1329 TokenType.VAR, TokenType.EQ, advance=False 1330 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1331 1332 if assignment: 1333 key = self._parse_var_or_string() 1334 self._match(TokenType.EQ) 1335 return self.expression(exp.Property, this=key, value=self._parse_column()) 1336 1337 return None 1338 1339 def _parse_stored(self) -> exp.FileFormatProperty: 1340 self._match(TokenType.ALIAS) 1341 1342 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1343 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1344 1345 return self.expression( 1346 exp.FileFormatProperty, 1347 this=self.expression( 1348 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1349 ) 1350 if input_format or output_format 1351 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1352 ) 1353 1354 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1355 self._match(TokenType.EQ) 1356 self._match(TokenType.ALIAS) 1357 return self.expression(exp_class, this=self._parse_field()) 1358 1359 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1360 properties = [] 1361 while True: 1362 if before: 1363 prop = self._parse_property_before() 1364 else: 1365 prop = self._parse_property() 1366 1367 if not prop: 1368 break 1369 for p in ensure_list(prop): 1370 properties.append(p) 1371 1372 if properties: 1373 return self.expression(exp.Properties, expressions=properties) 1374 1375 return None 1376 1377 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1378 return self.expression( 1379 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1380 ) 1381 1382 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1383 if self._index >= 2: 1384 pre_volatile_token = self._tokens[self._index - 2] 1385 else: 1386 pre_volatile_token = None 1387 1388 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1389 return exp.VolatileProperty() 1390 1391 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1392 1393 def _parse_with_property( 1394 self, 1395 ) -> 

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
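
    # Editorial sketch, not part of the original source: for
    # "WITH c AS (SELECT 1) SELECT * FROM c", _parse_with builds the exp.With
    # node above, and _parse_select attaches it to the statement that follows
    # via this.set("with", cte).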
self.raise_error("Expected CTE to have alias") 2048 2049 self._match(TokenType.ALIAS) 2050 return self.expression( 2051 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2052 ) 2053 2054 def _parse_table_alias( 2055 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2056 ) -> t.Optional[exp.TableAlias]: 2057 any_token = self._match(TokenType.ALIAS) 2058 alias = ( 2059 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2060 or self._parse_string_as_identifier() 2061 ) 2062 2063 index = self._index 2064 if self._match(TokenType.L_PAREN): 2065 columns = self._parse_csv(self._parse_function_parameter) 2066 self._match_r_paren() if columns else self._retreat(index) 2067 else: 2068 columns = None 2069 2070 if not alias and not columns: 2071 return None 2072 2073 return self.expression(exp.TableAlias, this=alias, columns=columns) 2074 2075 def _parse_subquery( 2076 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2077 ) -> t.Optional[exp.Subquery]: 2078 if not this: 2079 return None 2080 2081 return self.expression( 2082 exp.Subquery, 2083 this=this, 2084 pivots=self._parse_pivots(), 2085 alias=self._parse_table_alias() if parse_alias else None, 2086 ) 2087 2088 def _parse_query_modifiers( 2089 self, this: t.Optional[exp.Expression] 2090 ) -> t.Optional[exp.Expression]: 2091 if isinstance(this, self.MODIFIABLES): 2092 for join in iter(self._parse_join, None): 2093 this.append("joins", join) 2094 for lateral in iter(self._parse_lateral, None): 2095 this.append("laterals", lateral) 2096 2097 while True: 2098 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2099 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2100 key, expression = parser(self) 2101 2102 if expression: 2103 this.set(key, expression) 2104 if key == "limit": 2105 offset = expression.args.pop("offset", None) 2106 if offset: 2107 this.set("offset", exp.Offset(expression=offset)) 2108 continue 2109 break 2110 return this 2111 2112 def _parse_hint(self) -> t.Optional[exp.Hint]: 2113 if self._match(TokenType.HINT): 2114 hints = [] 2115 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2116 hints.extend(hint) 2117 2118 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2119 self.raise_error("Expected */ after HINT") 2120 2121 return self.expression(exp.Hint, expressions=hints) 2122 2123 return None 2124 2125 def _parse_into(self) -> t.Optional[exp.Into]: 2126 if not self._match(TokenType.INTO): 2127 return None 2128 2129 temp = self._match(TokenType.TEMPORARY) 2130 unlogged = self._match_text_seq("UNLOGGED") 2131 self._match(TokenType.TABLE) 2132 2133 return self.expression( 2134 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2135 ) 2136 2137 def _parse_from( 2138 self, joins: bool = False, skip_from_token: bool = False 2139 ) -> t.Optional[exp.From]: 2140 if not skip_from_token and not self._match(TokenType.FROM): 2141 return None 2142 2143 return self.expression( 2144 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2145 ) 2146 2147 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2148 if not self._match(TokenType.MATCH_RECOGNIZE): 2149 return None 2150 2151 self._match_l_paren() 2152 2153 partition = self._parse_partition_by() 2154 order = self._parse_order() 2155 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2156 2157 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2158 rows = 
exp.var("ONE ROW PER MATCH") 2159 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2160 text = "ALL ROWS PER MATCH" 2161 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2162 text += f" SHOW EMPTY MATCHES" 2163 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2164 text += f" OMIT EMPTY MATCHES" 2165 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2166 text += f" WITH UNMATCHED ROWS" 2167 rows = exp.var(text) 2168 else: 2169 rows = None 2170 2171 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2172 text = "AFTER MATCH SKIP" 2173 if self._match_text_seq("PAST", "LAST", "ROW"): 2174 text += f" PAST LAST ROW" 2175 elif self._match_text_seq("TO", "NEXT", "ROW"): 2176 text += f" TO NEXT ROW" 2177 elif self._match_text_seq("TO", "FIRST"): 2178 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2179 elif self._match_text_seq("TO", "LAST"): 2180 text += f" TO LAST {self._advance_any().text}" # type: ignore 2181 after = exp.var(text) 2182 else: 2183 after = None 2184 2185 if self._match_text_seq("PATTERN"): 2186 self._match_l_paren() 2187 2188 if not self._curr: 2189 self.raise_error("Expecting )", self._curr) 2190 2191 paren = 1 2192 start = self._curr 2193 2194 while self._curr and paren > 0: 2195 if self._curr.token_type == TokenType.L_PAREN: 2196 paren += 1 2197 if self._curr.token_type == TokenType.R_PAREN: 2198 paren -= 1 2199 2200 end = self._prev 2201 self._advance() 2202 2203 if paren > 0: 2204 self.raise_error("Expecting )", self._curr) 2205 2206 pattern = exp.var(self._find_sql(start, end)) 2207 else: 2208 pattern = None 2209 2210 define = ( 2211 self._parse_csv( 2212 lambda: self.expression( 2213 exp.Alias, 2214 alias=self._parse_id_var(any_token=True), 2215 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2216 ) 2217 ) 2218 if self._match_text_seq("DEFINE") 2219 else None 2220 ) 2221 2222 self._match_r_paren() 2223 2224 return self.expression( 2225 exp.MatchRecognize, 2226 partition_by=partition, 2227 order=order, 2228 measures=measures, 2229 rows=rows, 2230 after=after, 2231 pattern=pattern, 2232 define=define, 2233 alias=self._parse_table_alias(), 2234 ) 2235 2236 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2237 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2238 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2239 2240 if outer_apply or cross_apply: 2241 this = self._parse_select(table=True) 2242 view = None 2243 outer = not cross_apply 2244 elif self._match(TokenType.LATERAL): 2245 this = self._parse_select(table=True) 2246 view = self._match(TokenType.VIEW) 2247 outer = self._match(TokenType.OUTER) 2248 else: 2249 return None 2250 2251 if not this: 2252 this = ( 2253 self._parse_unnest() 2254 or self._parse_function() 2255 or self._parse_id_var(any_token=False) 2256 ) 2257 2258 while self._match(TokenType.DOT): 2259 this = exp.Dot( 2260 this=this, 2261 expression=self._parse_function() or self._parse_id_var(any_token=False), 2262 ) 2263 2264 if view: 2265 table = self._parse_id_var(any_token=False) 2266 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2267 table_alias: t.Optional[exp.TableAlias] = self.expression( 2268 exp.TableAlias, this=table, columns=columns 2269 ) 2270 elif isinstance(this, exp.Subquery) and this.alias: 2271 # Ensures parity between the Subquery's and the Lateral's "alias" args 2272 table_alias = this.args["alias"].copy() 2273 else: 2274 table_alias = self._parse_table_alias() 2275 2276 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2277 2278 def _parse_join_parts( 2279 self, 2280 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2281 return ( 2282 self._match_set(self.JOIN_METHODS) and self._prev, 2283 self._match_set(self.JOIN_SIDES) and self._prev, 2284 self._match_set(self.JOIN_KINDS) and self._prev, 2285 ) 2286 2287 def _parse_join( 2288 self, skip_join_token: bool = False, parse_bracket: bool = False 2289 ) -> t.Optional[exp.Join]: 2290 if self._match(TokenType.COMMA): 2291 return self.expression(exp.Join, this=self._parse_table()) 2292 2293 index = self._index 2294 method, side, kind = self._parse_join_parts() 2295 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2296 join = self._match(TokenType.JOIN) 2297 2298 if not skip_join_token and not join: 2299 self._retreat(index) 2300 kind = None 2301 method = None 2302 side = None 2303 2304 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2305 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2306 2307 if not skip_join_token and not join and not outer_apply and not cross_apply: 2308 return None 2309 2310 if outer_apply: 2311 side = Token(TokenType.LEFT, "LEFT") 2312 2313 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2314 2315 if method: 2316 kwargs["method"] = method.text 2317 if side: 2318 kwargs["side"] = side.text 2319 if kind: 2320 kwargs["kind"] = kind.text 2321 if hint: 2322 kwargs["hint"] = hint 2323 2324 if self._match(TokenType.ON): 2325 kwargs["on"] = self._parse_conjunction() 2326 elif self._match(TokenType.USING): 2327 kwargs["using"] = self._parse_wrapped_id_vars() 2328 elif not (kind and kind.token_type == TokenType.CROSS): 2329 index = self._index 2330 joins = self._parse_joins() 2331 2332 if joins and self._match(TokenType.ON): 2333 kwargs["on"] = self._parse_conjunction() 2334 elif joins and self._match(TokenType.USING): 2335 kwargs["using"] = self._parse_wrapped_id_vars() 2336 else: 2337 joins = None 2338 self._retreat(index) 2339 2340 kwargs["this"].set("joins", joins) 2341 2342 return self.expression(exp.Join, **kwargs) 2343 2344 def _parse_index( 2345 self, 2346 index: t.Optional[exp.Expression] = None, 2347 ) -> t.Optional[exp.Index]: 2348 if index: 2349 unique = None 2350 primary = None 2351 amp = None 2352 2353 self._match(TokenType.ON) 2354 self._match(TokenType.TABLE) # hive 2355 table = self._parse_table_parts(schema=True) 2356 else: 2357 unique = self._match(TokenType.UNIQUE) 2358 primary = self._match_text_seq("PRIMARY") 2359 amp = self._match_text_seq("AMP") 2360 2361 if not self._match(TokenType.INDEX): 2362 return None 2363 2364 index = self._parse_id_var() 2365 table = None 2366 2367 using = self._parse_field() if self._match(TokenType.USING) else None 2368 2369 if self._match(TokenType.L_PAREN, advance=False): 2370 columns = self._parse_wrapped_csv(self._parse_ordered) 2371 else: 2372 columns = None 2373 2374 return self.expression( 2375 exp.Index, 2376 this=index, 2377 table=table, 2378 using=using, 2379 columns=columns, 2380 unique=unique, 2381 primary=primary, 2382 amp=amp, 2383 partition_by=self._parse_partition_by(), 2384 ) 2385 2386 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2387 hints: t.List[exp.Expression] = [] 2388 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2389 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2390 hints.append( 2391 self.expression( 2392 
exp.WithTableHint, 2393 expressions=self._parse_csv( 2394 lambda: self._parse_function() or self._parse_var(any_token=True) 2395 ), 2396 ) 2397 ) 2398 self._match_r_paren() 2399 else: 2400 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2401 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2402 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2403 2404 self._match_texts({"INDEX", "KEY"}) 2405 if self._match(TokenType.FOR): 2406 hint.set("target", self._advance_any() and self._prev.text.upper()) 2407 2408 hint.set("expressions", self._parse_wrapped_id_vars()) 2409 hints.append(hint) 2410 2411 return hints or None 2412 2413 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2414 return ( 2415 (not schema and self._parse_function(optional_parens=False)) 2416 or self._parse_id_var(any_token=False) 2417 or self._parse_string_as_identifier() 2418 or self._parse_placeholder() 2419 ) 2420 2421 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2422 catalog = None 2423 db = None 2424 table = self._parse_table_part(schema=schema) 2425 2426 while self._match(TokenType.DOT): 2427 if catalog: 2428 # This allows nesting the table in arbitrarily many dot expressions if needed 2429 table = self.expression( 2430 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2431 ) 2432 else: 2433 catalog = db 2434 db = table 2435 table = self._parse_table_part(schema=schema) 2436 2437 if not table: 2438 self.raise_error(f"Expected table name but got {self._curr}") 2439 2440 return self.expression( 2441 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2442 ) 2443 2444 def _parse_table( 2445 self, 2446 schema: bool = False, 2447 joins: bool = False, 2448 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2449 parse_bracket: bool = False, 2450 ) -> t.Optional[exp.Expression]: 2451 lateral = self._parse_lateral() 2452 if lateral: 2453 return lateral 2454 2455 unnest = self._parse_unnest() 2456 if unnest: 2457 return unnest 2458 2459 values = self._parse_derived_table_values() 2460 if values: 2461 return values 2462 2463 subquery = self._parse_select(table=True) 2464 if subquery: 2465 if not subquery.args.get("pivots"): 2466 subquery.set("pivots", self._parse_pivots()) 2467 return subquery 2468 2469 bracket = parse_bracket and self._parse_bracket(None) 2470 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2471 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2472 2473 if schema: 2474 return self._parse_schema(this=this) 2475 2476 if self.ALIAS_POST_TABLESAMPLE: 2477 table_sample = self._parse_table_sample() 2478 2479 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2480 if alias: 2481 this.set("alias", alias) 2482 2483 if not this.args.get("pivots"): 2484 this.set("pivots", self._parse_pivots()) 2485 2486 this.set("hints", self._parse_table_hints()) 2487 2488 if not self.ALIAS_POST_TABLESAMPLE: 2489 table_sample = self._parse_table_sample() 2490 2491 if table_sample: 2492 table_sample.set("this", this) 2493 this = table_sample 2494 2495 if joins: 2496 for join in iter(self._parse_join, None): 2497 this.append("joins", join) 2498 2499 return this 2500 2501 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2502 if not self._match(TokenType.UNNEST): 2503 return None 2504 2505 expressions = self._parse_wrapped_csv(self._parse_type) 2506 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 
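        # Note: in dialects where UNNEST_COLUMN_ONLY is set (BigQuery, for
        # instance), an alias such as "UNNEST(arr) AS x" names the produced
        # column rather than the derived table, so the identifier parsed below
        # is moved from the alias's "this" slot into its "columns" list.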
        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
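        # Illustrative example of the simplified (DuckDB-style) syntax parsed
        # here, in which every clause after the table name is optional:
        #   PIVOT cities ON year USING SUM(population) GROUP BY country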
group = self._parse_group() 2617 return self.expression( 2618 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2619 ) 2620 2621 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2622 index = self._index 2623 2624 if self._match(TokenType.PIVOT): 2625 unpivot = False 2626 elif self._match(TokenType.UNPIVOT): 2627 unpivot = True 2628 else: 2629 return None 2630 2631 expressions = [] 2632 field = None 2633 2634 if not self._match(TokenType.L_PAREN): 2635 self._retreat(index) 2636 return None 2637 2638 if unpivot: 2639 expressions = self._parse_csv(self._parse_column) 2640 else: 2641 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2642 2643 if not expressions: 2644 self.raise_error("Failed to parse PIVOT's aggregation list") 2645 2646 if not self._match(TokenType.FOR): 2647 self.raise_error("Expecting FOR") 2648 2649 value = self._parse_column() 2650 2651 if not self._match(TokenType.IN): 2652 self.raise_error("Expecting IN") 2653 2654 field = self._parse_in(value, alias=True) 2655 2656 self._match_r_paren() 2657 2658 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2659 2660 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2661 pivot.set("alias", self._parse_table_alias()) 2662 2663 if not unpivot: 2664 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2665 2666 columns: t.List[exp.Expression] = [] 2667 for fld in pivot.args["field"].expressions: 2668 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2669 for name in names: 2670 if self.PREFIXED_PIVOT_COLUMNS: 2671 name = f"{name}_{field_name}" if name else field_name 2672 else: 2673 name = f"{field_name}_{name}" if name else field_name 2674 2675 columns.append(exp.to_identifier(name)) 2676 2677 pivot.set("columns", columns) 2678 2679 return pivot 2680 2681 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2682 return [agg.alias for agg in aggregations] 2683 2684 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2685 if not skip_where_token and not self._match(TokenType.WHERE): 2686 return None 2687 2688 return self.expression( 2689 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2690 ) 2691 2692 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2693 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2694 return None 2695 2696 elements = defaultdict(list) 2697 2698 if self._match(TokenType.ALL): 2699 return self.expression(exp.Group, all=True) 2700 2701 while True: 2702 expressions = self._parse_csv(self._parse_conjunction) 2703 if expressions: 2704 elements["expressions"].extend(expressions) 2705 2706 grouping_sets = self._parse_grouping_sets() 2707 if grouping_sets: 2708 elements["grouping_sets"].extend(grouping_sets) 2709 2710 rollup = None 2711 cube = None 2712 totals = None 2713 2714 with_ = self._match(TokenType.WITH) 2715 if self._match(TokenType.ROLLUP): 2716 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2717 elements["rollup"].extend(ensure_list(rollup)) 2718 2719 if self._match(TokenType.CUBE): 2720 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2721 elements["cube"].extend(ensure_list(cube)) 2722 2723 if self._match_text_seq("TOTALS"): 2724 totals = True 2725 elements["totals"] = True # type: ignore 2726 2727 if not (grouping_sets or rollup or cube or totals): 2728 break 2729 2730 return 
self.expression(exp.Group, **elements) # type: ignore 2731 2732 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2733 if not self._match(TokenType.GROUPING_SETS): 2734 return None 2735 2736 return self._parse_wrapped_csv(self._parse_grouping_set) 2737 2738 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2739 if self._match(TokenType.L_PAREN): 2740 grouping_set = self._parse_csv(self._parse_column) 2741 self._match_r_paren() 2742 return self.expression(exp.Tuple, expressions=grouping_set) 2743 2744 return self._parse_column() 2745 2746 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2747 if not skip_having_token and not self._match(TokenType.HAVING): 2748 return None 2749 return self.expression(exp.Having, this=self._parse_conjunction()) 2750 2751 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2752 if not self._match(TokenType.QUALIFY): 2753 return None 2754 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2755 2756 def _parse_order( 2757 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2758 ) -> t.Optional[exp.Expression]: 2759 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2760 return this 2761 2762 return self.expression( 2763 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2764 ) 2765 2766 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2767 if not self._match(token): 2768 return None 2769 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2770 2771 def _parse_ordered(self) -> exp.Ordered: 2772 this = self._parse_conjunction() 2773 self._match(TokenType.ASC) 2774 2775 is_desc = self._match(TokenType.DESC) 2776 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2777 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2778 desc = is_desc or False 2779 asc = not desc 2780 nulls_first = is_nulls_first or False 2781 explicitly_null_ordered = is_nulls_first or is_nulls_last 2782 2783 if ( 2784 not explicitly_null_ordered 2785 and ( 2786 (asc and self.NULL_ORDERING == "nulls_are_small") 2787 or (desc and self.NULL_ORDERING != "nulls_are_small") 2788 ) 2789 and self.NULL_ORDERING != "nulls_are_last" 2790 ): 2791 nulls_first = True 2792 2793 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2794 2795 def _parse_limit( 2796 self, this: t.Optional[exp.Expression] = None, top: bool = False 2797 ) -> t.Optional[exp.Expression]: 2798 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2799 comments = self._prev_comments 2800 if top: 2801 limit_paren = self._match(TokenType.L_PAREN) 2802 expression = self._parse_number() 2803 2804 if limit_paren: 2805 self._match_r_paren() 2806 else: 2807 expression = self._parse_term() 2808 2809 if self._match(TokenType.COMMA): 2810 offset = expression 2811 expression = self._parse_term() 2812 else: 2813 offset = None 2814 2815 limit_exp = self.expression( 2816 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2817 ) 2818 2819 return limit_exp 2820 2821 if self._match(TokenType.FETCH): 2822 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2823 direction = self._prev.text if direction else "FIRST" 2824 2825 count = self._parse_number() 2826 percent = self._match(TokenType.PERCENT) 2827 2828 self._match_set((TokenType.ROW, TokenType.ROWS)) 2829 2830 only = self._match_text_seq("ONLY") 2831 with_ties = self._match_text_seq("WITH", "TIES") 2832 
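            # For reference, this branch covers the ANSI fetch clause, e.g.
            #   SELECT * FROM t ORDER BY c FETCH FIRST 10 ROWS ONLY
            # with optional PERCENT and WITH TIES modifiers handled above.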
2833 if only and with_ties: 2834 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2835 2836 return self.expression( 2837 exp.Fetch, 2838 direction=direction, 2839 count=count, 2840 percent=percent, 2841 with_ties=with_ties, 2842 ) 2843 2844 return this 2845 2846 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2847 if not self._match(TokenType.OFFSET): 2848 return this 2849 2850 count = self._parse_term() 2851 self._match_set((TokenType.ROW, TokenType.ROWS)) 2852 return self.expression(exp.Offset, this=this, expression=count) 2853 2854 def _parse_locks(self) -> t.List[exp.Lock]: 2855 locks = [] 2856 while True: 2857 if self._match_text_seq("FOR", "UPDATE"): 2858 update = True 2859 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2860 "LOCK", "IN", "SHARE", "MODE" 2861 ): 2862 update = False 2863 else: 2864 break 2865 2866 expressions = None 2867 if self._match_text_seq("OF"): 2868 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2869 2870 wait: t.Optional[bool | exp.Expression] = None 2871 if self._match_text_seq("NOWAIT"): 2872 wait = True 2873 elif self._match_text_seq("WAIT"): 2874 wait = self._parse_primary() 2875 elif self._match_text_seq("SKIP", "LOCKED"): 2876 wait = False 2877 2878 locks.append( 2879 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2880 ) 2881 2882 return locks 2883 2884 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2885 if not self._match_set(self.SET_OPERATIONS): 2886 return this 2887 2888 token_type = self._prev.token_type 2889 2890 if token_type == TokenType.UNION: 2891 expression = exp.Union 2892 elif token_type == TokenType.EXCEPT: 2893 expression = exp.Except 2894 else: 2895 expression = exp.Intersect 2896 2897 return self.expression( 2898 expression, 2899 this=this, 2900 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2901 expression=self._parse_set_operations(self._parse_select(nested=True)), 2902 ) 2903 2904 def _parse_expression(self) -> t.Optional[exp.Expression]: 2905 return self._parse_alias(self._parse_conjunction()) 2906 2907 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2908 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2909 2910 def _parse_equality(self) -> t.Optional[exp.Expression]: 2911 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2912 2913 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2914 return self._parse_tokens(self._parse_range, self.COMPARISON) 2915 2916 def _parse_range(self) -> t.Optional[exp.Expression]: 2917 this = self._parse_bitwise() 2918 negate = self._match(TokenType.NOT) 2919 2920 if self._match_set(self.RANGE_PARSERS): 2921 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2922 if not expression: 2923 return this 2924 2925 this = expression 2926 elif self._match(TokenType.ISNULL): 2927 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2928 2929 # Postgres supports ISNULL and NOTNULL for conditions. 
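        # For example, "WHERE x NOTNULL" is parsed like "WHERE x IS NOT NULL".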
2930 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2931 if self._match(TokenType.NOTNULL): 2932 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2933 this = self.expression(exp.Not, this=this) 2934 2935 if negate: 2936 this = self.expression(exp.Not, this=this) 2937 2938 if self._match(TokenType.IS): 2939 this = self._parse_is(this) 2940 2941 return this 2942 2943 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2944 index = self._index - 1 2945 negate = self._match(TokenType.NOT) 2946 2947 if self._match_text_seq("DISTINCT", "FROM"): 2948 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2949 return self.expression(klass, this=this, expression=self._parse_expression()) 2950 2951 expression = self._parse_null() or self._parse_boolean() 2952 if not expression: 2953 self._retreat(index) 2954 return None 2955 2956 this = self.expression(exp.Is, this=this, expression=expression) 2957 return self.expression(exp.Not, this=this) if negate else this 2958 2959 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2960 unnest = self._parse_unnest(with_alias=False) 2961 if unnest: 2962 this = self.expression(exp.In, this=this, unnest=unnest) 2963 elif self._match(TokenType.L_PAREN): 2964 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2965 2966 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2967 this = self.expression(exp.In, this=this, query=expressions[0]) 2968 else: 2969 this = self.expression(exp.In, this=this, expressions=expressions) 2970 2971 self._match_r_paren(this) 2972 else: 2973 this = self.expression(exp.In, this=this, field=self._parse_field()) 2974 2975 return this 2976 2977 def _parse_between(self, this: exp.Expression) -> exp.Between: 2978 low = self._parse_bitwise() 2979 self._match(TokenType.AND) 2980 high = self._parse_bitwise() 2981 return self.expression(exp.Between, this=this, low=low, high=high) 2982 2983 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2984 if not self._match(TokenType.ESCAPE): 2985 return this 2986 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2987 2988 def _parse_interval(self) -> t.Optional[exp.Interval]: 2989 if not self._match(TokenType.INTERVAL): 2990 return None 2991 2992 if self._match(TokenType.STRING, advance=False): 2993 this = self._parse_primary() 2994 else: 2995 this = self._parse_term() 2996 2997 unit = self._parse_function() or self._parse_var() 2998 2999 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3000 # each INTERVAL expression into this canonical form so it's easy to transpile 3001 if this and this.is_number: 3002 this = exp.Literal.string(this.name) 3003 elif this and this.is_string: 3004 parts = this.name.split() 3005 3006 if len(parts) == 2: 3007 if unit: 3008 # this is not actually a unit, it's something else 3009 unit = None 3010 self._retreat(self._index - 1) 3011 else: 3012 this = exp.Literal.string(parts[0]) 3013 unit = self.expression(exp.Var, this=parts[1]) 3014 3015 return self.expression(exp.Interval, this=this, unit=unit) 3016 3017 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3018 this = self._parse_term() 3019 3020 while True: 3021 if self._match_set(self.BITWISE): 3022 this = self.expression( 3023 self.BITWISE[self._prev.token_type], 3024 this=this, 3025 expression=self._parse_term(), 3026 ) 3027 elif self._match(TokenType.DQMARK): 3028 this = 
self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3029 elif self._match_pair(TokenType.LT, TokenType.LT): 3030 this = self.expression( 3031 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3032 ) 3033 elif self._match_pair(TokenType.GT, TokenType.GT): 3034 this = self.expression( 3035 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3036 ) 3037 else: 3038 break 3039 3040 return this 3041 3042 def _parse_term(self) -> t.Optional[exp.Expression]: 3043 return self._parse_tokens(self._parse_factor, self.TERM) 3044 3045 def _parse_factor(self) -> t.Optional[exp.Expression]: 3046 return self._parse_tokens(self._parse_unary, self.FACTOR) 3047 3048 def _parse_unary(self) -> t.Optional[exp.Expression]: 3049 if self._match_set(self.UNARY_PARSERS): 3050 return self.UNARY_PARSERS[self._prev.token_type](self) 3051 return self._parse_at_time_zone(self._parse_type()) 3052 3053 def _parse_type(self) -> t.Optional[exp.Expression]: 3054 interval = self._parse_interval() 3055 if interval: 3056 return interval 3057 3058 index = self._index 3059 data_type = self._parse_types(check_func=True) 3060 this = self._parse_column() 3061 3062 if data_type: 3063 if isinstance(this, exp.Literal): 3064 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3065 if parser: 3066 return parser(self, this, data_type) 3067 return self.expression(exp.Cast, this=this, to=data_type) 3068 if not data_type.expressions: 3069 self._retreat(index) 3070 return self._parse_column() 3071 return self._parse_column_ops(data_type) 3072 3073 return this 3074 3075 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3076 this = self._parse_type() 3077 if not this: 3078 return None 3079 3080 return self.expression( 3081 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3082 ) 3083 3084 def _parse_types( 3085 self, check_func: bool = False, schema: bool = False 3086 ) -> t.Optional[exp.Expression]: 3087 index = self._index 3088 3089 prefix = self._match_text_seq("SYSUDTLIB", ".") 3090 3091 if not self._match_set(self.TYPE_TOKENS): 3092 return None 3093 3094 type_token = self._prev.token_type 3095 3096 if type_token == TokenType.PSEUDO_TYPE: 3097 return self.expression(exp.PseudoType, this=self._prev.text) 3098 3099 nested = type_token in self.NESTED_TYPE_TOKENS 3100 is_struct = type_token == TokenType.STRUCT 3101 expressions = None 3102 maybe_func = False 3103 3104 if self._match(TokenType.L_PAREN): 3105 if is_struct: 3106 expressions = self._parse_csv(self._parse_struct_types) 3107 elif nested: 3108 expressions = self._parse_csv( 3109 lambda: self._parse_types(check_func=check_func, schema=schema) 3110 ) 3111 elif type_token in self.ENUM_TYPE_TOKENS: 3112 expressions = self._parse_csv(self._parse_primary) 3113 else: 3114 expressions = self._parse_csv(self._parse_type_size) 3115 3116 if not expressions or not self._match(TokenType.R_PAREN): 3117 self._retreat(index) 3118 return None 3119 3120 maybe_func = True 3121 3122 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3123 this = exp.DataType( 3124 this=exp.DataType.Type.ARRAY, 3125 expressions=[ 3126 exp.DataType( 3127 this=exp.DataType.Type[type_token.value], 3128 expressions=expressions, 3129 nested=nested, 3130 ) 3131 ], 3132 nested=True, 3133 ) 3134 3135 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3136 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3137 3138 return this 3139 3140 if self._match(TokenType.L_BRACKET): 3141 
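            # A lone "[" that is not part of a complete "[]" suffix means this
            # was not a type after all (e.g. it starts a bracket expression), so
            # backtrack and report no match.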
self._retreat(index) 3142 return None 3143 3144 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3145 if nested and self._match(TokenType.LT): 3146 if is_struct: 3147 expressions = self._parse_csv(self._parse_struct_types) 3148 else: 3149 expressions = self._parse_csv( 3150 lambda: self._parse_types(check_func=check_func, schema=schema) 3151 ) 3152 3153 if not self._match(TokenType.GT): 3154 self.raise_error("Expecting >") 3155 3156 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3157 values = self._parse_csv(self._parse_conjunction) 3158 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3159 3160 value: t.Optional[exp.Expression] = None 3161 if type_token in self.TIMESTAMPS: 3162 if self._match_text_seq("WITH", "TIME", "ZONE"): 3163 maybe_func = False 3164 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3165 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3166 maybe_func = False 3167 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3168 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3169 maybe_func = False 3170 elif type_token == TokenType.INTERVAL: 3171 unit = self._parse_var() 3172 3173 if not unit: 3174 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3175 else: 3176 value = self.expression(exp.Interval, unit=unit) 3177 3178 if maybe_func and check_func: 3179 index2 = self._index 3180 peek = self._parse_string() 3181 3182 if not peek: 3183 self._retreat(index) 3184 return None 3185 3186 self._retreat(index2) 3187 3188 if value: 3189 return value 3190 3191 return exp.DataType( 3192 this=exp.DataType.Type[type_token.value], 3193 expressions=expressions, 3194 nested=nested, 3195 values=values, 3196 prefix=prefix, 3197 ) 3198 3199 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3200 this = self._parse_type() or self._parse_id_var() 3201 self._match(TokenType.COLON) 3202 return self._parse_column_def(this) 3203 3204 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3205 if not self._match_text_seq("AT", "TIME", "ZONE"): 3206 return this 3207 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3208 3209 def _parse_column(self) -> t.Optional[exp.Expression]: 3210 this = self._parse_field() 3211 if isinstance(this, exp.Identifier): 3212 this = self.expression(exp.Column, this=this) 3213 elif not this: 3214 return self._parse_bracket(this) 3215 return self._parse_column_ops(this) 3216 3217 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3218 this = self._parse_bracket(this) 3219 3220 while self._match_set(self.COLUMN_OPERATORS): 3221 op_token = self._prev.token_type 3222 op = self.COLUMN_OPERATORS.get(op_token) 3223 3224 if op_token == TokenType.DCOLON: 3225 field = self._parse_types() 3226 if not field: 3227 self.raise_error("Expected type") 3228 elif op and self._curr: 3229 self._advance() 3230 value = self._prev.text 3231 field = ( 3232 exp.Literal.number(value) 3233 if self._prev.token_type == TokenType.NUMBER 3234 else exp.Literal.string(value) 3235 ) 3236 else: 3237 field = self._parse_field(anonymous_func=True, any_token=True) 3238 3239 if isinstance(field, exp.Func): 3240 # bigquery allows function calls like x.y.count(...) 3241 # SAFE.SUBSTR(...) 
3242 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3243 this = self._replace_columns_with_dots(this) 3244 3245 if op: 3246 this = op(self, this, field) 3247 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3248 this = self.expression( 3249 exp.Column, 3250 this=field, 3251 table=this.this, 3252 db=this.args.get("table"), 3253 catalog=this.args.get("db"), 3254 ) 3255 else: 3256 this = self.expression(exp.Dot, this=this, expression=field) 3257 this = self._parse_bracket(this) 3258 return this 3259 3260 def _parse_primary(self) -> t.Optional[exp.Expression]: 3261 if self._match_set(self.PRIMARY_PARSERS): 3262 token_type = self._prev.token_type 3263 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3264 3265 if token_type == TokenType.STRING: 3266 expressions = [primary] 3267 while self._match(TokenType.STRING): 3268 expressions.append(exp.Literal.string(self._prev.text)) 3269 3270 if len(expressions) > 1: 3271 return self.expression(exp.Concat, expressions=expressions) 3272 3273 return primary 3274 3275 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3276 return exp.Literal.number(f"0.{self._prev.text}") 3277 3278 if self._match(TokenType.L_PAREN): 3279 comments = self._prev_comments 3280 query = self._parse_select() 3281 3282 if query: 3283 expressions = [query] 3284 else: 3285 expressions = self._parse_expressions() 3286 3287 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3288 3289 if isinstance(this, exp.Subqueryable): 3290 this = self._parse_set_operations( 3291 self._parse_subquery(this=this, parse_alias=False) 3292 ) 3293 elif len(expressions) > 1: 3294 this = self.expression(exp.Tuple, expressions=expressions) 3295 else: 3296 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3297 3298 if this: 3299 this.add_comments(comments) 3300 3301 self._match_r_paren(expression=this) 3302 return this 3303 3304 return None 3305 3306 def _parse_field( 3307 self, 3308 any_token: bool = False, 3309 tokens: t.Optional[t.Collection[TokenType]] = None, 3310 anonymous_func: bool = False, 3311 ) -> t.Optional[exp.Expression]: 3312 return ( 3313 self._parse_primary() 3314 or self._parse_function(anonymous=anonymous_func) 3315 or self._parse_id_var(any_token=any_token, tokens=tokens) 3316 ) 3317 3318 def _parse_function( 3319 self, 3320 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3321 anonymous: bool = False, 3322 optional_parens: bool = True, 3323 ) -> t.Optional[exp.Expression]: 3324 if not self._curr: 3325 return None 3326 3327 token_type = self._curr.token_type 3328 this = self._curr.text 3329 upper = this.upper() 3330 3331 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3332 if optional_parens and parser: 3333 self._advance() 3334 return parser(self) 3335 3336 if not self._next or self._next.token_type != TokenType.L_PAREN: 3337 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3338 self._advance() 3339 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3340 3341 return None 3342 3343 if token_type not in self.FUNC_TOKENS: 3344 return None 3345 3346 self._advance(2) 3347 3348 parser = self.FUNCTION_PARSERS.get(upper) 3349 if parser and not anonymous: 3350 this = parser(self) 3351 else: 3352 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3353 3354 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3355 this = self.expression(subquery_predicate, this=self._parse_select()) 3356 
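                # For example, EXISTS (SELECT ...) or x = ANY (SELECT ...) is
                # parsed into the corresponding subquery predicate here; the
                # closing paren is consumed before returning early.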
self._match_r_paren() 3357 return this 3358 3359 if functions is None: 3360 functions = self.FUNCTIONS 3361 3362 function = functions.get(upper) 3363 3364 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3365 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3366 3367 if function and not anonymous: 3368 func = self.validate_expression(function(args), args) 3369 if not self.NORMALIZE_FUNCTIONS: 3370 func.meta["name"] = this 3371 this = func 3372 else: 3373 this = self.expression(exp.Anonymous, this=this, expressions=args) 3374 3375 self._match_r_paren(this) 3376 return self._parse_window(this) 3377 3378 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3379 return self._parse_column_def(self._parse_id_var()) 3380 3381 def _parse_user_defined_function( 3382 self, kind: t.Optional[TokenType] = None 3383 ) -> t.Optional[exp.Expression]: 3384 this = self._parse_id_var() 3385 3386 while self._match(TokenType.DOT): 3387 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3388 3389 if not self._match(TokenType.L_PAREN): 3390 return this 3391 3392 expressions = self._parse_csv(self._parse_function_parameter) 3393 self._match_r_paren() 3394 return self.expression( 3395 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3396 ) 3397 3398 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3399 literal = self._parse_primary() 3400 if literal: 3401 return self.expression(exp.Introducer, this=token.text, expression=literal) 3402 3403 return self.expression(exp.Identifier, this=token.text) 3404 3405 def _parse_session_parameter(self) -> exp.SessionParameter: 3406 kind = None 3407 this = self._parse_id_var() or self._parse_primary() 3408 3409 if this and self._match(TokenType.DOT): 3410 kind = this.name 3411 this = self._parse_var() or self._parse_primary() 3412 3413 return self.expression(exp.SessionParameter, this=this, kind=kind) 3414 3415 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3416 index = self._index 3417 3418 if self._match(TokenType.L_PAREN): 3419 expressions = self._parse_csv(self._parse_id_var) 3420 3421 if not self._match(TokenType.R_PAREN): 3422 self._retreat(index) 3423 else: 3424 expressions = [self._parse_id_var()] 3425 3426 if self._match_set(self.LAMBDAS): 3427 return self.LAMBDAS[self._prev.token_type](self, expressions) 3428 3429 self._retreat(index) 3430 3431 this: t.Optional[exp.Expression] 3432 3433 if self._match(TokenType.DISTINCT): 3434 this = self.expression( 3435 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3436 ) 3437 else: 3438 this = self._parse_select_or_expression(alias=alias) 3439 3440 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3441 3442 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3443 index = self._index 3444 3445 if not self.errors: 3446 try: 3447 if self._parse_select(nested=True): 3448 return this 3449 except ParseError: 3450 pass 3451 finally: 3452 self.errors.clear() 3453 self._retreat(index) 3454 3455 if not self._match(TokenType.L_PAREN): 3456 return this 3457 3458 args = self._parse_csv( 3459 lambda: self._parse_constraint() 3460 or self._parse_column_def(self._parse_field(any_token=True)) 3461 ) 3462 3463 self._match_r_paren() 3464 return self.expression(exp.Schema, this=this, expressions=args) 3465 3466 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3467 # column 
defs are not really columns, they're identifiers 3468 if isinstance(this, exp.Column): 3469 this = this.this 3470 3471 kind = self._parse_types(schema=True) 3472 3473 if self._match_text_seq("FOR", "ORDINALITY"): 3474 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3475 3476 constraints = [] 3477 while True: 3478 constraint = self._parse_column_constraint() 3479 if not constraint: 3480 break 3481 constraints.append(constraint) 3482 3483 if not kind and not constraints: 3484 return this 3485 3486 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3487 3488 def _parse_auto_increment( 3489 self, 3490 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3491 start = None 3492 increment = None 3493 3494 if self._match(TokenType.L_PAREN, advance=False): 3495 args = self._parse_wrapped_csv(self._parse_bitwise) 3496 start = seq_get(args, 0) 3497 increment = seq_get(args, 1) 3498 elif self._match_text_seq("START"): 3499 start = self._parse_bitwise() 3500 self._match_text_seq("INCREMENT") 3501 increment = self._parse_bitwise() 3502 3503 if start and increment: 3504 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3505 3506 return exp.AutoIncrementColumnConstraint() 3507 3508 def _parse_compress(self) -> exp.CompressColumnConstraint: 3509 if self._match(TokenType.L_PAREN, advance=False): 3510 return self.expression( 3511 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3512 ) 3513 3514 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3515 3516 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3517 if self._match_text_seq("BY", "DEFAULT"): 3518 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3519 this = self.expression( 3520 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3521 ) 3522 else: 3523 self._match_text_seq("ALWAYS") 3524 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3525 3526 self._match(TokenType.ALIAS) 3527 identity = self._match_text_seq("IDENTITY") 3528 3529 if self._match(TokenType.L_PAREN): 3530 if self._match_text_seq("START", "WITH"): 3531 this.set("start", self._parse_bitwise()) 3532 if self._match_text_seq("INCREMENT", "BY"): 3533 this.set("increment", self._parse_bitwise()) 3534 if self._match_text_seq("MINVALUE"): 3535 this.set("minvalue", self._parse_bitwise()) 3536 if self._match_text_seq("MAXVALUE"): 3537 this.set("maxvalue", self._parse_bitwise()) 3538 3539 if self._match_text_seq("CYCLE"): 3540 this.set("cycle", True) 3541 elif self._match_text_seq("NO", "CYCLE"): 3542 this.set("cycle", False) 3543 3544 if not identity: 3545 this.set("expression", self._parse_bitwise()) 3546 3547 self._match_r_paren() 3548 3549 return this 3550 3551 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3552 self._match_text_seq("LENGTH") 3553 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3554 3555 def _parse_not_constraint( 3556 self, 3557 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3558 if self._match_text_seq("NULL"): 3559 return self.expression(exp.NotNullColumnConstraint) 3560 if self._match_text_seq("CASESPECIFIC"): 3561 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3562 return None 3563 3564 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3565 if self._match(TokenType.CONSTRAINT): 3566 this = 
self._parse_id_var() 3567 else: 3568 this = None 3569 3570 if self._match_texts(self.CONSTRAINT_PARSERS): 3571 return self.expression( 3572 exp.ColumnConstraint, 3573 this=this, 3574 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3575 ) 3576 3577 return this 3578 3579 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3580 if not self._match(TokenType.CONSTRAINT): 3581 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3582 3583 this = self._parse_id_var() 3584 expressions = [] 3585 3586 while True: 3587 constraint = self._parse_unnamed_constraint() or self._parse_function() 3588 if not constraint: 3589 break 3590 expressions.append(constraint) 3591 3592 return self.expression(exp.Constraint, this=this, expressions=expressions) 3593 3594 def _parse_unnamed_constraint( 3595 self, constraints: t.Optional[t.Collection[str]] = None 3596 ) -> t.Optional[exp.Expression]: 3597 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3598 return None 3599 3600 constraint = self._prev.text.upper() 3601 if constraint not in self.CONSTRAINT_PARSERS: 3602 self.raise_error(f"No parser found for schema constraint {constraint}.") 3603 3604 return self.CONSTRAINT_PARSERS[constraint](self) 3605 3606 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3607 self._match_text_seq("KEY") 3608 return self.expression( 3609 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3610 ) 3611 3612 def _parse_key_constraint_options(self) -> t.List[str]: 3613 options = [] 3614 while True: 3615 if not self._curr: 3616 break 3617 3618 if self._match(TokenType.ON): 3619 action = None 3620 on = self._advance_any() and self._prev.text 3621 3622 if self._match_text_seq("NO", "ACTION"): 3623 action = "NO ACTION" 3624 elif self._match_text_seq("CASCADE"): 3625 action = "CASCADE" 3626 elif self._match_pair(TokenType.SET, TokenType.NULL): 3627 action = "SET NULL" 3628 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3629 action = "SET DEFAULT" 3630 else: 3631 self.raise_error("Invalid key constraint") 3632 3633 options.append(f"ON {on} {action}") 3634 elif self._match_text_seq("NOT", "ENFORCED"): 3635 options.append("NOT ENFORCED") 3636 elif self._match_text_seq("DEFERRABLE"): 3637 options.append("DEFERRABLE") 3638 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3639 options.append("INITIALLY DEFERRED") 3640 elif self._match_text_seq("NORELY"): 3641 options.append("NORELY") 3642 elif self._match_text_seq("MATCH", "FULL"): 3643 options.append("MATCH FULL") 3644 else: 3645 break 3646 3647 return options 3648 3649 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3650 if match and not self._match(TokenType.REFERENCES): 3651 return None 3652 3653 expressions = None 3654 this = self._parse_table(schema=True) 3655 options = self._parse_key_constraint_options() 3656 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3657 3658 def _parse_foreign_key(self) -> exp.ForeignKey: 3659 expressions = self._parse_wrapped_id_vars() 3660 reference = self._parse_references() 3661 options = {} 3662 3663 while self._match(TokenType.ON): 3664 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3665 self.raise_error("Expected DELETE or UPDATE") 3666 3667 kind = self._prev.text.lower() 3668 3669 if self._match_text_seq("NO", "ACTION"): 3670 action = "NO ACTION" 3671 elif self._match(TokenType.SET): 3672 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3673 action 
= "SET " + self._prev.text.upper() 3674 else: 3675 self._advance() 3676 action = self._prev.text.upper() 3677 3678 options[kind] = action 3679 3680 return self.expression( 3681 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3682 ) 3683 3684 def _parse_primary_key( 3685 self, wrapped_optional: bool = False, in_props: bool = False 3686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3687 desc = ( 3688 self._match_set((TokenType.ASC, TokenType.DESC)) 3689 and self._prev.token_type == TokenType.DESC 3690 ) 3691 3692 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3693 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3694 3695 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3696 options = self._parse_key_constraint_options() 3697 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3698 3699 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3700 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3701 return this 3702 3703 bracket_kind = self._prev.token_type 3704 3705 if self._match(TokenType.COLON): 3706 expressions: t.List[t.Optional[exp.Expression]] = [ 3707 self.expression(exp.Slice, expression=self._parse_conjunction()) 3708 ] 3709 else: 3710 expressions = self._parse_csv( 3711 lambda: self._parse_slice( 3712 self._parse_alias(self._parse_conjunction(), explicit=True) 3713 ) 3714 ) 3715 3716 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3717 if bracket_kind == TokenType.L_BRACE: 3718 this = self.expression(exp.Struct, expressions=expressions) 3719 elif not this or this.name.upper() == "ARRAY": 3720 this = self.expression(exp.Array, expressions=expressions) 3721 else: 3722 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3723 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3724 3725 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3726 self.raise_error("Expected ]") 3727 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3728 self.raise_error("Expected }") 3729 3730 self._add_comments(this) 3731 return self._parse_bracket(this) 3732 3733 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3734 if self._match(TokenType.COLON): 3735 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3736 return this 3737 3738 def _parse_case(self) -> t.Optional[exp.Expression]: 3739 ifs = [] 3740 default = None 3741 3742 expression = self._parse_conjunction() 3743 3744 while self._match(TokenType.WHEN): 3745 this = self._parse_conjunction() 3746 self._match(TokenType.THEN) 3747 then = self._parse_conjunction() 3748 ifs.append(self.expression(exp.If, this=this, true=then)) 3749 3750 if self._match(TokenType.ELSE): 3751 default = self._parse_conjunction() 3752 3753 if not self._match(TokenType.END): 3754 self.raise_error("Expected END after CASE", self._prev) 3755 3756 return self._parse_window( 3757 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3758 ) 3759 3760 def _parse_if(self) -> t.Optional[exp.Expression]: 3761 if self._match(TokenType.L_PAREN): 3762 args = self._parse_csv(self._parse_conjunction) 3763 this = self.validate_expression(exp.If.from_arg_list(args), args) 3764 self._match_r_paren() 3765 else: 3766 index = self._index - 1 3767 condition = self._parse_conjunction() 3768 3769 
if not condition: 3770 self._retreat(index) 3771 return None 3772 3773 self._match(TokenType.THEN) 3774 true = self._parse_conjunction() 3775 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3776 self._match(TokenType.END) 3777 this = self.expression(exp.If, this=condition, true=true, false=false) 3778 3779 return self._parse_window(this) 3780 3781 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3782 if not self._match_text_seq("VALUE", "FOR"): 3783 self._retreat(self._index - 1) 3784 return None 3785 3786 return self.expression( 3787 exp.NextValueFor, 3788 this=self._parse_column(), 3789 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3790 ) 3791 3792 def _parse_extract(self) -> exp.Extract: 3793 this = self._parse_function() or self._parse_var() or self._parse_type() 3794 3795 if self._match(TokenType.FROM): 3796 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3797 3798 if not self._match(TokenType.COMMA): 3799 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3800 3801 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3802 3803 def _parse_any_value(self) -> exp.AnyValue: 3804 this = self._parse_lambda() 3805 is_max = None 3806 having = None 3807 3808 if self._match(TokenType.HAVING): 3809 self._match_texts(("MAX", "MIN")) 3810 is_max = self._prev.text == "MAX" 3811 having = self._parse_column() 3812 3813 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3814 3815 def _parse_cast(self, strict: bool) -> exp.Expression: 3816 this = self._parse_conjunction() 3817 3818 if not self._match(TokenType.ALIAS): 3819 if self._match(TokenType.COMMA): 3820 return self.expression( 3821 exp.CastToStrType, this=this, expression=self._parse_string() 3822 ) 3823 else: 3824 self.raise_error("Expected AS after CAST") 3825 3826 fmt = None 3827 to = self._parse_types() 3828 3829 if not to: 3830 self.raise_error("Expected TYPE after CAST") 3831 elif to.this == exp.DataType.Type.CHAR: 3832 if self._match(TokenType.CHARACTER_SET): 3833 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3834 elif self._match(TokenType.FORMAT): 3835 fmt_string = self._parse_string() 3836 fmt = self._parse_at_time_zone(fmt_string) 3837 3838 if to.this in exp.DataType.TEMPORAL_TYPES: 3839 this = self.expression( 3840 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3841 this=this, 3842 format=exp.Literal.string( 3843 format_time( 3844 fmt_string.this if fmt_string else "", 3845 self.FORMAT_MAPPING or self.TIME_MAPPING, 3846 self.FORMAT_TRIE or self.TIME_TRIE, 3847 ) 3848 ), 3849 ) 3850 3851 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3852 this.set("zone", fmt.args["zone"]) 3853 3854 return this 3855 3856 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3857 3858 def _parse_concat(self) -> t.Optional[exp.Expression]: 3859 args = self._parse_csv(self._parse_conjunction) 3860 if self.CONCAT_NULL_OUTPUTS_STRING: 3861 args = [ 3862 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3863 for arg in args 3864 if arg 3865 ] 3866 3867 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3868 # we find such a call we replace it with its argument. 
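        # For example, CONCAT('a') is reduced to just the literal 'a'.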
3869 if len(args) == 1: 3870 return args[0] 3871 3872 return self.expression( 3873 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3874 ) 3875 3876 def _parse_string_agg(self) -> exp.Expression: 3877 if self._match(TokenType.DISTINCT): 3878 args: t.List[t.Optional[exp.Expression]] = [ 3879 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3880 ] 3881 if self._match(TokenType.COMMA): 3882 args.extend(self._parse_csv(self._parse_conjunction)) 3883 else: 3884 args = self._parse_csv(self._parse_conjunction) 3885 3886 index = self._index 3887 if not self._match(TokenType.R_PAREN) and args: 3888 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3889 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3890 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3891 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3892 3893 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3894 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3895 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3896 if not self._match_text_seq("WITHIN", "GROUP"): 3897 self._retreat(index) 3898 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3899 3900 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3901 order = self._parse_order(this=seq_get(args, 0)) 3902 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3903 3904 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3905 this = self._parse_bitwise() 3906 3907 if self._match(TokenType.USING): 3908 to: t.Optional[exp.Expression] = self.expression( 3909 exp.CharacterSet, this=self._parse_var() 3910 ) 3911 elif self._match(TokenType.COMMA): 3912 to = self._parse_types() 3913 else: 3914 to = None 3915 3916 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3917 3918 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3919 """ 3920 There are generally two variants of the DECODE function: 3921 3922 - DECODE(bin, charset) 3923 - DECODE(expression, search, result [, search, result] ... [, default]) 3924 3925 The second variant will always be parsed into a CASE expression. Note that NULL 3926 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3927 instead of relying on pattern matching. 
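
        Example: DECODE(x, 1, 'one', 'other') is parsed into (roughly)
        CASE WHEN x = 1 THEN 'one' ELSE 'other' END.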
3928 """ 3929 args = self._parse_csv(self._parse_conjunction) 3930 3931 if len(args) < 3: 3932 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3933 3934 expression, *expressions = args 3935 if not expression: 3936 return None 3937 3938 ifs = [] 3939 for search, result in zip(expressions[::2], expressions[1::2]): 3940 if not search or not result: 3941 return None 3942 3943 if isinstance(search, exp.Literal): 3944 ifs.append( 3945 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3946 ) 3947 elif isinstance(search, exp.Null): 3948 ifs.append( 3949 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3950 ) 3951 else: 3952 cond = exp.or_( 3953 exp.EQ(this=expression.copy(), expression=search), 3954 exp.and_( 3955 exp.Is(this=expression.copy(), expression=exp.Null()), 3956 exp.Is(this=search.copy(), expression=exp.Null()), 3957 copy=False, 3958 ), 3959 copy=False, 3960 ) 3961 ifs.append(exp.If(this=cond, true=result)) 3962 3963 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3964 3965 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3966 self._match_text_seq("KEY") 3967 key = self._parse_field() 3968 self._match(TokenType.COLON) 3969 self._match_text_seq("VALUE") 3970 value = self._parse_field() 3971 3972 if not key and not value: 3973 return None 3974 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3975 3976 def _parse_json_object(self) -> exp.JSONObject: 3977 star = self._parse_star() 3978 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3979 3980 null_handling = None 3981 if self._match_text_seq("NULL", "ON", "NULL"): 3982 null_handling = "NULL ON NULL" 3983 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3984 null_handling = "ABSENT ON NULL" 3985 3986 unique_keys = None 3987 if self._match_text_seq("WITH", "UNIQUE"): 3988 unique_keys = True 3989 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3990 unique_keys = False 3991 3992 self._match_text_seq("KEYS") 3993 3994 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3995 format_json = self._match_text_seq("FORMAT", "JSON") 3996 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3997 3998 return self.expression( 3999 exp.JSONObject, 4000 expressions=expressions, 4001 null_handling=null_handling, 4002 unique_keys=unique_keys, 4003 return_type=return_type, 4004 format_json=format_json, 4005 encoding=encoding, 4006 ) 4007 4008 def _parse_logarithm(self) -> exp.Func: 4009 # Default argument order is base, expression 4010 args = self._parse_csv(self._parse_range) 4011 4012 if len(args) > 1: 4013 if not self.LOG_BASE_FIRST: 4014 args.reverse() 4015 return exp.Log.from_arg_list(args) 4016 4017 return self.expression( 4018 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4019 ) 4020 4021 def _parse_match_against(self) -> exp.MatchAgainst: 4022 expressions = self._parse_csv(self._parse_column) 4023 4024 self._match_text_seq(")", "AGAINST", "(") 4025 4026 this = self._parse_string() 4027 4028 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4029 modifier = "IN NATURAL LANGUAGE MODE" 4030 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4031 modifier = f"{modifier} WITH QUERY EXPANSION" 4032 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4033 modifier = "IN BOOLEAN MODE" 4034 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4035 modifier = "WITH QUERY EXPANSION" 4036 
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
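
The behavior of several of the function parsers above can be observed end to end through sqlglot's public API. A minimal sketch (the exact output formatting may differ between sqlglot versions):

import sqlglot

# DECODE with search/result pairs is parsed into a CASE expression, and a
# NULL search becomes an explicit IS NULL check (see _parse_decode above).
print(sqlglot.parse_one("SELECT DECODE(a, 1, 'one', NULL, 'none', 'other') FROM t").sql())
# Expected shape:
# SELECT CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'none' ELSE 'other' END FROM t

# _parse_string_agg builds a dialect-agnostic GroupConcat node, which is why
# Postgres STRING_AGG can be re-rendered as, e.g., MySQL's GROUP_CONCAT.
print(sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")[0])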
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
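
As a usage illustration (a minimal sketch, not part of the module): a parser that accumulates several errors and raises them together can be constructed like this:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# ErrorLevel.RAISE defers raising until parsing finishes, so up to max_errors
# messages are reported in one ParseError; the default, ErrorLevel.IMMEDIATE,
# raises on the first error encountered.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)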
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
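
A typical round trip through the tokenizer and the parser looks like the following minimal sketch, using the dialect-less base classes defined in this module:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"

# One syntax tree is produced per statement; passing `sql` lets the parser
# quote the offending query text in its error messages.
trees = Parser().parse(Tokenizer().tokenize(sql), sql)
print([tree.sql() for tree in trees])
# ['SELECT a FROM t', 'SELECT b FROM u']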
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
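
For example, a dotted name such as db.tbl is ambiguous between a column path and a table reference; parse_into can force the table interpretation. A minimal sketch (assuming exp.Table is registered in EXPRESSION_PARSERS, as it is in the base Parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "db.tbl"

# The exp.Table entry in EXPRESSION_PARSERS points at a table-specific parse
# method, so the dotted name becomes a Table node rather than a Dot/Column.
table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql)[0]
print(type(table).__name__)  # Table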
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
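
With ErrorLevel.WARN, for instance, recoverable problems are logged through the module-level "sqlglot" logger instead of raised; check_errors is invoked once parsing finishes. A minimal sketch (the exact error text depends on the input and version):

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()

sql = "SELECT * FROM"  # malformed: the table name is missing
parser = Parser(error_level=ErrorLevel.WARN)

# parse() records the error and logs it via check_errors rather than raising,
# returning whatever partial tree could be recovered.
parser.parse(Tokenizer().tokenize(sql), sql)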
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
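
In practice, parse methods call this helper rather than instantiating Expression classes directly, so comment attachment and validation are never skipped. A hypothetical sketch of such a method in a Parser subclass (the name _parse_my_between is invented for illustration):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import TokenType

class MyParser(Parser):
    # Hypothetical helper: builds an exp.Between node from operands parsed
    # off the token stream; self.expression validates that `this`, `low`
    # and `high` are all present before the node is returned.
    def _parse_my_between(self, this):
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)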
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
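
Validation failures flow through raise_error, so what happens to them depends on the configured error level. A minimal sketch (assuming exp.Lower, whose `this` argument is mandatory; the exact message wording may vary by version):

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)

# exp.Lower requires `this`, so validating a bare instance records an error;
# with ErrorLevel.RAISE it surfaces when check_errors() runs rather than
# immediately.
parser.validate_expression(exp.Lower())
try:
    parser.check_errors()
except ParseError as e:
    print(e)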