# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 
TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.FUNCTION, 249 TokenType.INDEX, 250 TokenType.PROCEDURE, 251 *DB_CREATABLES, 252 } 253 254 # Tokens that can represent identifiers 255 ID_VAR_TOKENS = { 256 TokenType.VAR, 257 TokenType.ANTI, 258 TokenType.APPLY, 259 TokenType.ASC, 260 TokenType.AUTO_INCREMENT, 261 TokenType.BEGIN, 262 TokenType.CACHE, 263 TokenType.CASE, 264 TokenType.COLLATE, 265 TokenType.COMMAND, 266 TokenType.COMMENT, 267 TokenType.COMMIT, 268 TokenType.CONSTRAINT, 269 TokenType.DEFAULT, 270 TokenType.DELETE, 271 TokenType.DESC, 272 TokenType.DESCRIBE, 273 TokenType.DICTIONARY, 274 TokenType.DIV, 275 TokenType.END, 276 TokenType.EXECUTE, 277 TokenType.ESCAPE, 278 TokenType.FALSE, 279 TokenType.FIRST, 280 TokenType.FILTER, 281 TokenType.FORMAT, 282 TokenType.FULL, 283 TokenType.IS, 284 TokenType.ISNULL, 285 TokenType.INTERVAL, 286 TokenType.KEEP, 287 TokenType.KILL, 288 TokenType.LEFT, 289 TokenType.LOAD, 290 TokenType.MERGE, 291 TokenType.NATURAL, 292 TokenType.NEXT, 293 TokenType.OFFSET, 294 TokenType.ORDINALITY, 295 TokenType.OVERLAPS, 296 TokenType.OVERWRITE, 297 TokenType.PARTITION, 298 TokenType.PERCENT, 299 TokenType.PIVOT, 300 TokenType.PRAGMA, 301 TokenType.RANGE, 302 TokenType.REFERENCES, 303 TokenType.RIGHT, 304 TokenType.ROW, 305 TokenType.ROWS, 306 TokenType.SEMI, 307 TokenType.SET, 308 
TokenType.SETTINGS, 309 TokenType.SHOW, 310 TokenType.TEMPORARY, 311 TokenType.TOP, 312 TokenType.TRUE, 313 TokenType.UNIQUE, 314 TokenType.UNPIVOT, 315 TokenType.UPDATE, 316 TokenType.VOLATILE, 317 TokenType.WINDOW, 318 *CREATABLES, 319 *SUBQUERY_PREDICATES, 320 *TYPE_TOKENS, 321 *NO_PAREN_FUNCTIONS, 322 } 323 324 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 325 326 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 327 TokenType.ANTI, 328 TokenType.APPLY, 329 TokenType.ASOF, 330 TokenType.FULL, 331 TokenType.LEFT, 332 TokenType.LOCK, 333 TokenType.NATURAL, 334 TokenType.OFFSET, 335 TokenType.RIGHT, 336 TokenType.SEMI, 337 TokenType.WINDOW, 338 } 339 340 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 341 342 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 343 344 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 345 346 FUNC_TOKENS = { 347 TokenType.COLLATE, 348 TokenType.COMMAND, 349 TokenType.CURRENT_DATE, 350 TokenType.CURRENT_DATETIME, 351 TokenType.CURRENT_TIMESTAMP, 352 TokenType.CURRENT_TIME, 353 TokenType.CURRENT_USER, 354 TokenType.FILTER, 355 TokenType.FIRST, 356 TokenType.FORMAT, 357 TokenType.GLOB, 358 TokenType.IDENTIFIER, 359 TokenType.INDEX, 360 TokenType.ISNULL, 361 TokenType.ILIKE, 362 TokenType.INSERT, 363 TokenType.LIKE, 364 TokenType.MERGE, 365 TokenType.OFFSET, 366 TokenType.PRIMARY_KEY, 367 TokenType.RANGE, 368 TokenType.REPLACE, 369 TokenType.RLIKE, 370 TokenType.ROW, 371 TokenType.UNNEST, 372 TokenType.VAR, 373 TokenType.LEFT, 374 TokenType.RIGHT, 375 TokenType.DATE, 376 TokenType.DATETIME, 377 TokenType.TABLE, 378 TokenType.TIMESTAMP, 379 TokenType.TIMESTAMPTZ, 380 TokenType.WINDOW, 381 TokenType.XOR, 382 *TYPE_TOKENS, 383 *SUBQUERY_PREDICATES, 384 } 385 386 CONJUNCTION = { 387 TokenType.AND: exp.And, 388 TokenType.OR: exp.Or, 389 } 390 391 EQUALITY = { 392 TokenType.EQ: exp.EQ, 393 TokenType.NEQ: exp.NEQ, 394 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 395 } 396 397 COMPARISON = { 398 TokenType.GT: exp.GT, 399 TokenType.GTE: 
exp.GTE, 400 TokenType.LT: exp.LT, 401 TokenType.LTE: exp.LTE, 402 } 403 404 BITWISE = { 405 TokenType.AMP: exp.BitwiseAnd, 406 TokenType.CARET: exp.BitwiseXor, 407 TokenType.PIPE: exp.BitwiseOr, 408 TokenType.DPIPE: exp.DPipe, 409 } 410 411 TERM = { 412 TokenType.DASH: exp.Sub, 413 TokenType.PLUS: exp.Add, 414 TokenType.MOD: exp.Mod, 415 TokenType.COLLATE: exp.Collate, 416 } 417 418 FACTOR = { 419 TokenType.DIV: exp.IntDiv, 420 TokenType.LR_ARROW: exp.Distance, 421 TokenType.SLASH: exp.Div, 422 TokenType.STAR: exp.Mul, 423 } 424 425 TIMES = { 426 TokenType.TIME, 427 TokenType.TIMETZ, 428 } 429 430 TIMESTAMPS = { 431 TokenType.TIMESTAMP, 432 TokenType.TIMESTAMPTZ, 433 TokenType.TIMESTAMPLTZ, 434 *TIMES, 435 } 436 437 SET_OPERATIONS = { 438 TokenType.UNION, 439 TokenType.INTERSECT, 440 TokenType.EXCEPT, 441 } 442 443 JOIN_METHODS = { 444 TokenType.NATURAL, 445 TokenType.ASOF, 446 } 447 448 JOIN_SIDES = { 449 TokenType.LEFT, 450 TokenType.RIGHT, 451 TokenType.FULL, 452 } 453 454 JOIN_KINDS = { 455 TokenType.INNER, 456 TokenType.OUTER, 457 TokenType.CROSS, 458 TokenType.SEMI, 459 TokenType.ANTI, 460 } 461 462 JOIN_HINTS: t.Set[str] = set() 463 464 LAMBDAS = { 465 TokenType.ARROW: lambda self, expressions: self.expression( 466 exp.Lambda, 467 this=self._replace_lambda( 468 self._parse_conjunction(), 469 {node.name for node in expressions}, 470 ), 471 expressions=expressions, 472 ), 473 TokenType.FARROW: lambda self, expressions: self.expression( 474 exp.Kwarg, 475 this=exp.var(expressions[0].name), 476 expression=self._parse_conjunction(), 477 ), 478 } 479 480 COLUMN_OPERATORS = { 481 TokenType.DOT: None, 482 TokenType.DCOLON: lambda self, this, to: self.expression( 483 exp.Cast if self.STRICT_CAST else exp.TryCast, 484 this=this, 485 to=to, 486 ), 487 TokenType.ARROW: lambda self, this, path: self.expression( 488 exp.JSONExtract, 489 this=this, 490 expression=path, 491 ), 492 TokenType.DARROW: lambda self, this, path: self.expression( 493 exp.JSONExtractScalar, 494 
this=this, 495 expression=path, 496 ), 497 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 498 exp.JSONBExtract, 499 this=this, 500 expression=path, 501 ), 502 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 503 exp.JSONBExtractScalar, 504 this=this, 505 expression=path, 506 ), 507 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 508 exp.JSONBContains, 509 this=this, 510 expression=key, 511 ), 512 } 513 514 EXPRESSION_PARSERS = { 515 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 516 exp.Column: lambda self: self._parse_column(), 517 exp.Condition: lambda self: self._parse_conjunction(), 518 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 519 exp.Expression: lambda self: self._parse_statement(), 520 exp.From: lambda self: self._parse_from(), 521 exp.Group: lambda self: self._parse_group(), 522 exp.Having: lambda self: self._parse_having(), 523 exp.Identifier: lambda self: self._parse_id_var(), 524 exp.Join: lambda self: self._parse_join(), 525 exp.Lambda: lambda self: self._parse_lambda(), 526 exp.Lateral: lambda self: self._parse_lateral(), 527 exp.Limit: lambda self: self._parse_limit(), 528 exp.Offset: lambda self: self._parse_offset(), 529 exp.Order: lambda self: self._parse_order(), 530 exp.Ordered: lambda self: self._parse_ordered(), 531 exp.Properties: lambda self: self._parse_properties(), 532 exp.Qualify: lambda self: self._parse_qualify(), 533 exp.Returning: lambda self: self._parse_returning(), 534 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 535 exp.Table: lambda self: self._parse_table_parts(), 536 exp.TableAlias: lambda self: self._parse_table_alias(), 537 exp.Where: lambda self: self._parse_where(), 538 exp.Window: lambda self: self._parse_named_window(), 539 exp.With: lambda self: self._parse_with(), 540 "JOIN_TYPE": lambda self: self._parse_join_parts(), 541 } 542 543 STATEMENT_PARSERS = { 544 TokenType.ALTER: lambda self: 
self._parse_alter(), 545 TokenType.BEGIN: lambda self: self._parse_transaction(), 546 TokenType.CACHE: lambda self: self._parse_cache(), 547 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 548 TokenType.COMMENT: lambda self: self._parse_comment(), 549 TokenType.CREATE: lambda self: self._parse_create(), 550 TokenType.DELETE: lambda self: self._parse_delete(), 551 TokenType.DESC: lambda self: self._parse_describe(), 552 TokenType.DESCRIBE: lambda self: self._parse_describe(), 553 TokenType.DROP: lambda self: self._parse_drop(), 554 TokenType.INSERT: lambda self: self._parse_insert(), 555 TokenType.KILL: lambda self: self._parse_kill(), 556 TokenType.LOAD: lambda self: self._parse_load(), 557 TokenType.MERGE: lambda self: self._parse_merge(), 558 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 559 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 560 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 561 TokenType.SET: lambda self: self._parse_set(), 562 TokenType.UNCACHE: lambda self: self._parse_uncache(), 563 TokenType.UPDATE: lambda self: self._parse_update(), 564 TokenType.USE: lambda self: self.expression( 565 exp.Use, 566 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 567 and exp.var(self._prev.text), 568 this=self._parse_table(schema=False), 569 ), 570 } 571 572 UNARY_PARSERS = { 573 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 574 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 575 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 576 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 577 } 578 579 PRIMARY_PARSERS = { 580 TokenType.STRING: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=True 582 ), 583 TokenType.NUMBER: lambda self, token: self.expression( 584 exp.Literal, 
this=token.text, is_string=False 585 ), 586 TokenType.STAR: lambda self, _: self.expression( 587 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 588 ), 589 TokenType.NULL: lambda self, _: self.expression(exp.Null), 590 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 591 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 592 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 593 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 594 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 595 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 596 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 597 exp.National, this=token.text 598 ), 599 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 600 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 601 exp.RawString, this=token.text 602 ), 603 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 604 } 605 606 PLACEHOLDER_PARSERS = { 607 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 608 TokenType.PARAMETER: lambda self: self._parse_parameter(), 609 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 610 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 611 else None, 612 } 613 614 RANGE_PARSERS = { 615 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 616 TokenType.GLOB: binary_range_parser(exp.Glob), 617 TokenType.ILIKE: binary_range_parser(exp.ILike), 618 TokenType.IN: lambda self, this: self._parse_in(this), 619 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 620 TokenType.IS: lambda self, this: self._parse_is(this), 621 TokenType.LIKE: binary_range_parser(exp.Like), 622 TokenType.OVERLAPS: 
binary_range_parser(exp.Overlaps), 623 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 624 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 625 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 626 } 627 628 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 629 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 630 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 631 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 632 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 633 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 634 "CHECKSUM": lambda self: self._parse_checksum(), 635 "CLUSTER BY": lambda self: self._parse_cluster(), 636 "CLUSTERED": lambda self: self._parse_clustered_by(), 637 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 638 exp.CollateProperty, **kwargs 639 ), 640 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 641 "COPY": lambda self: self._parse_copy_property(), 642 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 643 "DEFINER": lambda self: self._parse_definer(), 644 "DETERMINISTIC": lambda self: self.expression( 645 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 646 ), 647 "DISTKEY": lambda self: self._parse_distkey(), 648 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 649 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 650 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 651 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 652 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 653 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 654 "FREESPACE": lambda self: self._parse_freespace(), 655 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 656 "IMMUTABLE": lambda self: self.expression( 657 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 658 ), 659 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 660 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 661 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 662 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 663 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 664 "LIKE": lambda self: self._parse_create_like(), 665 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 666 "LOCK": lambda self: self._parse_locking(), 667 "LOCKING": lambda self: self._parse_locking(), 668 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 669 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 670 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 671 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 672 "NO": lambda self: self._parse_no_property(), 673 "ON": lambda self: self._parse_on_property(), 674 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 675 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 676 "PARTITION BY": lambda self: self._parse_partitioned_by(), 677 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 678 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 679 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 680 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 681 "REMOTE": lambda self: self._parse_remote_with_connection(), 682 "RETURNS": lambda self: self._parse_returns(), 683 "ROW": lambda self: self._parse_row(), 684 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 685 "SAMPLE": lambda self: self.expression( 686 exp.SampleProperty, 
this=self._match_text_seq("BY") and self._parse_bitwise() 687 ), 688 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 689 "SETTINGS": lambda self: self.expression( 690 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 691 ), 692 "SORTKEY": lambda self: self._parse_sortkey(), 693 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 694 "STABLE": lambda self: self.expression( 695 exp.StabilityProperty, this=exp.Literal.string("STABLE") 696 ), 697 "STORED": lambda self: self._parse_stored(), 698 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 699 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 700 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 701 "TO": lambda self: self._parse_to_table(), 702 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 703 "TRANSFORM": lambda self: self.expression( 704 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 705 ), 706 "TTL": lambda self: self._parse_ttl(), 707 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 708 "VOLATILE": lambda self: self._parse_volatile_property(), 709 "WITH": lambda self: self._parse_with_property(), 710 } 711 712 CONSTRAINT_PARSERS = { 713 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 714 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 715 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 716 "CHARACTER SET": lambda self: self.expression( 717 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 718 ), 719 "CHECK": lambda self: self.expression( 720 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 721 ), 722 "COLLATE": lambda self: self.expression( 723 exp.CollateColumnConstraint, this=self._parse_var() 724 ), 725 "COMMENT": lambda self: self.expression( 726 exp.CommentColumnConstraint, 
this=self._parse_string() 727 ), 728 "COMPRESS": lambda self: self._parse_compress(), 729 "CLUSTERED": lambda self: self.expression( 730 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 731 ), 732 "NONCLUSTERED": lambda self: self.expression( 733 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 734 ), 735 "DEFAULT": lambda self: self.expression( 736 exp.DefaultColumnConstraint, this=self._parse_bitwise() 737 ), 738 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 739 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 740 "FORMAT": lambda self: self.expression( 741 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 742 ), 743 "GENERATED": lambda self: self._parse_generated_as_identity(), 744 "IDENTITY": lambda self: self._parse_auto_increment(), 745 "INLINE": lambda self: self._parse_inline(), 746 "LIKE": lambda self: self._parse_create_like(), 747 "NOT": lambda self: self._parse_not_constraint(), 748 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 749 "ON": lambda self: ( 750 self._match(TokenType.UPDATE) 751 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 752 ) 753 or self.expression(exp.OnProperty, this=self._parse_id_var()), 754 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 755 "PRIMARY KEY": lambda self: self._parse_primary_key(), 756 "REFERENCES": lambda self: self._parse_references(match=False), 757 "TITLE": lambda self: self.expression( 758 exp.TitleColumnConstraint, this=self._parse_var_or_string() 759 ), 760 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 761 "UNIQUE": lambda self: self._parse_unique(), 762 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 763 "WITH": lambda self: self.expression( 764 exp.Properties, 
expressions=self._parse_wrapped_csv(self._parse_property) 765 ), 766 } 767 768 ALTER_PARSERS = { 769 "ADD": lambda self: self._parse_alter_table_add(), 770 "ALTER": lambda self: self._parse_alter_table_alter(), 771 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 772 "DROP": lambda self: self._parse_alter_table_drop(), 773 "RENAME": lambda self: self._parse_alter_table_rename(), 774 } 775 776 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 777 778 NO_PAREN_FUNCTION_PARSERS = { 779 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 780 "CASE": lambda self: self._parse_case(), 781 "IF": lambda self: self._parse_if(), 782 "NEXT": lambda self: self._parse_next_value_for(), 783 } 784 785 INVALID_FUNC_NAME_TOKENS = { 786 TokenType.IDENTIFIER, 787 TokenType.STRING, 788 } 789 790 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 791 792 FUNCTION_PARSERS = { 793 "ANY_VALUE": lambda self: self._parse_any_value(), 794 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 795 "CONCAT": lambda self: self._parse_concat(), 796 "CONCAT_WS": lambda self: self._parse_concat_ws(), 797 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 798 "DECODE": lambda self: self._parse_decode(), 799 "EXTRACT": lambda self: self._parse_extract(), 800 "JSON_OBJECT": lambda self: self._parse_json_object(), 801 "JSON_TABLE": lambda self: self._parse_json_table(), 802 "LOG": lambda self: self._parse_logarithm(), 803 "MATCH": lambda self: self._parse_match_against(), 804 "OPENJSON": lambda self: self._parse_open_json(), 805 "POSITION": lambda self: self._parse_position(), 806 "PREDICT": lambda self: self._parse_predict(), 807 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 808 "STRING_AGG": lambda self: self._parse_string_agg(), 809 "SUBSTRING": lambda self: self._parse_substring(), 810 "TRIM": lambda self: self._parse_trim(), 811 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 
812 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 813 } 814 815 QUERY_MODIFIER_PARSERS = { 816 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 817 TokenType.WHERE: lambda self: ("where", self._parse_where()), 818 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 819 TokenType.HAVING: lambda self: ("having", self._parse_having()), 820 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 821 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 822 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 823 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 824 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 825 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 826 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 827 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 828 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 829 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 830 TokenType.CLUSTER_BY: lambda self: ( 831 "cluster", 832 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 833 ), 834 TokenType.DISTRIBUTE_BY: lambda self: ( 835 "distribute", 836 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 837 ), 838 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 839 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 840 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 841 } 842 843 SET_PARSERS = { 844 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 845 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 846 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 847 "TRANSACTION": lambda self: self._parse_set_transaction(), 848 } 849 850 SHOW_PARSERS: t.Dict[str, 
    # Parsers for typed literals, keyed by data type (e.g. ClickHouse/BigQuery JSON '...').
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types whose output can be further modified by query modifiers (LIMIT etc.).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can begin the SELECT part of a DDL statement (CREATE ... AS <select>).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may immediately precede VOLATILE in a CREATE statement.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords allowed after INSERT OR ... (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether CAST should be strict (raise on invalid casts) by default.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(b, n) takes the base as the first argument.
    LOG_BASE_FIRST = True
    # Whether a single-argument LOG means natural log (LN).
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Initialize the parser; see the class docstring for argument semantics."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self) -> None:
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed; surface all collected errors,
        # chaining the last one as the direct cause.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the token stream on semicolons and run `parse_method` on each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not open an empty chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left unconsumed mean the statement did not parse fully.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, attach any pending token comments instead.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attach (and consume) the previous token's comments to `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Move the cursor forward `times` tokens, refreshing curr/next/prev state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Move the cursor back to an absolute `index` (no-op if already there)."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fall back to an opaque Command node for statements we don't fully parse."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a TO <table> clause into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (expressions, WHERE, GROUP BY, SET aggregates)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level dispatch: parse one SQL statement from the current position."""
        if self._curr is None:
            return None

        # Registered statement keyword (SELECT, CREATE, ...) takes precedence.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Unparsed commands are preserved verbatim as exp.Command.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse DROP [TEMPORARY|MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE|...]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: keep the raw statement as a Command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; returns a truthy value only if the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement (table, view, index, function/procedure, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION drives the parse.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CLONE/COPY clause, optionally with AT/BEFORE time travel.
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that appears before the schema (with NO/DUAL/... prefixes)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the prefix flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/statement property, or a generic key = value pair."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic fallback: <column> = <value>; backtrack if there is no "=".
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> (Hive), including INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse an optional "=" or AS followed by the property's value field."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties until none match; `before` uses the pre-schema variant."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property right after CREATE vs a UDF stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH: a property list, JOURNAL, [NO] DATA, or isolated loading."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None unless both parts are present."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse [NO] LOG (Teradata)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Parse a JOURNAL property; flags are forwarded from `_parse_property_before`."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON|OFF|DEFAULT (Teradata)."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parse CLUSTER BY as a CSV of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ordering)] INTO <n> BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; backtrack over COPY if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <n> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either with an explicit value or as a NO/DEFAULT flag."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <n> [BYTES|KBYTES|KILOBYTES]] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT|AUTOTEMP(...) (Teradata)."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] (Teradata)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: target kind, FOR/IN, lock type, and optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
= "IN" 1711 else: 1712 for_or_in = None 1713 1714 if self._match_text_seq("ACCESS"): 1715 lock_type = "ACCESS" 1716 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1717 lock_type = "EXCLUSIVE" 1718 elif self._match_text_seq("SHARE"): 1719 lock_type = "SHARE" 1720 elif self._match_text_seq("READ"): 1721 lock_type = "READ" 1722 elif self._match_text_seq("WRITE"): 1723 lock_type = "WRITE" 1724 elif self._match_text_seq("CHECKSUM"): 1725 lock_type = "CHECKSUM" 1726 else: 1727 lock_type = None 1728 1729 override = self._match_text_seq("OVERRIDE") 1730 1731 return self.expression( 1732 exp.LockingProperty, 1733 this=this, 1734 kind=kind, 1735 for_or_in=for_or_in, 1736 lock_type=lock_type, 1737 override=override, 1738 ) 1739 1740 def _parse_partition_by(self) -> t.List[exp.Expression]: 1741 if self._match(TokenType.PARTITION_BY): 1742 return self._parse_csv(self._parse_conjunction) 1743 return [] 1744 1745 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1746 self._match(TokenType.EQ) 1747 return self.expression( 1748 exp.PartitionedByProperty, 1749 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1750 ) 1751 1752 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1753 if self._match_text_seq("AND", "STATISTICS"): 1754 statistics = True 1755 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1756 statistics = False 1757 else: 1758 statistics = None 1759 1760 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1761 1762 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1763 if self._match_text_seq("PRIMARY", "INDEX"): 1764 return exp.NoPrimaryIndexProperty() 1765 return None 1766 1767 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1768 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1769 return exp.OnCommitProperty() 1770 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1771 return exp.OnCommitProperty(delete=True) 1772 return 
self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1773 1774 def _parse_distkey(self) -> exp.DistKeyProperty: 1775 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1776 1777 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1778 table = self._parse_table(schema=True) 1779 1780 options = [] 1781 while self._match_texts(("INCLUDING", "EXCLUDING")): 1782 this = self._prev.text.upper() 1783 1784 id_var = self._parse_id_var() 1785 if not id_var: 1786 return None 1787 1788 options.append( 1789 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1790 ) 1791 1792 return self.expression(exp.LikeProperty, this=table, expressions=options) 1793 1794 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1795 return self.expression( 1796 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1797 ) 1798 1799 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1800 self._match(TokenType.EQ) 1801 return self.expression( 1802 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1803 ) 1804 1805 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1806 self._match_text_seq("WITH", "CONNECTION") 1807 return self.expression( 1808 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1809 ) 1810 1811 def _parse_returns(self) -> exp.ReturnsProperty: 1812 value: t.Optional[exp.Expression] 1813 is_table = self._match(TokenType.TABLE) 1814 1815 if is_table: 1816 if self._match(TokenType.LT): 1817 value = self.expression( 1818 exp.Schema, 1819 this="TABLE", 1820 expressions=self._parse_csv(self._parse_struct_types), 1821 ) 1822 if not self._match(TokenType.GT): 1823 self.raise_error("Expecting >") 1824 else: 1825 value = self._parse_schema(exp.var("TABLE")) 1826 else: 1827 value = self._parse_types() 1828 1829 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1830 1831 def _parse_describe(self) -> exp.Describe: 1832 kind = self._match_set(self.CREATABLES) and self._prev.text 1833 this = self._parse_table(schema=True) 1834 properties = self._parse_properties() 1835 expressions = properties.expressions if properties else None 1836 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1837 1838 def _parse_insert(self) -> exp.Insert: 1839 comments = ensure_list(self._prev_comments) 1840 overwrite = self._match(TokenType.OVERWRITE) 1841 ignore = self._match(TokenType.IGNORE) 1842 local = self._match_text_seq("LOCAL") 1843 alternative = None 1844 1845 if self._match_text_seq("DIRECTORY"): 1846 this: t.Optional[exp.Expression] = self.expression( 1847 exp.Directory, 1848 this=self._parse_var_or_string(), 1849 local=local, 1850 row_format=self._parse_row_format(match_row=True), 1851 ) 1852 else: 1853 if self._match(TokenType.OR): 1854 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1855 1856 self._match(TokenType.INTO) 1857 comments += ensure_list(self._prev_comments) 1858 self._match(TokenType.TABLE) 1859 this = self._parse_table(schema=True) 1860 1861 returning = self._parse_returning() 1862 1863 return self.expression( 1864 exp.Insert, 1865 comments=comments, 1866 this=this, 1867 by_name=self._match_text_seq("BY", "NAME"), 1868 exists=self._parse_exists(), 1869 partition=self._parse_partition(), 1870 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1871 and self._parse_conjunction(), 1872 expression=self._parse_ddl_select(), 1873 conflict=self._parse_on_conflict(), 1874 returning=returning or self._parse_returning(), 1875 overwrite=overwrite, 1876 alternative=alternative, 1877 ignore=ignore, 1878 ) 1879 1880 def _parse_kill(self) -> exp.Kill: 1881 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1882 1883 return self.expression( 1884 exp.Kill, 1885 
                this=self._parse_primary(),
                kind=kind,
            )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (postgres-style) or ON DUPLICATE KEY (mysql-style) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Either a named constraint or a conflict-target column/value list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = 1, b = 2, ...
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Caller consumed ROW; FORMAT must follow for this to be a row-format property.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... properties."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # All delimiter options are optional; ESCAPED BY is only valid after FIELDS TERMINATED BY.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA [LOCAL] INPATH ... INTO TABLE; fall back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # Some dialects place RETURNING before WHERE, others after LIMIT.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def
_parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Hook point: dialects may override how the SELECT list is parsed.
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a parenthesized/nested query, or a VALUES clause."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                # Attach the leading WITH to the statement that follows it.
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare "FROM x" (duckdb) becomes SELECT * FROM x.
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte> ...]."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; tolerate a redundant WITH between them.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(cols)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <name> [(col1, col2, ...)]; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind if the parenthesis didn't actually start a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery node, with optional pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to a query node."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style: hoist the offset into its own clause.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ after SELECT."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <target>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse MATCH_RECOGNIZE(...) — row-pattern recognition."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER
MATCH") 2321 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2322 text = "ALL ROWS PER MATCH" 2323 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2324 text += f" SHOW EMPTY MATCHES" 2325 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2326 text += f" OMIT EMPTY MATCHES" 2327 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2328 text += f" WITH UNMATCHED ROWS" 2329 rows = exp.var(text) 2330 else: 2331 rows = None 2332 2333 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2334 text = "AFTER MATCH SKIP" 2335 if self._match_text_seq("PAST", "LAST", "ROW"): 2336 text += f" PAST LAST ROW" 2337 elif self._match_text_seq("TO", "NEXT", "ROW"): 2338 text += f" TO NEXT ROW" 2339 elif self._match_text_seq("TO", "FIRST"): 2340 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2341 elif self._match_text_seq("TO", "LAST"): 2342 text += f" TO LAST {self._advance_any().text}" # type: ignore 2343 after = exp.var(text) 2344 else: 2345 after = None 2346 2347 if self._match_text_seq("PATTERN"): 2348 self._match_l_paren() 2349 2350 if not self._curr: 2351 self.raise_error("Expecting )", self._curr) 2352 2353 paren = 1 2354 start = self._curr 2355 2356 while self._curr and paren > 0: 2357 if self._curr.token_type == TokenType.L_PAREN: 2358 paren += 1 2359 if self._curr.token_type == TokenType.R_PAREN: 2360 paren -= 1 2361 2362 end = self._prev 2363 self._advance() 2364 2365 if paren > 0: 2366 self.raise_error("Expecting )", self._curr) 2367 2368 pattern = exp.var(self._find_sql(start, end)) 2369 else: 2370 pattern = None 2371 2372 define = ( 2373 self._parse_csv( 2374 lambda: self.expression( 2375 exp.Alias, 2376 alias=self._parse_id_var(any_token=True), 2377 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2378 ) 2379 ) 2380 if self._match_text_seq("DEFINE") 2381 else None 2382 ) 2383 2384 self._match_r_paren() 2385 2386 return self.expression( 2387 exp.MatchRecognize, 2388 partition_by=partition, 2389 order=order, 2390 
measures=measures, 2391 rows=rows, 2392 after=after, 2393 pattern=pattern, 2394 define=define, 2395 alias=self._parse_table_alias(), 2396 ) 2397 2398 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2399 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2400 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2401 2402 if outer_apply or cross_apply: 2403 this = self._parse_select(table=True) 2404 view = None 2405 outer = not cross_apply 2406 elif self._match(TokenType.LATERAL): 2407 this = self._parse_select(table=True) 2408 view = self._match(TokenType.VIEW) 2409 outer = self._match(TokenType.OUTER) 2410 else: 2411 return None 2412 2413 if not this: 2414 this = ( 2415 self._parse_unnest() 2416 or self._parse_function() 2417 or self._parse_id_var(any_token=False) 2418 ) 2419 2420 while self._match(TokenType.DOT): 2421 this = exp.Dot( 2422 this=this, 2423 expression=self._parse_function() or self._parse_id_var(any_token=False), 2424 ) 2425 2426 if view: 2427 table = self._parse_id_var(any_token=False) 2428 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2429 table_alias: t.Optional[exp.TableAlias] = self.expression( 2430 exp.TableAlias, this=table, columns=columns 2431 ) 2432 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2433 # We move the alias from the lateral's child node to the lateral itself 2434 table_alias = this.args["alias"].pop() 2435 else: 2436 table_alias = self._parse_table_alias() 2437 2438 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2439 2440 def _parse_join_parts( 2441 self, 2442 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2443 return ( 2444 self._match_set(self.JOIN_METHODS) and self._prev, 2445 self._match_set(self.JOIN_SIDES) and self._prev, 2446 self._match_set(self.JOIN_KINDS) and self._prev, 2447 ) 2448 2449 def _parse_join( 2450 self, skip_join_token: bool = False, parse_bracket: 
bool = False 2451 ) -> t.Optional[exp.Join]: 2452 if self._match(TokenType.COMMA): 2453 return self.expression(exp.Join, this=self._parse_table()) 2454 2455 index = self._index 2456 method, side, kind = self._parse_join_parts() 2457 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2458 join = self._match(TokenType.JOIN) 2459 2460 if not skip_join_token and not join: 2461 self._retreat(index) 2462 kind = None 2463 method = None 2464 side = None 2465 2466 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2467 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2468 2469 if not skip_join_token and not join and not outer_apply and not cross_apply: 2470 return None 2471 2472 if outer_apply: 2473 side = Token(TokenType.LEFT, "LEFT") 2474 2475 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2476 2477 if method: 2478 kwargs["method"] = method.text 2479 if side: 2480 kwargs["side"] = side.text 2481 if kind: 2482 kwargs["kind"] = kind.text 2483 if hint: 2484 kwargs["hint"] = hint 2485 2486 if self._match(TokenType.ON): 2487 kwargs["on"] = self._parse_conjunction() 2488 elif self._match(TokenType.USING): 2489 kwargs["using"] = self._parse_wrapped_id_vars() 2490 elif not (kind and kind.token_type == TokenType.CROSS): 2491 index = self._index 2492 join = self._parse_join() 2493 2494 if join and self._match(TokenType.ON): 2495 kwargs["on"] = self._parse_conjunction() 2496 elif join and self._match(TokenType.USING): 2497 kwargs["using"] = self._parse_wrapped_id_vars() 2498 else: 2499 join = None 2500 self._retreat(index) 2501 2502 kwargs["this"].set("joins", [join] if join else None) 2503 2504 comments = [c for token in (method, side, kind) if token for c in token.comments] 2505 return self.expression(exp.Join, comments=comments, **kwargs) 2506 2507 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2508 this = self._parse_conjunction() 2509 if 
self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        # A trailing bare identifier is treated as a postgres operator class.
        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse CREATE INDEX internals; `index` is pre-parsed when the name came first."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function/identifier/string)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse [catalog.][db.]table, allowing deeper dot-nesting past three parts."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: lateral, unnest, VALUES, subquery, or a plain table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias, others after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal/versioned table clauses (FOR SYSTEM_TIME AS OF, VERSION BETWEEN, ...)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
2709 expression = None 2710 else: 2711 self._match_text_seq("AS", "OF") 2712 kind = "AS OF" 2713 expression = self._parse_type() 2714 2715 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2716 2717 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2718 if not self._match(TokenType.UNNEST): 2719 return None 2720 2721 expressions = self._parse_wrapped_csv(self._parse_type) 2722 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2723 2724 alias = self._parse_table_alias() if with_alias else None 2725 2726 if alias: 2727 if self.UNNEST_COLUMN_ONLY: 2728 if alias.args.get("columns"): 2729 self.raise_error("Unexpected extra column alias in unnest.") 2730 2731 alias.set("columns", [alias.this]) 2732 alias.set("this", None) 2733 2734 columns = alias.args.get("columns") or [] 2735 if offset and len(expressions) < len(columns): 2736 offset = columns.pop() 2737 2738 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2739 self._match(TokenType.ALIAS) 2740 offset = self._parse_id_var( 2741 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2742 ) or exp.to_identifier("offset") 2743 2744 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2745 2746 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2747 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2748 if not is_derived and not self._match(TokenType.VALUES): 2749 return None 2750 2751 expressions = self._parse_csv(self._parse_value) 2752 alias = self._parse_table_alias() 2753 2754 if is_derived: 2755 self._match_r_paren() 2756 2757 return self.expression( 2758 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2759 ) 2760 2761 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2762 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2763 as_modifier and self._match_text_seq("USING", "SAMPLE") 2764 ): 
2765 return None 2766 2767 bucket_numerator = None 2768 bucket_denominator = None 2769 bucket_field = None 2770 percent = None 2771 rows = None 2772 size = None 2773 seed = None 2774 2775 kind = ( 2776 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2777 ) 2778 method = self._parse_var(tokens=(TokenType.ROW,)) 2779 2780 matched_l_paren = self._match(TokenType.L_PAREN) 2781 2782 if self.TABLESAMPLE_CSV: 2783 num = None 2784 expressions = self._parse_csv(self._parse_primary) 2785 else: 2786 expressions = None 2787 num = ( 2788 self._parse_factor() 2789 if self._match(TokenType.NUMBER, advance=False) 2790 else self._parse_primary() 2791 ) 2792 2793 if self._match_text_seq("BUCKET"): 2794 bucket_numerator = self._parse_number() 2795 self._match_text_seq("OUT", "OF") 2796 bucket_denominator = bucket_denominator = self._parse_number() 2797 self._match(TokenType.ON) 2798 bucket_field = self._parse_field() 2799 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2800 percent = num 2801 elif self._match(TokenType.ROWS): 2802 rows = num 2803 elif num: 2804 size = num 2805 2806 if matched_l_paren: 2807 self._match_r_paren() 2808 2809 if self._match(TokenType.L_PAREN): 2810 method = self._parse_var() 2811 seed = self._match(TokenType.COMMA) and self._parse_number() 2812 self._match_r_paren() 2813 elif self._match_texts(("SEED", "REPEATABLE")): 2814 seed = self._parse_wrapped(self._parse_number) 2815 2816 return self.expression( 2817 exp.TableSample, 2818 expressions=expressions, 2819 method=method, 2820 bucket_numerator=bucket_numerator, 2821 bucket_denominator=bucket_denominator, 2822 bucket_field=bucket_field, 2823 percent=percent, 2824 rows=rows, 2825 size=size, 2826 seed=seed, 2827 kind=kind, 2828 ) 2829 2830 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2831 return list(iter(self._parse_pivot, None)) or None 2832 2833 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2834 return list(iter(self._parse_join, 
None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse duckdb's simplified PIVOT syntax: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse PIVOT(<aggs> FOR <col> IN (...)) or UNPIVOT(...)."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not a pivot after all — rewind the PIVOT/UNPIVOT token.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot will produce.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Dialect hook: default is the aggregations' aliases.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS, [WITH] ROLLUP/CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP stores True; ROLLUP (cols) stores the column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else (e.g. WITH TIES) — rewind it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle hierarchical queries: START WITH ... CONNECT BY ..."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # NOTE(review): this temporarily registers PRIOR in a class-level dict so the
        # condition parser recognizes it; the matching pop happens right after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY <ordered>, ..."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: <expr> [ASC | DESC] [NULLS FIRST | NULLS LAST]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # Subtle: `(asc and False)` makes desc False (not None) when ASC was explicit,
        # so downstream can distinguish an explicit ASC from no ordering token at all.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's default null ordering when none was given explicitly.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, TOP [(] n [)], or FETCH FIRST/NEXT n ROWS."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL LIMIT <offset>, <count>.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW | ROWS]."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or a WAIT <n> expression.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION / EXCEPT / INTERSECT chains following a query."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            # Absent ALL implies DISTINCT.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse one projection: a conjunction with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is,
this=this, expression=exp.Null()) 3197 3198 # Postgres supports ISNULL and NOTNULL for conditions. 3199 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3200 if self._match(TokenType.NOTNULL): 3201 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3202 this = self.expression(exp.Not, this=this) 3203 3204 if negate: 3205 this = self.expression(exp.Not, this=this) 3206 3207 if self._match(TokenType.IS): 3208 this = self._parse_is(this) 3209 3210 return this 3211 3212 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3213 index = self._index - 1 3214 negate = self._match(TokenType.NOT) 3215 3216 if self._match_text_seq("DISTINCT", "FROM"): 3217 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3218 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3219 3220 expression = self._parse_null() or self._parse_boolean() 3221 if not expression: 3222 self._retreat(index) 3223 return None 3224 3225 this = self.expression(exp.Is, this=this, expression=expression) 3226 return self.expression(exp.Not, this=this) if negate else this 3227 3228 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3229 unnest = self._parse_unnest(with_alias=False) 3230 if unnest: 3231 this = self.expression(exp.In, this=this, unnest=unnest) 3232 elif self._match(TokenType.L_PAREN): 3233 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3234 3235 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3236 this = self.expression(exp.In, this=this, query=expressions[0]) 3237 else: 3238 this = self.expression(exp.In, this=this, expressions=expressions) 3239 3240 self._match_r_paren(this) 3241 else: 3242 this = self.expression(exp.In, this=this, field=self._parse_field()) 3243 3244 return this 3245 3246 def _parse_between(self, this: exp.Expression) -> exp.Between: 3247 low = self._parse_bitwise() 3248 
self._match(TokenType.AND) 3249 high = self._parse_bitwise() 3250 return self.expression(exp.Between, this=this, low=low, high=high) 3251 3252 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3253 if not self._match(TokenType.ESCAPE): 3254 return this 3255 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3256 3257 def _parse_interval(self) -> t.Optional[exp.Interval]: 3258 index = self._index 3259 3260 if not self._match(TokenType.INTERVAL): 3261 return None 3262 3263 if self._match(TokenType.STRING, advance=False): 3264 this = self._parse_primary() 3265 else: 3266 this = self._parse_term() 3267 3268 if not this: 3269 self._retreat(index) 3270 return None 3271 3272 unit = self._parse_function() or self._parse_var(any_token=True) 3273 3274 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3275 # each INTERVAL expression into this canonical form so it's easy to transpile 3276 if this and this.is_number: 3277 this = exp.Literal.string(this.name) 3278 elif this and this.is_string: 3279 parts = this.name.split() 3280 3281 if len(parts) == 2: 3282 if unit: 3283 # This is not actually a unit, it's something else (e.g. 
a "window side") 3284 unit = None 3285 self._retreat(self._index - 1) 3286 3287 this = exp.Literal.string(parts[0]) 3288 unit = self.expression(exp.Var, this=parts[1]) 3289 3290 return self.expression(exp.Interval, this=this, unit=unit) 3291 3292 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3293 this = self._parse_term() 3294 3295 while True: 3296 if self._match_set(self.BITWISE): 3297 this = self.expression( 3298 self.BITWISE[self._prev.token_type], 3299 this=this, 3300 expression=self._parse_term(), 3301 ) 3302 elif self._match(TokenType.DQMARK): 3303 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3304 elif self._match_pair(TokenType.LT, TokenType.LT): 3305 this = self.expression( 3306 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3307 ) 3308 elif self._match_pair(TokenType.GT, TokenType.GT): 3309 this = self.expression( 3310 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3311 ) 3312 else: 3313 break 3314 3315 return this 3316 3317 def _parse_term(self) -> t.Optional[exp.Expression]: 3318 return self._parse_tokens(self._parse_factor, self.TERM) 3319 3320 def _parse_factor(self) -> t.Optional[exp.Expression]: 3321 return self._parse_tokens(self._parse_unary, self.FACTOR) 3322 3323 def _parse_unary(self) -> t.Optional[exp.Expression]: 3324 if self._match_set(self.UNARY_PARSERS): 3325 return self.UNARY_PARSERS[self._prev.token_type](self) 3326 return self._parse_at_time_zone(self._parse_type()) 3327 3328 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3329 interval = parse_interval and self._parse_interval() 3330 if interval: 3331 return interval 3332 3333 index = self._index 3334 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3335 this = self._parse_column() 3336 3337 if data_type: 3338 if isinstance(this, exp.Literal): 3339 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3340 if parser: 3341 return parser(self, this, 
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 30 in VARCHAR(30)."""
        this = self._parse_type()
        if not this:
            return None

        # A trailing var (e.g. "CHAR") becomes the param's qualifier
        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Try to parse a data type at the current position.

        Args:
            check_func: when True, bail out (retreat) if what follows looks like a
                function call on a type-named function (e.g. DATE '...'), so the
                caller can parse it as a function instead.
            schema: propagated into nested type parses (struct/array members).
            allow_identifiers: allow a plain identifier to be re-tokenized and
                treated as a type name (or a UDT if the dialect supports them).

        Returns:
            A DataType / PseudoType / ObjectIdentifier / Interval node, or None
            (with the token position restored) if no type starts here.
        """
        index = self._index

        # Teradata-style SYSUDTLIB. prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text: quoted identifiers may hide a
                # real type keyword (e.g. "INT")
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    # Accumulate a dotted UDT name: db.schema.type
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized parameters: sizes, enum members, or nested member types
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list — this wasn't a type after all
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call named like a type
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket syntax for nested types: ARRAY<INT>, STRUCT<a INT, ...>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE suffixes select the concrete temporal type
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit> (e.g. DAY TO SECOND)
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # TYPE(...) not followed by a string literal: treat as a function
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in arrays: INT[][] -> ARRAY<ARRAY<INT>>
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct member: <name>[:] <type> (or just a type)."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone>, if the clause is present."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to a Column node."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple.

        Returns None when nothing at the current position can start a primary.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal: .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                # (SELECT ...) — wrap as a subquery; further set ops may follow
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) — a tuple
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                # (expr) — plain parenthesization
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first a primary, then a function call, then an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
self.FUNCTION_PARSERS.get(upper) 3654 if parser and not anonymous: 3655 this = parser(self) 3656 else: 3657 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3658 3659 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3660 this = self.expression(subquery_predicate, this=self._parse_select()) 3661 self._match_r_paren() 3662 return this 3663 3664 if functions is None: 3665 functions = self.FUNCTIONS 3666 3667 function = functions.get(upper) 3668 3669 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3670 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3671 3672 if function and not anonymous: 3673 func = self.validate_expression(function(args), args) 3674 if not self.NORMALIZE_FUNCTIONS: 3675 func.meta["name"] = this 3676 this = func 3677 else: 3678 this = self.expression(exp.Anonymous, this=this, expressions=args) 3679 3680 self._match_r_paren(this) 3681 return self._parse_window(this) 3682 3683 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3684 return self._parse_column_def(self._parse_id_var()) 3685 3686 def _parse_user_defined_function( 3687 self, kind: t.Optional[TokenType] = None 3688 ) -> t.Optional[exp.Expression]: 3689 this = self._parse_id_var() 3690 3691 while self._match(TokenType.DOT): 3692 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3693 3694 if not self._match(TokenType.L_PAREN): 3695 return this 3696 3697 expressions = self._parse_csv(self._parse_function_parameter) 3698 self._match_r_paren() 3699 return self.expression( 3700 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3701 ) 3702 3703 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3704 literal = self._parse_primary() 3705 if literal: 3706 return self.expression(exp.Introducer, this=token.text, expression=literal) 3707 3708 return self.expression(exp.Identifier, this=token.text) 3709 3710 def _parse_session_parameter(self) -> 
exp.SessionParameter: 3711 kind = None 3712 this = self._parse_id_var() or self._parse_primary() 3713 3714 if this and self._match(TokenType.DOT): 3715 kind = this.name 3716 this = self._parse_var() or self._parse_primary() 3717 3718 return self.expression(exp.SessionParameter, this=this, kind=kind) 3719 3720 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3721 index = self._index 3722 3723 if self._match(TokenType.L_PAREN): 3724 expressions = t.cast( 3725 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3726 ) 3727 3728 if not self._match(TokenType.R_PAREN): 3729 self._retreat(index) 3730 else: 3731 expressions = [self._parse_id_var()] 3732 3733 if self._match_set(self.LAMBDAS): 3734 return self.LAMBDAS[self._prev.token_type](self, expressions) 3735 3736 self._retreat(index) 3737 3738 this: t.Optional[exp.Expression] 3739 3740 if self._match(TokenType.DISTINCT): 3741 this = self.expression( 3742 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3743 ) 3744 else: 3745 this = self._parse_select_or_expression(alias=alias) 3746 3747 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3748 3749 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3750 index = self._index 3751 3752 if not self.errors: 3753 try: 3754 if self._parse_select(nested=True): 3755 return this 3756 except ParseError: 3757 pass 3758 finally: 3759 self.errors.clear() 3760 self._retreat(index) 3761 3762 if not self._match(TokenType.L_PAREN): 3763 return this 3764 3765 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3766 3767 self._match_r_paren() 3768 return self.expression(exp.Schema, this=this, expressions=args) 3769 3770 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3771 return self._parse_column_def(self._parse_field(any_token=True)) 3772 3773 def _parse_column_def(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 3774 # column defs are not really columns, they're identifiers 3775 if isinstance(this, exp.Column): 3776 this = this.this 3777 3778 kind = self._parse_types(schema=True) 3779 3780 if self._match_text_seq("FOR", "ORDINALITY"): 3781 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3782 3783 constraints: t.List[exp.Expression] = [] 3784 3785 if not kind and self._match(TokenType.ALIAS): 3786 constraints.append( 3787 self.expression( 3788 exp.ComputedColumnConstraint, 3789 this=self._parse_conjunction(), 3790 persisted=self._match_text_seq("PERSISTED"), 3791 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3792 ) 3793 ) 3794 3795 while True: 3796 constraint = self._parse_column_constraint() 3797 if not constraint: 3798 break 3799 constraints.append(constraint) 3800 3801 if not kind and not constraints: 3802 return this 3803 3804 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3805 3806 def _parse_auto_increment( 3807 self, 3808 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3809 start = None 3810 increment = None 3811 3812 if self._match(TokenType.L_PAREN, advance=False): 3813 args = self._parse_wrapped_csv(self._parse_bitwise) 3814 start = seq_get(args, 0) 3815 increment = seq_get(args, 1) 3816 elif self._match_text_seq("START"): 3817 start = self._parse_bitwise() 3818 self._match_text_seq("INCREMENT") 3819 increment = self._parse_bitwise() 3820 3821 if start and increment: 3822 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3823 3824 return exp.AutoIncrementColumnConstraint() 3825 3826 def _parse_compress(self) -> exp.CompressColumnConstraint: 3827 if self._match(TokenType.L_PAREN, advance=False): 3828 return self.expression( 3829 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3830 ) 3831 3832 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS {IDENTITY (...) | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options; each is optional and order-sensitive
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column expression
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare "(start[, increment])" shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr> (Teradata-style inline length)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL / CASESPECIFIC / FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # A CONSTRAINT name with no recognized kind is returned bare
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint; without CONSTRAINT, try an unnamed one."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several kinds (e.g. PRIMARY KEY + options)
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints`
        (defaults to all of CONSTRAINT_PARSERS)."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )
True: 3945 if not self._curr: 3946 break 3947 3948 if self._match(TokenType.ON): 3949 action = None 3950 on = self._advance_any() and self._prev.text 3951 3952 if self._match_text_seq("NO", "ACTION"): 3953 action = "NO ACTION" 3954 elif self._match_text_seq("CASCADE"): 3955 action = "CASCADE" 3956 elif self._match_text_seq("RESTRICT"): 3957 action = "RESTRICT" 3958 elif self._match_pair(TokenType.SET, TokenType.NULL): 3959 action = "SET NULL" 3960 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3961 action = "SET DEFAULT" 3962 else: 3963 self.raise_error("Invalid key constraint") 3964 3965 options.append(f"ON {on} {action}") 3966 elif self._match_text_seq("NOT", "ENFORCED"): 3967 options.append("NOT ENFORCED") 3968 elif self._match_text_seq("DEFERRABLE"): 3969 options.append("DEFERRABLE") 3970 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3971 options.append("INITIALLY DEFERRED") 3972 elif self._match_text_seq("NORELY"): 3973 options.append("NORELY") 3974 elif self._match_text_seq("MATCH", "FULL"): 3975 options.append("MATCH FULL") 3976 else: 3977 break 3978 3979 return options 3980 3981 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3982 if match and not self._match(TokenType.REFERENCES): 3983 return None 3984 3985 expressions = None 3986 this = self._parse_table(schema=True) 3987 options = self._parse_key_constraint_options() 3988 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3989 3990 def _parse_foreign_key(self) -> exp.ForeignKey: 3991 expressions = self._parse_wrapped_id_vars() 3992 reference = self._parse_references() 3993 options = {} 3994 3995 while self._match(TokenType.ON): 3996 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3997 self.raise_error("Expected DELETE or UPDATE") 3998 3999 kind = self._prev.text.lower() 4000 4001 if self._match_text_seq("NO", "ACTION"): 4002 action = "NO ACTION" 4003 elif self._match(TokenType.SET): 4004 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4005 action = "SET " + self._prev.text.upper() 4006 else: 4007 self._advance() 4008 action = self._prev.text.upper() 4009 4010 options[kind] = action 4011 4012 return self.expression( 4013 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4014 ) 4015 4016 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4017 return self._parse_field() 4018 4019 def _parse_primary_key( 4020 self, wrapped_optional: bool = False, in_props: bool = False 4021 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4022 desc = ( 4023 self._match_set((TokenType.ASC, TokenType.DESC)) 4024 and self._prev.token_type == TokenType.DESC 4025 ) 4026 4027 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4028 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4029 4030 expressions = self._parse_wrapped_csv( 4031 self._parse_primary_key_part, optional=wrapped_optional 4032 ) 4033 options = self._parse_key_constraint_options() 4034 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4035 4036 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4037 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4038 return this 4039 4040 bracket_kind = self._prev.token_type 4041 4042 if self._match(TokenType.COLON): 4043 expressions: t.List[exp.Expression] = [ 4044 self.expression(exp.Slice, expression=self._parse_conjunction()) 4045 ] 4046 else: 4047 expressions = self._parse_csv( 4048 lambda: self._parse_slice( 4049 self._parse_alias(self._parse_conjunction(), explicit=True) 4050 ) 4051 ) 4052 4053 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4054 if bracket_kind == TokenType.L_BRACE: 4055 this = self.expression(exp.Struct, expressions=expressions) 4056 elif not this or this.name.upper() == "ARRAY": 4057 this = self.expression(exp.Array, expressions=expressions) 4058 
        else:
            # Plain subscript: shift user-facing indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # A colon following `this` turns it into the lower bound of a slice.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END (CASE already consumed)."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # A CASE expression may be followed by an OVER clause.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or IF cond THEN true [ELSE false] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all: rewind so the token can be reinterpreted.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was consumed by the caller.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        # ANY_VALUE(expr [HAVING MAX|MIN column]).
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') form.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT on a temporal type becomes StrToDate / StrToTime with the
                # format string translated through the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delim, ...): the first argument is the separator."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            # Not enough arguments to split off a delimiter; keep them as-is.
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregates, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired value is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # A non-literal search value may itself be NULL at runtime, so also match
                # when both the operand and the search value are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] key {:|,} [VALUE] value -- a single JSON_OBJECT argument.
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` in FormatJson when it is followed by a FORMAT JSON clause.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        # COLUMNS (<column definitions>), as used inside JSON_TABLE.
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single-argument LOG: some dialects treat it as the natural logarithm.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec inside the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <model>, TABLE <table> [, params]).
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint form: FUNC_NAME(table, table, ...).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the operands arrive reversed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...) [, <name> AS (...) ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Optionally wrap `this` with an IGNORE NULLS / RESPECT NULLS modifier.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax on `this`: FILTER, WITHIN GROUP, and OVER (...)."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            # Named window definition: <name> AS (...); no OVER keyword involved.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> -- reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint: UNBOUNDED / CURRENT ROW / expr, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a trailing [AS] alias (or AS (alias, ...) list) on `this`."""
        any_token = self._match(TokenType.ALIAS)

        # With explicit=True an alias is only taken when introduced by AS.
        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, falling back to tokens usable as identifiers."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the next token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference, optionally wrapped in braces, e.g. @{name}.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined: give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # EXCEPT (col, ...) or EXCEPT col -- column exclusion in SELECT *.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # REPLACE (expr, ...) or REPLACE expr -- column replacement in SELECT *.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators listed in `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional` tolerates their absence."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens, e.g. ISOLATION LEVEL SERIALIZABLE.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        # ALTER TABLE ... DROP [COLUMN] ...; default the drop kind to COLUMN.
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ALTER TABLE ... ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        # Some dialects allow ADD once for a whole list of column defs.
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] col {DROP|SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; fall back to a raw Command when the action isn't recognized."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable node when all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dispatch on the longest-matching SHOW variant; unknown forms become Commands.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5116 self._retreat(index) 5117 return None 5118 5119 right = self._parse_statement() or self._parse_id_var() 5120 this = self.expression(exp.EQ, this=left, expression=right) 5121 5122 return self.expression(exp.SetItem, this=this, kind=kind) 5123 5124 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5125 self._match_text_seq("TRANSACTION") 5126 characteristics = self._parse_csv( 5127 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5128 ) 5129 return self.expression( 5130 exp.SetItem, 5131 expressions=characteristics, 5132 kind="TRANSACTION", 5133 **{"global": global_}, # type: ignore 5134 ) 5135 5136 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5137 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5138 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5139 5140 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5141 index = self._index 5142 set_ = self.expression( 5143 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5144 ) 5145 5146 if self._curr: 5147 self._retreat(index) 5148 return self._parse_as_command(self._prev) 5149 5150 return set_ 5151 5152 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5153 for option in options: 5154 if self._match_text_seq(*option.split(" ")): 5155 return exp.var(option) 5156 return None 5157 5158 def _parse_as_command(self, start: Token) -> exp.Command: 5159 while self._curr: 5160 self._advance() 5161 text = self._find_sql(start, self._prev) 5162 size = len(start.text) 5163 return exp.Command(this=text[:size], expression=text[size:]) 5164 5165 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5166 settings = [] 5167 5168 self._match_l_paren() 5169 kind = self._parse_id_var() 5170 5171 if self._match(TokenType.L_PAREN): 5172 while True: 5173 key = 
self._parse_id_var() 5174 value = self._parse_primary() 5175 5176 if not key and value is None: 5177 break 5178 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5179 self._match(TokenType.R_PAREN) 5180 5181 self._match_r_paren() 5182 5183 return self.expression( 5184 exp.DictProperty, 5185 this=this, 5186 kind=kind.this if kind else None, 5187 settings=settings, 5188 ) 5189 5190 def _parse_dict_range(self, this: str) -> exp.DictRange: 5191 self._match_l_paren() 5192 has_min = self._match_text_seq("MIN") 5193 if has_min: 5194 min = self._parse_var() or self._parse_primary() 5195 self._match_text_seq("MAX") 5196 max = self._parse_var() or self._parse_primary() 5197 else: 5198 max = self._parse_var() or self._parse_primary() 5199 min = exp.Literal.number(0) 5200 self._match_r_paren() 5201 return self.expression(exp.DictRange, this=this, min=min, max=max) 5202 5203 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5204 index = self._index 5205 expression = self._parse_column() 5206 if not self._match(TokenType.IN): 5207 self._retreat(index - 1) 5208 return None 5209 iterator = self._parse_column() 5210 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5211 return self.expression( 5212 exp.Comprehension, 5213 this=this, 5214 expression=expression, 5215 iterator=iterator, 5216 condition=condition, 5217 ) 5218 5219 def _find_parser( 5220 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5221 ) -> t.Optional[t.Callable]: 5222 if not self._curr: 5223 return None 5224 5225 index = self._index 5226 this = [] 5227 while True: 5228 # The current token might be multiple words 5229 curr = self._curr.text.upper() 5230 key = curr.split(" ") 5231 this.append(curr) 5232 5233 self._advance() 5234 result, trie = in_trie(trie, key) 5235 if result == TrieResult.FAILED: 5236 break 5237 5238 if result == TrieResult.EXISTS: 5239 subparser = parsers[" ".join(this)] 5240 return subparser 5241 5242 
self._retreat(index) 5243 return None 5244 5245 def _match(self, token_type, advance=True, expression=None): 5246 if not self._curr: 5247 return None 5248 5249 if self._curr.token_type == token_type: 5250 if advance: 5251 self._advance() 5252 self._add_comments(expression) 5253 return True 5254 5255 return None 5256 5257 def _match_set(self, types, advance=True): 5258 if not self._curr: 5259 return None 5260 5261 if self._curr.token_type in types: 5262 if advance: 5263 self._advance() 5264 return True 5265 5266 return None 5267 5268 def _match_pair(self, token_type_a, token_type_b, advance=True): 5269 if not self._curr or not self._next: 5270 return None 5271 5272 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5273 if advance: 5274 self._advance(2) 5275 return True 5276 5277 return None 5278 5279 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5280 if not self._match(TokenType.L_PAREN, expression=expression): 5281 self.raise_error("Expecting (") 5282 5283 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5284 if not self._match(TokenType.R_PAREN, expression=expression): 5285 self.raise_error("Expecting )") 5286 5287 def _match_texts(self, texts, advance=True): 5288 if self._curr and self._curr.text.upper() in texts: 5289 if advance: 5290 self._advance() 5291 return True 5292 return False 5293 5294 def _match_text_seq(self, *texts, advance=True): 5295 index = self._index 5296 for text in texts: 5297 if self._curr and self._curr.text.upper() == text: 5298 self._advance() 5299 else: 5300 self._retreat(index) 5301 return False 5302 5303 if not advance: 5304 self._retreat(index) 5305 5306 return True 5307 5308 @t.overload 5309 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5310 ... 5311 5312 @t.overload 5313 def _replace_columns_with_dots( 5314 self, this: t.Optional[exp.Expression] 5315 ) -> t.Optional[exp.Expression]: 5316 ... 
5317 5318 def _replace_columns_with_dots(self, this): 5319 if isinstance(this, exp.Dot): 5320 exp.replace_children(this, self._replace_columns_with_dots) 5321 elif isinstance(this, exp.Column): 5322 exp.replace_children(this, self._replace_columns_with_dots) 5323 table = this.args.get("table") 5324 this = ( 5325 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5326 ) 5327 5328 return this 5329 5330 def _replace_lambda( 5331 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5332 ) -> t.Optional[exp.Expression]: 5333 if not node: 5334 return node 5335 5336 for column in node.find_all(exp.Column): 5337 if column.parts[0].name in lambda_variables: 5338 dot_or_id = column.to_dot() if column.table else column.this 5339 parent = column.parent 5340 5341 while isinstance(parent, exp.Dot): 5342 if not isinstance(parent.parent, exp.Dot): 5343 parent.replace(dot_or_id) 5344 break 5345 parent = parent.parent 5346 else: 5347 if column is node: 5348 node = dot_or_id 5349 else: 5350 column.replace(dot_or_id) 5351 return node 5352 5353 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5354 return [ 5355 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5356 for value in values 5357 if value 5358 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat list of alternating keys and values.

    A single star argument produces an exp.StarMap; otherwise the arguments
    are consumed in (key, value) pairs and wrapped into an exp.VarMap whose
    keys and values are parallel exp.Array nodes.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List[exp.Expression] = []
    values: t.List[exp.Expression] = []

    # Walk the arguments two at a time: even positions are keys, odd are values.
    idx = 0
    while idx < len(args):
        keys.append(args[idx])
        values.append(args[idx + 1])
        idx += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 
TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 
TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.VOLATILE, 318 TokenType.WINDOW, 319 *CREATABLES, 320 *SUBQUERY_PREDICATES, 321 *TYPE_TOKENS, 322 *NO_PAREN_FUNCTIONS, 323 } 324 325 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 326 327 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 328 TokenType.ANTI, 329 TokenType.APPLY, 330 TokenType.ASOF, 331 TokenType.FULL, 332 TokenType.LEFT, 333 TokenType.LOCK, 334 TokenType.NATURAL, 335 TokenType.OFFSET, 336 TokenType.RIGHT, 337 TokenType.SEMI, 338 TokenType.WINDOW, 339 } 340 341 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 342 343 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 344 345 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 346 347 FUNC_TOKENS = { 348 TokenType.COLLATE, 349 TokenType.COMMAND, 350 TokenType.CURRENT_DATE, 351 TokenType.CURRENT_DATETIME, 352 TokenType.CURRENT_TIMESTAMP, 353 TokenType.CURRENT_TIME, 354 TokenType.CURRENT_USER, 355 TokenType.FILTER, 356 TokenType.FIRST, 357 TokenType.FORMAT, 358 TokenType.GLOB, 359 TokenType.IDENTIFIER, 360 TokenType.INDEX, 361 TokenType.ISNULL, 362 TokenType.ILIKE, 363 TokenType.INSERT, 364 TokenType.LIKE, 365 TokenType.MERGE, 366 TokenType.OFFSET, 367 TokenType.PRIMARY_KEY, 368 TokenType.RANGE, 369 TokenType.REPLACE, 370 TokenType.RLIKE, 371 TokenType.ROW, 372 TokenType.UNNEST, 373 TokenType.VAR, 374 TokenType.LEFT, 375 TokenType.RIGHT, 376 TokenType.DATE, 377 TokenType.DATETIME, 378 TokenType.TABLE, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.WINDOW, 382 TokenType.XOR, 383 *TYPE_TOKENS, 384 *SUBQUERY_PREDICATES, 385 } 386 387 CONJUNCTION = { 388 TokenType.AND: exp.And, 389 TokenType.OR: exp.Or, 390 } 391 
392 EQUALITY = { 393 TokenType.EQ: exp.EQ, 394 TokenType.NEQ: exp.NEQ, 395 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 396 } 397 398 COMPARISON = { 399 TokenType.GT: exp.GT, 400 TokenType.GTE: exp.GTE, 401 TokenType.LT: exp.LT, 402 TokenType.LTE: exp.LTE, 403 } 404 405 BITWISE = { 406 TokenType.AMP: exp.BitwiseAnd, 407 TokenType.CARET: exp.BitwiseXor, 408 TokenType.PIPE: exp.BitwiseOr, 409 TokenType.DPIPE: exp.DPipe, 410 } 411 412 TERM = { 413 TokenType.DASH: exp.Sub, 414 TokenType.PLUS: exp.Add, 415 TokenType.MOD: exp.Mod, 416 TokenType.COLLATE: exp.Collate, 417 } 418 419 FACTOR = { 420 TokenType.DIV: exp.IntDiv, 421 TokenType.LR_ARROW: exp.Distance, 422 TokenType.SLASH: exp.Div, 423 TokenType.STAR: exp.Mul, 424 } 425 426 TIMES = { 427 TokenType.TIME, 428 TokenType.TIMETZ, 429 } 430 431 TIMESTAMPS = { 432 TokenType.TIMESTAMP, 433 TokenType.TIMESTAMPTZ, 434 TokenType.TIMESTAMPLTZ, 435 *TIMES, 436 } 437 438 SET_OPERATIONS = { 439 TokenType.UNION, 440 TokenType.INTERSECT, 441 TokenType.EXCEPT, 442 } 443 444 JOIN_METHODS = { 445 TokenType.NATURAL, 446 TokenType.ASOF, 447 } 448 449 JOIN_SIDES = { 450 TokenType.LEFT, 451 TokenType.RIGHT, 452 TokenType.FULL, 453 } 454 455 JOIN_KINDS = { 456 TokenType.INNER, 457 TokenType.OUTER, 458 TokenType.CROSS, 459 TokenType.SEMI, 460 TokenType.ANTI, 461 } 462 463 JOIN_HINTS: t.Set[str] = set() 464 465 LAMBDAS = { 466 TokenType.ARROW: lambda self, expressions: self.expression( 467 exp.Lambda, 468 this=self._replace_lambda( 469 self._parse_conjunction(), 470 {node.name for node in expressions}, 471 ), 472 expressions=expressions, 473 ), 474 TokenType.FARROW: lambda self, expressions: self.expression( 475 exp.Kwarg, 476 this=exp.var(expressions[0].name), 477 expression=self._parse_conjunction(), 478 ), 479 } 480 481 COLUMN_OPERATORS = { 482 TokenType.DOT: None, 483 TokenType.DCOLON: lambda self, this, to: self.expression( 484 exp.Cast if self.STRICT_CAST else exp.TryCast, 485 this=this, 486 to=to, 487 ), 488 TokenType.ARROW: lambda self, 
this, path: self.expression( 489 exp.JSONExtract, 490 this=this, 491 expression=path, 492 ), 493 TokenType.DARROW: lambda self, this, path: self.expression( 494 exp.JSONExtractScalar, 495 this=this, 496 expression=path, 497 ), 498 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtract, 500 this=this, 501 expression=path, 502 ), 503 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 504 exp.JSONBExtractScalar, 505 this=this, 506 expression=path, 507 ), 508 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 509 exp.JSONBContains, 510 this=this, 511 expression=key, 512 ), 513 } 514 515 EXPRESSION_PARSERS = { 516 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 517 exp.Column: lambda self: self._parse_column(), 518 exp.Condition: lambda self: self._parse_conjunction(), 519 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 520 exp.Expression: lambda self: self._parse_statement(), 521 exp.From: lambda self: self._parse_from(), 522 exp.Group: lambda self: self._parse_group(), 523 exp.Having: lambda self: self._parse_having(), 524 exp.Identifier: lambda self: self._parse_id_var(), 525 exp.Join: lambda self: self._parse_join(), 526 exp.Lambda: lambda self: self._parse_lambda(), 527 exp.Lateral: lambda self: self._parse_lateral(), 528 exp.Limit: lambda self: self._parse_limit(), 529 exp.Offset: lambda self: self._parse_offset(), 530 exp.Order: lambda self: self._parse_order(), 531 exp.Ordered: lambda self: self._parse_ordered(), 532 exp.Properties: lambda self: self._parse_properties(), 533 exp.Qualify: lambda self: self._parse_qualify(), 534 exp.Returning: lambda self: self._parse_returning(), 535 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 536 exp.Table: lambda self: self._parse_table_parts(), 537 exp.TableAlias: lambda self: self._parse_table_alias(), 538 exp.Where: lambda self: self._parse_where(), 539 exp.Window: lambda self: 
self._parse_named_window(), 540 exp.With: lambda self: self._parse_with(), 541 "JOIN_TYPE": lambda self: self._parse_join_parts(), 542 } 543 544 STATEMENT_PARSERS = { 545 TokenType.ALTER: lambda self: self._parse_alter(), 546 TokenType.BEGIN: lambda self: self._parse_transaction(), 547 TokenType.CACHE: lambda self: self._parse_cache(), 548 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 549 TokenType.COMMENT: lambda self: self._parse_comment(), 550 TokenType.CREATE: lambda self: self._parse_create(), 551 TokenType.DELETE: lambda self: self._parse_delete(), 552 TokenType.DESC: lambda self: self._parse_describe(), 553 TokenType.DESCRIBE: lambda self: self._parse_describe(), 554 TokenType.DROP: lambda self: self._parse_drop(), 555 TokenType.INSERT: lambda self: self._parse_insert(), 556 TokenType.KILL: lambda self: self._parse_kill(), 557 TokenType.LOAD: lambda self: self._parse_load(), 558 TokenType.MERGE: lambda self: self._parse_merge(), 559 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 560 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 561 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 562 TokenType.SET: lambda self: self._parse_set(), 563 TokenType.UNCACHE: lambda self: self._parse_uncache(), 564 TokenType.UPDATE: lambda self: self._parse_update(), 565 TokenType.USE: lambda self: self.expression( 566 exp.Use, 567 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 568 and exp.var(self._prev.text), 569 this=self._parse_table(schema=False), 570 ), 571 } 572 573 UNARY_PARSERS = { 574 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 575 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 576 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 577 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 578 } 579 580 PRIMARY_PARSERS = 
{ 581 TokenType.STRING: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=True 583 ), 584 TokenType.NUMBER: lambda self, token: self.expression( 585 exp.Literal, this=token.text, is_string=False 586 ), 587 TokenType.STAR: lambda self, _: self.expression( 588 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 589 ), 590 TokenType.NULL: lambda self, _: self.expression(exp.Null), 591 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 592 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 593 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 594 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 595 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 596 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 597 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 598 exp.National, this=token.text 599 ), 600 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 601 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 602 exp.RawString, this=token.text 603 ), 604 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 605 } 606 607 PLACEHOLDER_PARSERS = { 608 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 609 TokenType.PARAMETER: lambda self: self._parse_parameter(), 610 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 611 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 612 else None, 613 } 614 615 RANGE_PARSERS = { 616 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 617 TokenType.GLOB: binary_range_parser(exp.Glob), 618 TokenType.ILIKE: binary_range_parser(exp.ILike), 619 TokenType.IN: lambda self, this: self._parse_in(this), 620 TokenType.IRLIKE: 
binary_range_parser(exp.RegexpILike), 621 TokenType.IS: lambda self, this: self._parse_is(this), 622 TokenType.LIKE: binary_range_parser(exp.Like), 623 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 624 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 625 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 626 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 627 } 628 629 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 630 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 631 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 632 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 633 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 634 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 635 "CHECKSUM": lambda self: self._parse_checksum(), 636 "CLUSTER BY": lambda self: self._parse_cluster(), 637 "CLUSTERED": lambda self: self._parse_clustered_by(), 638 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 639 exp.CollateProperty, **kwargs 640 ), 641 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 642 "COPY": lambda self: self._parse_copy_property(), 643 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 644 "DEFINER": lambda self: self._parse_definer(), 645 "DETERMINISTIC": lambda self: self.expression( 646 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 647 ), 648 "DISTKEY": lambda self: self._parse_distkey(), 649 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 650 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 651 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 652 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 653 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 654 "FORMAT": lambda 
self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword (upper-cased) -> parser for column/table constraints; each lambda
    # is invoked with the parser positioned just past the matched keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> -> OnUpdateColumnConstraint, otherwise ON <id> -> OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Keyword -> parser for ALTER statement actions (ADD, DROP, RENAME, ...).
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name of their own.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions whose arguments are parsed without surrounding parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> dedicated parser, for functions with special argument syntax.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Token -> (arg name, parser) pairs for clauses that can modify a query
    # (WHERE, GROUP BY, ORDER BY, LIMIT, locks, sampling, ...). The arg name is
    # the key the parsed clause is stored under on the query expression.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Keyword -> parser for SET statement items; keys feed SET_TRIE (built by _Parser).
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects; keys feed SHOW_TRIE (built by the _Parser metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # DataType.Type -> parser used when a type keyword is followed by a literal.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression classes that can take query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can begin the SELECT part of a DDL statement (CREATE ... AS <select>).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may precede VOLATILE when it is part of a CREATE, see _parse_volatile_property.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether CAST of an incompatible value raises (True) or yields NULL (False).
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(b, n) takes the base as the first argument.
    LOG_BASE_FIRST = True
    # Whether a single-argument LOG defaults to the natural logarithm.
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        # See the class docstring for the meaning of these arguments.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can be reused for another parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list could not be parsed into any of the types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: split the token stream into one chunk per statement
        # (on semicolons) and apply parse_method to each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The ANSI escapes underline the offending token in terminal output.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicitly supplied comments, otherwise any comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move the comments buffered from the previous token onto the given
        # expression, clearing the buffer so they are not attached twice.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens (inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor forward and refresh the _curr/_next/_prev token
        # views, buffering the previous token's comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Move the cursor back (or forward) to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token's text plus the rest as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON <kind> <name> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: <expr> [DELETE | RECOMPRESS <expr> | TO DISK <str> | TO VOLUME <str>]
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch to a registered
        # statement parser, fall back to Command, else parse an expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE] ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; with not_=True the NOT is required.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name; only truthy
        # ones are forwarded to the property parser below.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser does not accept the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Dispatch on a registered property keyword, then try a few multi-word
        # forms, and finally fall back to a generic `key = value` property.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key = value property; rewind so the tokens can be re-parsed.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED AS [INPUTFORMAT <str>] [OUTPUTFORMAT <str>] | <format name>
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # <KEYWORD> [= | AS] <field>
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into a single exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property;
        # elsewhere it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # WITH (<props>) | WITH JOURNAL | WITH [NO] DATA | WITH ... ISOLATED LOADING
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = <user>@<host>
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (<cols>) [SORTED BY (<ordered>)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY GRANTS; rewind the COPY token if GRANTS does not follow.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE = <number> [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # MERGEBLOCKRATIO = <number> [PERCENT] | [NO | DEFAULT] MERGEBLOCKRATIO
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # DATABLOCKSIZE = <number> [BYTES | KBYTES | KILOBYTES]
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING [TABLE | VIEW | ROW | DATABASE] [<name>] [FOR | IN]
        # [ACCESS | EXCLUSIVE | SHARE | READ | WRITE | CHECKSUM] [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return
exp.OnCommitProperty(delete=True) 1773 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1774 1775 def _parse_distkey(self) -> exp.DistKeyProperty: 1776 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1777 1778 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1779 table = self._parse_table(schema=True) 1780 1781 options = [] 1782 while self._match_texts(("INCLUDING", "EXCLUDING")): 1783 this = self._prev.text.upper() 1784 1785 id_var = self._parse_id_var() 1786 if not id_var: 1787 return None 1788 1789 options.append( 1790 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1791 ) 1792 1793 return self.expression(exp.LikeProperty, this=table, expressions=options) 1794 1795 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1796 return self.expression( 1797 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1798 ) 1799 1800 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1801 self._match(TokenType.EQ) 1802 return self.expression( 1803 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1804 ) 1805 1806 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1807 self._match_text_seq("WITH", "CONNECTION") 1808 return self.expression( 1809 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1810 ) 1811 1812 def _parse_returns(self) -> exp.ReturnsProperty: 1813 value: t.Optional[exp.Expression] 1814 is_table = self._match(TokenType.TABLE) 1815 1816 if is_table: 1817 if self._match(TokenType.LT): 1818 value = self.expression( 1819 exp.Schema, 1820 this="TABLE", 1821 expressions=self._parse_csv(self._parse_struct_types), 1822 ) 1823 if not self._match(TokenType.GT): 1824 self.raise_error("Expecting >") 1825 else: 1826 value = self._parse_schema(exp.var("TABLE")) 1827 else: 1828 value = self._parse_types() 1829 
1830 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1831 1832 def _parse_describe(self) -> exp.Describe: 1833 kind = self._match_set(self.CREATABLES) and self._prev.text 1834 this = self._parse_table(schema=True) 1835 properties = self._parse_properties() 1836 expressions = properties.expressions if properties else None 1837 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1838 1839 def _parse_insert(self) -> exp.Insert: 1840 comments = ensure_list(self._prev_comments) 1841 overwrite = self._match(TokenType.OVERWRITE) 1842 ignore = self._match(TokenType.IGNORE) 1843 local = self._match_text_seq("LOCAL") 1844 alternative = None 1845 1846 if self._match_text_seq("DIRECTORY"): 1847 this: t.Optional[exp.Expression] = self.expression( 1848 exp.Directory, 1849 this=self._parse_var_or_string(), 1850 local=local, 1851 row_format=self._parse_row_format(match_row=True), 1852 ) 1853 else: 1854 if self._match(TokenType.OR): 1855 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1856 1857 self._match(TokenType.INTO) 1858 comments += ensure_list(self._prev_comments) 1859 self._match(TokenType.TABLE) 1860 this = self._parse_table(schema=True) 1861 1862 returning = self._parse_returning() 1863 1864 return self.expression( 1865 exp.Insert, 1866 comments=comments, 1867 this=this, 1868 by_name=self._match_text_seq("BY", "NAME"), 1869 exists=self._parse_exists(), 1870 partition=self._parse_partition(), 1871 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1872 and self._parse_conjunction(), 1873 expression=self._parse_ddl_select(), 1874 conflict=self._parse_on_conflict(), 1875 returning=returning or self._parse_returning(), 1876 overwrite=overwrite, 1877 alternative=alternative, 1878 ignore=ignore, 1879 ) 1880 1881 def _parse_kill(self) -> exp.Kill: 1882 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1883 1884 return self.expression( 1885 
exp.Kill, 1886 this=self._parse_primary(), 1887 kind=kind, 1888 ) 1889 1890 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1891 conflict = self._match_text_seq("ON", "CONFLICT") 1892 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1893 1894 if not conflict and not duplicate: 1895 return None 1896 1897 nothing = None 1898 expressions = None 1899 key = None 1900 constraint = None 1901 1902 if conflict: 1903 if self._match_text_seq("ON", "CONSTRAINT"): 1904 constraint = self._parse_id_var() 1905 else: 1906 key = self._parse_csv(self._parse_value) 1907 1908 self._match_text_seq("DO") 1909 if self._match_text_seq("NOTHING"): 1910 nothing = True 1911 else: 1912 self._match(TokenType.UPDATE) 1913 self._match(TokenType.SET) 1914 expressions = self._parse_csv(self._parse_equality) 1915 1916 return self.expression( 1917 exp.OnConflict, 1918 duplicate=duplicate, 1919 expressions=expressions, 1920 nothing=nothing, 1921 key=key, 1922 constraint=constraint, 1923 ) 1924 1925 def _parse_returning(self) -> t.Optional[exp.Returning]: 1926 if not self._match(TokenType.RETURNING): 1927 return None 1928 return self.expression( 1929 exp.Returning, 1930 expressions=self._parse_csv(self._parse_expression), 1931 into=self._match(TokenType.INTO) and self._parse_table_part(), 1932 ) 1933 1934 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1935 if not self._match(TokenType.FORMAT): 1936 return None 1937 return self._parse_row_format() 1938 1939 def _parse_row_format( 1940 self, match_row: bool = False 1941 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1942 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1943 return None 1944 1945 if self._match_text_seq("SERDE"): 1946 this = self._parse_string() 1947 1948 serde_properties = None 1949 if self._match(TokenType.SERDE_PROPERTIES): 1950 serde_properties = self.expression( 1951 exp.SerdeProperties, 
expressions=self._parse_wrapped_csv(self._parse_property) 1952 ) 1953 1954 return self.expression( 1955 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1956 ) 1957 1958 self._match_text_seq("DELIMITED") 1959 1960 kwargs = {} 1961 1962 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1963 kwargs["fields"] = self._parse_string() 1964 if self._match_text_seq("ESCAPED", "BY"): 1965 kwargs["escaped"] = self._parse_string() 1966 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1967 kwargs["collection_items"] = self._parse_string() 1968 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1969 kwargs["map_keys"] = self._parse_string() 1970 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1971 kwargs["lines"] = self._parse_string() 1972 if self._match_text_seq("NULL", "DEFINED", "AS"): 1973 kwargs["null"] = self._parse_string() 1974 1975 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1976 1977 def _parse_load(self) -> exp.LoadData | exp.Command: 1978 if self._match_text_seq("DATA"): 1979 local = self._match_text_seq("LOCAL") 1980 self._match_text_seq("INPATH") 1981 inpath = self._parse_string() 1982 overwrite = self._match(TokenType.OVERWRITE) 1983 self._match_pair(TokenType.INTO, TokenType.TABLE) 1984 1985 return self.expression( 1986 exp.LoadData, 1987 this=self._parse_table(schema=True), 1988 local=local, 1989 overwrite=overwrite, 1990 inpath=inpath, 1991 partition=self._parse_partition(), 1992 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1993 serde=self._match_text_seq("SERDE") and self._parse_string(), 1994 ) 1995 return self._parse_as_command(self._prev) 1996 1997 def _parse_delete(self) -> exp.Delete: 1998 # This handles MySQL's "Multiple-Table Syntax" 1999 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2000 tables = None 2001 comments = self._prev_comments 2002 if not self._match(TokenType.FROM, advance=False): 2003 tables = 
self._parse_csv(self._parse_table) or None 2004 2005 returning = self._parse_returning() 2006 2007 return self.expression( 2008 exp.Delete, 2009 comments=comments, 2010 tables=tables, 2011 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2012 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2013 where=self._parse_where(), 2014 returning=returning or self._parse_returning(), 2015 limit=self._parse_limit(), 2016 ) 2017 2018 def _parse_update(self) -> exp.Update: 2019 comments = self._prev_comments 2020 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2021 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2022 returning = self._parse_returning() 2023 return self.expression( 2024 exp.Update, 2025 comments=comments, 2026 **{ # type: ignore 2027 "this": this, 2028 "expressions": expressions, 2029 "from": self._parse_from(joins=True), 2030 "where": self._parse_where(), 2031 "returning": returning or self._parse_returning(), 2032 "order": self._parse_order(), 2033 "limit": self._parse_limit(), 2034 }, 2035 ) 2036 2037 def _parse_uncache(self) -> exp.Uncache: 2038 if not self._match(TokenType.TABLE): 2039 self.raise_error("Expecting TABLE after UNCACHE") 2040 2041 return self.expression( 2042 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2043 ) 2044 2045 def _parse_cache(self) -> exp.Cache: 2046 lazy = self._match_text_seq("LAZY") 2047 self._match(TokenType.TABLE) 2048 table = self._parse_table(schema=True) 2049 2050 options = [] 2051 if self._match_text_seq("OPTIONS"): 2052 self._match_l_paren() 2053 k = self._parse_string() 2054 self._match(TokenType.EQ) 2055 v = self._parse_string() 2056 options = [k, v] 2057 self._match_r_paren() 2058 2059 self._match(TokenType.ALIAS) 2060 return self.expression( 2061 exp.Cache, 2062 this=table, 2063 lazy=lazy, 2064 options=options, 2065 expression=self._parse_select(nested=True), 2066 ) 2067 2068 def 
_parse_partition(self) -> t.Optional[exp.Partition]: 2069 if not self._match(TokenType.PARTITION): 2070 return None 2071 2072 return self.expression( 2073 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2074 ) 2075 2076 def _parse_value(self) -> exp.Tuple: 2077 if self._match(TokenType.L_PAREN): 2078 expressions = self._parse_csv(self._parse_conjunction) 2079 self._match_r_paren() 2080 return self.expression(exp.Tuple, expressions=expressions) 2081 2082 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2083 # https://prestodb.io/docs/current/sql/values.html 2084 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2085 2086 def _parse_projections(self) -> t.List[exp.Expression]: 2087 return self._parse_expressions() 2088 2089 def _parse_select( 2090 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2091 ) -> t.Optional[exp.Expression]: 2092 cte = self._parse_with() 2093 2094 if cte: 2095 this = self._parse_statement() 2096 2097 if not this: 2098 self.raise_error("Failed to parse any statement following CTE") 2099 return cte 2100 2101 if "with" in this.arg_types: 2102 this.set("with", cte) 2103 else: 2104 self.raise_error(f"{this.key} does not support CTE") 2105 this = cte 2106 2107 return this 2108 2109 # duckdb supports leading with FROM x 2110 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2111 2112 if self._match(TokenType.SELECT): 2113 comments = self._prev_comments 2114 2115 hint = self._parse_hint() 2116 all_ = self._match(TokenType.ALL) 2117 distinct = self._match_set(self.DISTINCT_TOKENS) 2118 2119 kind = ( 2120 self._match(TokenType.ALIAS) 2121 and self._match_texts(("STRUCT", "VALUE")) 2122 and self._prev.text 2123 ) 2124 2125 if distinct: 2126 distinct = self.expression( 2127 exp.Distinct, 2128 on=self._parse_value() if self._match(TokenType.ON) else None, 2129 ) 2130 2131 if all_ and distinct: 2132 
self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2133 2134 limit = self._parse_limit(top=True) 2135 projections = self._parse_projections() 2136 2137 this = self.expression( 2138 exp.Select, 2139 kind=kind, 2140 hint=hint, 2141 distinct=distinct, 2142 expressions=projections, 2143 limit=limit, 2144 ) 2145 this.comments = comments 2146 2147 into = self._parse_into() 2148 if into: 2149 this.set("into", into) 2150 2151 if not from_: 2152 from_ = self._parse_from() 2153 2154 if from_: 2155 this.set("from", from_) 2156 2157 this = self._parse_query_modifiers(this) 2158 elif (table or nested) and self._match(TokenType.L_PAREN): 2159 if self._match(TokenType.PIVOT): 2160 this = self._parse_simplified_pivot() 2161 elif self._match(TokenType.FROM): 2162 this = exp.select("*").from_( 2163 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2164 ) 2165 else: 2166 this = self._parse_table() if table else self._parse_select(nested=True) 2167 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2168 2169 self._match_r_paren() 2170 2171 # We return early here so that the UNION isn't attached to the subquery by the 2172 # following call to _parse_set_operations, but instead becomes the parent node 2173 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2174 elif self._match(TokenType.VALUES): 2175 this = self.expression( 2176 exp.Values, 2177 expressions=self._parse_csv(self._parse_value), 2178 alias=self._parse_table_alias(), 2179 ) 2180 elif from_: 2181 this = exp.select("*").from_(from_.this, copy=False) 2182 else: 2183 this = None 2184 2185 return self._parse_set_operations(this) 2186 2187 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2188 if not skip_with_token and not self._match(TokenType.WITH): 2189 return None 2190 2191 comments = self._prev_comments 2192 recursive = self._match(TokenType.RECURSIVE) 2193 2194 expressions = [] 2195 while True: 2196 
expressions.append(self._parse_cte()) 2197 2198 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2199 break 2200 else: 2201 self._match(TokenType.WITH) 2202 2203 return self.expression( 2204 exp.With, comments=comments, expressions=expressions, recursive=recursive 2205 ) 2206 2207 def _parse_cte(self) -> exp.CTE: 2208 alias = self._parse_table_alias() 2209 if not alias or not alias.this: 2210 self.raise_error("Expected CTE to have alias") 2211 2212 self._match(TokenType.ALIAS) 2213 return self.expression( 2214 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2215 ) 2216 2217 def _parse_table_alias( 2218 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2219 ) -> t.Optional[exp.TableAlias]: 2220 any_token = self._match(TokenType.ALIAS) 2221 alias = ( 2222 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2223 or self._parse_string_as_identifier() 2224 ) 2225 2226 index = self._index 2227 if self._match(TokenType.L_PAREN): 2228 columns = self._parse_csv(self._parse_function_parameter) 2229 self._match_r_paren() if columns else self._retreat(index) 2230 else: 2231 columns = None 2232 2233 if not alias and not columns: 2234 return None 2235 2236 return self.expression(exp.TableAlias, this=alias, columns=columns) 2237 2238 def _parse_subquery( 2239 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2240 ) -> t.Optional[exp.Subquery]: 2241 if not this: 2242 return None 2243 2244 return self.expression( 2245 exp.Subquery, 2246 this=this, 2247 pivots=self._parse_pivots(), 2248 alias=self._parse_table_alias() if parse_alias else None, 2249 ) 2250 2251 def _parse_query_modifiers( 2252 self, this: t.Optional[exp.Expression] 2253 ) -> t.Optional[exp.Expression]: 2254 if isinstance(this, self.MODIFIABLES): 2255 for join in iter(self._parse_join, None): 2256 this.append("joins", join) 2257 for lateral in iter(self._parse_lateral, None): 2258 this.append("laterals", 
lateral) 2259 2260 while True: 2261 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2262 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2263 key, expression = parser(self) 2264 2265 if expression: 2266 this.set(key, expression) 2267 if key == "limit": 2268 offset = expression.args.pop("offset", None) 2269 if offset: 2270 this.set("offset", exp.Offset(expression=offset)) 2271 continue 2272 break 2273 return this 2274 2275 def _parse_hint(self) -> t.Optional[exp.Hint]: 2276 if self._match(TokenType.HINT): 2277 hints = [] 2278 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2279 hints.extend(hint) 2280 2281 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2282 self.raise_error("Expected */ after HINT") 2283 2284 return self.expression(exp.Hint, expressions=hints) 2285 2286 return None 2287 2288 def _parse_into(self) -> t.Optional[exp.Into]: 2289 if not self._match(TokenType.INTO): 2290 return None 2291 2292 temp = self._match(TokenType.TEMPORARY) 2293 unlogged = self._match_text_seq("UNLOGGED") 2294 self._match(TokenType.TABLE) 2295 2296 return self.expression( 2297 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2298 ) 2299 2300 def _parse_from( 2301 self, joins: bool = False, skip_from_token: bool = False 2302 ) -> t.Optional[exp.From]: 2303 if not skip_from_token and not self._match(TokenType.FROM): 2304 return None 2305 2306 return self.expression( 2307 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2308 ) 2309 2310 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2311 if not self._match(TokenType.MATCH_RECOGNIZE): 2312 return None 2313 2314 self._match_l_paren() 2315 2316 partition = self._parse_partition_by() 2317 order = self._parse_order() 2318 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2319 2320 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2321 rows = exp.var("ONE ROW PER 
MATCH") 2322 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2323 text = "ALL ROWS PER MATCH" 2324 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2325 text += f" SHOW EMPTY MATCHES" 2326 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2327 text += f" OMIT EMPTY MATCHES" 2328 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2329 text += f" WITH UNMATCHED ROWS" 2330 rows = exp.var(text) 2331 else: 2332 rows = None 2333 2334 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2335 text = "AFTER MATCH SKIP" 2336 if self._match_text_seq("PAST", "LAST", "ROW"): 2337 text += f" PAST LAST ROW" 2338 elif self._match_text_seq("TO", "NEXT", "ROW"): 2339 text += f" TO NEXT ROW" 2340 elif self._match_text_seq("TO", "FIRST"): 2341 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2342 elif self._match_text_seq("TO", "LAST"): 2343 text += f" TO LAST {self._advance_any().text}" # type: ignore 2344 after = exp.var(text) 2345 else: 2346 after = None 2347 2348 if self._match_text_seq("PATTERN"): 2349 self._match_l_paren() 2350 2351 if not self._curr: 2352 self.raise_error("Expecting )", self._curr) 2353 2354 paren = 1 2355 start = self._curr 2356 2357 while self._curr and paren > 0: 2358 if self._curr.token_type == TokenType.L_PAREN: 2359 paren += 1 2360 if self._curr.token_type == TokenType.R_PAREN: 2361 paren -= 1 2362 2363 end = self._prev 2364 self._advance() 2365 2366 if paren > 0: 2367 self.raise_error("Expecting )", self._curr) 2368 2369 pattern = exp.var(self._find_sql(start, end)) 2370 else: 2371 pattern = None 2372 2373 define = ( 2374 self._parse_csv( 2375 lambda: self.expression( 2376 exp.Alias, 2377 alias=self._parse_id_var(any_token=True), 2378 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2379 ) 2380 ) 2381 if self._match_text_seq("DEFINE") 2382 else None 2383 ) 2384 2385 self._match_r_paren() 2386 2387 return self.expression( 2388 exp.MatchRecognize, 2389 partition_by=partition, 2390 order=order, 2391 
measures=measures, 2392 rows=rows, 2393 after=after, 2394 pattern=pattern, 2395 define=define, 2396 alias=self._parse_table_alias(), 2397 ) 2398 2399 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2400 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2401 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2402 2403 if outer_apply or cross_apply: 2404 this = self._parse_select(table=True) 2405 view = None 2406 outer = not cross_apply 2407 elif self._match(TokenType.LATERAL): 2408 this = self._parse_select(table=True) 2409 view = self._match(TokenType.VIEW) 2410 outer = self._match(TokenType.OUTER) 2411 else: 2412 return None 2413 2414 if not this: 2415 this = ( 2416 self._parse_unnest() 2417 or self._parse_function() 2418 or self._parse_id_var(any_token=False) 2419 ) 2420 2421 while self._match(TokenType.DOT): 2422 this = exp.Dot( 2423 this=this, 2424 expression=self._parse_function() or self._parse_id_var(any_token=False), 2425 ) 2426 2427 if view: 2428 table = self._parse_id_var(any_token=False) 2429 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2430 table_alias: t.Optional[exp.TableAlias] = self.expression( 2431 exp.TableAlias, this=table, columns=columns 2432 ) 2433 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2434 # We move the alias from the lateral's child node to the lateral itself 2435 table_alias = this.args["alias"].pop() 2436 else: 2437 table_alias = self._parse_table_alias() 2438 2439 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2440 2441 def _parse_join_parts( 2442 self, 2443 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2444 return ( 2445 self._match_set(self.JOIN_METHODS) and self._prev, 2446 self._match_set(self.JOIN_SIDES) and self._prev, 2447 self._match_set(self.JOIN_KINDS) and self._prev, 2448 ) 2449 2450 def _parse_join( 2451 self, skip_join_token: bool = False, parse_bracket: 
bool = False 2452 ) -> t.Optional[exp.Join]: 2453 if self._match(TokenType.COMMA): 2454 return self.expression(exp.Join, this=self._parse_table()) 2455 2456 index = self._index 2457 method, side, kind = self._parse_join_parts() 2458 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2459 join = self._match(TokenType.JOIN) 2460 2461 if not skip_join_token and not join: 2462 self._retreat(index) 2463 kind = None 2464 method = None 2465 side = None 2466 2467 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2468 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2469 2470 if not skip_join_token and not join and not outer_apply and not cross_apply: 2471 return None 2472 2473 if outer_apply: 2474 side = Token(TokenType.LEFT, "LEFT") 2475 2476 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2477 2478 if method: 2479 kwargs["method"] = method.text 2480 if side: 2481 kwargs["side"] = side.text 2482 if kind: 2483 kwargs["kind"] = kind.text 2484 if hint: 2485 kwargs["hint"] = hint 2486 2487 if self._match(TokenType.ON): 2488 kwargs["on"] = self._parse_conjunction() 2489 elif self._match(TokenType.USING): 2490 kwargs["using"] = self._parse_wrapped_id_vars() 2491 elif not (kind and kind.token_type == TokenType.CROSS): 2492 index = self._index 2493 join = self._parse_join() 2494 2495 if join and self._match(TokenType.ON): 2496 kwargs["on"] = self._parse_conjunction() 2497 elif join and self._match(TokenType.USING): 2498 kwargs["using"] = self._parse_wrapped_id_vars() 2499 else: 2500 join = None 2501 self._retreat(index) 2502 2503 kwargs["this"].set("joins", [join] if join else None) 2504 2505 comments = [c for token in (method, side, kind) if token for c in token.comments] 2506 return self.expression(exp.Join, comments=comments, **kwargs) 2507 2508 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2509 this = self._parse_conjunction() 2510 if 
self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2511 return this 2512 2513 opclass = self._parse_var(any_token=True) 2514 if opclass: 2515 return self.expression(exp.Opclass, this=this, expression=opclass) 2516 2517 return this 2518 2519 def _parse_index( 2520 self, 2521 index: t.Optional[exp.Expression] = None, 2522 ) -> t.Optional[exp.Index]: 2523 if index: 2524 unique = None 2525 primary = None 2526 amp = None 2527 2528 self._match(TokenType.ON) 2529 self._match(TokenType.TABLE) # hive 2530 table = self._parse_table_parts(schema=True) 2531 else: 2532 unique = self._match(TokenType.UNIQUE) 2533 primary = self._match_text_seq("PRIMARY") 2534 amp = self._match_text_seq("AMP") 2535 2536 if not self._match(TokenType.INDEX): 2537 return None 2538 2539 index = self._parse_id_var() 2540 table = None 2541 2542 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2543 2544 if self._match(TokenType.L_PAREN, advance=False): 2545 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2546 else: 2547 columns = None 2548 2549 return self.expression( 2550 exp.Index, 2551 this=index, 2552 table=table, 2553 using=using, 2554 columns=columns, 2555 unique=unique, 2556 primary=primary, 2557 amp=amp, 2558 partition_by=self._parse_partition_by(), 2559 where=self._parse_where(), 2560 ) 2561 2562 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2563 hints: t.List[exp.Expression] = [] 2564 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2565 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2566 hints.append( 2567 self.expression( 2568 exp.WithTableHint, 2569 expressions=self._parse_csv( 2570 lambda: self._parse_function() or self._parse_var(any_token=True) 2571 ), 2572 ) 2573 ) 2574 self._match_r_paren() 2575 else: 2576 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2577 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 
2578 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2579 2580 self._match_texts({"INDEX", "KEY"}) 2581 if self._match(TokenType.FOR): 2582 hint.set("target", self._advance_any() and self._prev.text.upper()) 2583 2584 hint.set("expressions", self._parse_wrapped_id_vars()) 2585 hints.append(hint) 2586 2587 return hints or None 2588 2589 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2590 return ( 2591 (not schema and self._parse_function(optional_parens=False)) 2592 or self._parse_id_var(any_token=False) 2593 or self._parse_string_as_identifier() 2594 or self._parse_placeholder() 2595 ) 2596 2597 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2598 catalog = None 2599 db = None 2600 table = self._parse_table_part(schema=schema) 2601 2602 while self._match(TokenType.DOT): 2603 if catalog: 2604 # This allows nesting the table in arbitrarily many dot expressions if needed 2605 table = self.expression( 2606 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2607 ) 2608 else: 2609 catalog = db 2610 db = table 2611 table = self._parse_table_part(schema=schema) 2612 2613 if not table: 2614 self.raise_error(f"Expected table name but got {self._curr}") 2615 2616 return self.expression( 2617 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2618 ) 2619 2620 def _parse_table( 2621 self, 2622 schema: bool = False, 2623 joins: bool = False, 2624 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2625 parse_bracket: bool = False, 2626 ) -> t.Optional[exp.Expression]: 2627 lateral = self._parse_lateral() 2628 if lateral: 2629 return lateral 2630 2631 unnest = self._parse_unnest() 2632 if unnest: 2633 return unnest 2634 2635 values = self._parse_derived_table_values() 2636 if values: 2637 return values 2638 2639 subquery = self._parse_select(table=True) 2640 if subquery: 2641 if not subquery.args.get("pivots"): 2642 subquery.set("pivots", self._parse_pivots()) 2643 return 
subquery 2644 2645 bracket = parse_bracket and self._parse_bracket(None) 2646 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2647 this = t.cast( 2648 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2649 ) 2650 2651 if schema: 2652 return self._parse_schema(this=this) 2653 2654 version = self._parse_version() 2655 2656 if version: 2657 this.set("version", version) 2658 2659 if self.ALIAS_POST_TABLESAMPLE: 2660 table_sample = self._parse_table_sample() 2661 2662 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2663 if alias: 2664 this.set("alias", alias) 2665 2666 if self._match_text_seq("AT"): 2667 this.set("index", self._parse_id_var()) 2668 2669 this.set("hints", self._parse_table_hints()) 2670 2671 if not this.args.get("pivots"): 2672 this.set("pivots", self._parse_pivots()) 2673 2674 if not self.ALIAS_POST_TABLESAMPLE: 2675 table_sample = self._parse_table_sample() 2676 2677 if table_sample: 2678 table_sample.set("this", this) 2679 this = table_sample 2680 2681 if joins: 2682 for join in iter(self._parse_join, None): 2683 this.append("joins", join) 2684 2685 return this 2686 2687 def _parse_version(self) -> t.Optional[exp.Version]: 2688 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2689 this = "TIMESTAMP" 2690 elif self._match(TokenType.VERSION_SNAPSHOT): 2691 this = "VERSION" 2692 else: 2693 return None 2694 2695 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2696 kind = self._prev.text.upper() 2697 start = self._parse_bitwise() 2698 self._match_texts(("TO", "AND")) 2699 end = self._parse_bitwise() 2700 expression: t.Optional[exp.Expression] = self.expression( 2701 exp.Tuple, expressions=[start, end] 2702 ) 2703 elif self._match_text_seq("CONTAINED", "IN"): 2704 kind = "CONTAINED IN" 2705 expression = self.expression( 2706 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2707 ) 2708 elif self._match(TokenType.ALL): 2709 kind = "ALL" 
2710 expression = None 2711 else: 2712 self._match_text_seq("AS", "OF") 2713 kind = "AS OF" 2714 expression = self._parse_type() 2715 2716 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2717 2718 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2719 if not self._match(TokenType.UNNEST): 2720 return None 2721 2722 expressions = self._parse_wrapped_csv(self._parse_type) 2723 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2724 2725 alias = self._parse_table_alias() if with_alias else None 2726 2727 if alias: 2728 if self.UNNEST_COLUMN_ONLY: 2729 if alias.args.get("columns"): 2730 self.raise_error("Unexpected extra column alias in unnest.") 2731 2732 alias.set("columns", [alias.this]) 2733 alias.set("this", None) 2734 2735 columns = alias.args.get("columns") or [] 2736 if offset and len(expressions) < len(columns): 2737 offset = columns.pop() 2738 2739 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2740 self._match(TokenType.ALIAS) 2741 offset = self._parse_id_var( 2742 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2743 ) or exp.to_identifier("offset") 2744 2745 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2746 2747 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2748 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2749 if not is_derived and not self._match(TokenType.VALUES): 2750 return None 2751 2752 expressions = self._parse_csv(self._parse_value) 2753 alias = self._parse_table_alias() 2754 2755 if is_derived: 2756 self._match_r_paren() 2757 2758 return self.expression( 2759 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2760 ) 2761 2762 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2763 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2764 as_modifier and self._match_text_seq("USING", "SAMPLE") 2765 ): 
2766 return None 2767 2768 bucket_numerator = None 2769 bucket_denominator = None 2770 bucket_field = None 2771 percent = None 2772 rows = None 2773 size = None 2774 seed = None 2775 2776 kind = ( 2777 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2778 ) 2779 method = self._parse_var(tokens=(TokenType.ROW,)) 2780 2781 matched_l_paren = self._match(TokenType.L_PAREN) 2782 2783 if self.TABLESAMPLE_CSV: 2784 num = None 2785 expressions = self._parse_csv(self._parse_primary) 2786 else: 2787 expressions = None 2788 num = ( 2789 self._parse_factor() 2790 if self._match(TokenType.NUMBER, advance=False) 2791 else self._parse_primary() 2792 ) 2793 2794 if self._match_text_seq("BUCKET"): 2795 bucket_numerator = self._parse_number() 2796 self._match_text_seq("OUT", "OF") 2797 bucket_denominator = bucket_denominator = self._parse_number() 2798 self._match(TokenType.ON) 2799 bucket_field = self._parse_field() 2800 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2801 percent = num 2802 elif self._match(TokenType.ROWS): 2803 rows = num 2804 elif num: 2805 size = num 2806 2807 if matched_l_paren: 2808 self._match_r_paren() 2809 2810 if self._match(TokenType.L_PAREN): 2811 method = self._parse_var() 2812 seed = self._match(TokenType.COMMA) and self._parse_number() 2813 self._match_r_paren() 2814 elif self._match_texts(("SEED", "REPEATABLE")): 2815 seed = self._parse_wrapped(self._parse_number) 2816 2817 return self.expression( 2818 exp.TableSample, 2819 expressions=expressions, 2820 method=method, 2821 bucket_numerator=bucket_numerator, 2822 bucket_denominator=bucket_denominator, 2823 bucket_field=bucket_field, 2824 percent=percent, 2825 rows=rows, 2826 size=size, 2827 seed=seed, 2828 kind=kind, 2829 ) 2830 2831 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2832 return list(iter(self._parse_pivot, None)) or None 2833 2834 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2835 return list(iter(self._parse_join, 
None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT syntax: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause; returns None (after rewinding) if
        the tokens do not actually form one."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not a pivot after all — rewind past the PIVOT/UNPIVOT keyword.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Pre-compute the generated output column names so downstream
            # dialects can qualify/select them.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default: the aggregation aliases name the pivoted columns;
        # dialects may override this.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause (the keyword may already have been consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating plain expressions, GROUPING SETS,
        ROLLUP/CUBE (with or without the WITH prefix form) and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` has no column list; bare `ROLLUP (...)` does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else — rewind and stop.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized column tuple or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # Temporarily register PRIOR as a no-paren function so it is only
        # recognized while parsing the CONNECT BY condition.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when no ORDER BY is present."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic helper for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse a single ORDER BY item with ASC/DESC and NULLS FIRST/LAST."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` is always falsy, so this is equivalent
        # to plain `self._match(TokenType.DESC)` — looks like leftover code.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering wasn't spelled out, infer it from the dialect's
        # NULL_ORDERING semantics so transpilation stays faithful.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, TOP (n), or FETCH FIRST/NEXT n ROWS ONLY."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW | ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of trailing locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with optional OF table lists and wait policies."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True = NOWAIT, False = SKIP LOCKED,
            # an expression = WAIT <n>, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-associatively via recursion)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    # The following methods form the operator-precedence ladder:
    # conjunction -> equality -> comparison -> range -> bitwise -> term -> factor -> unary.
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: BETWEEN/IN/LIKE etc. (via RANGE_PARSERS),
        ISNULL/NOTNULL shorthands, optional NOT, and trailing IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is,
this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, NULL, or a boolean.
        Rewinds (including the IS token) and returns None when nothing matches."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse IN's operand: UNNEST(...), a parenthesized list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query`, anything else `expressions`.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing toward INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level binary operators, including << and >> spelled as
        two tokens, and the ?? (DQMARK) coalesce shorthand."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an interval, a cast-style typed literal (TYPE 'literal'),
        or fall back to a plain column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this,
data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with nothing following — it was a column after all.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single type parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type.

        Backtracks and returns None when the tokens turn out not to be a type
        (e.g. a function call when check_func is set). Handles STRUCT<...>,
        ARRAY<...>/[], ENUM(...), timezone-qualified timestamps, INTERVAL spans,
        UNSIGNED integer variants, and user-defined types.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier: it may be a quoted type name.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call — decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL DAY TO SECOND
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # TYPE(...) not followed by a string literal is treated as a
            # function call, so undo the whole type parse.
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, one level per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type`."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (a bare identifier is promoted to exp.Column)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        # Fold trailing column operators (dots, ::casts, JSON arrows, ...) onto `this`.
        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: the previous column becomes table/db/catalog
                # of the new, deeper column reference.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string
        concatenation), `.N` number shorthand, or a parenthesized
        expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier — in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: no-paren parsers, no-paren builtins, dedicated
        FUNCTION_PARSERS, subquery predicates (EXISTS/ANY/...), known FUNCTIONS
        by name, and finally an exp.Anonymous fallback. Window specs are parsed
        on the way out.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dot-qualified) UDF signature with its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer like _utf8'abc'; falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (x -> ..., (a, b) -> ...); if the tokens are
        not a lambda, rewind and parse DISTINCT/select/expression instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema (column/constraint list); tries a nested SELECT first
        and, on success, leaves `this` alone."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: `name AS <expr> [PERSISTED] [NOT NULL]`.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with or without a value list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS IDENTITY (...), including
        sequence options (START WITH, INCREMENT BY, MIN/MAXVALUE, CYCLE)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) -- a computed expression, not identity.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint that follows a NOT keyword (NULL, CASESPECIFIC,
        FOR REPLICATION); returns None when nothing recognized follows."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed constraints are delegated to
        _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several constraint bodies / function calls.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints`
        (defaults to CONSTRAINT_PARSERS); identifiers never start one."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE / UPDATE) is whatever token follows ON.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is True the REFERENCES token
        itself must be present."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list (overridable hook)."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint (no paren list)
        or a table-level key with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: array literals, struct literals
        (DuckDB braces) or subscript access; recurses for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:n].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a trailing `:<expr>` follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any
        trailing window clause."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both its function form `IF(cond, a, b)` and its
        statement form `IF cond THEN a [ELSE b] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; the NEXT
        token was already consumed by the caller."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Give back the NEXT token -- it was something else.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr) and the comma-separated variant."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN col])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST/TRY_CAST: `expr AS type [FORMAT fmt]`.

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
            safe: propagated to the resulting node's `safe` arg.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake-style CAST(expr, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT over a temporal type is rewritten to
                # STR_TO_DATE / STR_TO_TIME with a translated format string.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONCAT, applying dialect NULL handling."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(delimiter, value, ...), applying NULL handling to
        the values (but never the delimiter)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, covering the Postgres, BigQuery
        and WITHIN GROUP (ORDER BY ...) variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a
        Cast/TryCast node."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality OR both sides NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd trailing argument is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] k [:|,] [VALUE] v` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with star/key-value entries and the optional
        NULL handling, UNIQUE KEYS, RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE / NESTED columns."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [ERROR|NULL ON ERROR/EMPTY] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )
    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...), normalizing argument order and the one-argument
        default (LN vs LOG) per dialect flags."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('query' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: name, type, path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls, handling both the `IN` form and
        the positional-argument form (argument order per `haystack_first`)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT(MODEL m, TABLE t [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint such as BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first operand was the trim set.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (...)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when that modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related modifiers after `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and OVER (...) with partition/order/frame spec.

        When `alias` is True this parses a named-window definition
        (`name AS (...)`) instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (unparenthesized reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / <expr>,
        plus the optional PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this`; when `explicit` is True the AS keyword
        is required. Handles both single and parenthesized alias lists."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier; with
        `any_token` even reserved-word tokens are accepted."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4713 return self._parse_placeholder() 4714 4715 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4716 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4717 4718 def _parse_number(self) -> t.Optional[exp.Expression]: 4719 if self._match(TokenType.NUMBER): 4720 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4721 return self._parse_placeholder() 4722 4723 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4724 if self._match(TokenType.IDENTIFIER): 4725 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4726 return self._parse_placeholder() 4727 4728 def _parse_var( 4729 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4730 ) -> t.Optional[exp.Expression]: 4731 if ( 4732 (any_token and self._advance_any()) 4733 or self._match(TokenType.VAR) 4734 or (self._match_set(tokens) if tokens else False) 4735 ): 4736 return self.expression(exp.Var, this=self._prev.text) 4737 return self._parse_placeholder() 4738 4739 def _advance_any(self) -> t.Optional[Token]: 4740 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4741 self._advance() 4742 return self._prev 4743 return None 4744 4745 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4746 return self._parse_var() or self._parse_string() 4747 4748 def _parse_null(self) -> t.Optional[exp.Expression]: 4749 if self._match_set(self.NULL_TOKENS): 4750 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4751 return self._parse_placeholder() 4752 4753 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4754 if self._match(TokenType.TRUE): 4755 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4756 if self._match(TokenType.FALSE): 4757 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4758 return self._parse_placeholder() 4759 4760 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4761 if self._match(TokenType.STAR): 4762 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4763 return self._parse_placeholder() 4764 4765 def _parse_parameter(self) -> exp.Parameter: 4766 wrapped = self._match(TokenType.L_BRACE) 4767 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4768 self._match(TokenType.R_BRACE) 4769 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4770 4771 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4772 if self._match_set(self.PLACEHOLDER_PARSERS): 4773 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4774 if placeholder: 4775 return placeholder 4776 self._advance(-1) 4777 return None 4778 4779 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4780 if not self._match(TokenType.EXCEPT): 4781 return None 4782 if self._match(TokenType.L_PAREN, advance=False): 4783 return self._parse_wrapped_csv(self._parse_column) 4784 4785 except_column = self._parse_column() 4786 return [except_column] if except_column else None 4787 4788 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4789 if not self._match(TokenType.REPLACE): 4790 return None 4791 if self._match(TokenType.L_PAREN, advance=False): 4792 return self._parse_wrapped_csv(self._parse_expression) 4793 4794 replace_expression = self._parse_expression() 4795 return [replace_expression] if replace_expression else None 4796 4797 def _parse_csv( 4798 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4799 ) -> t.List[exp.Expression]: 4800 parse_result = parse_method() 4801 items = [parse_result] if parse_result is not None else [] 4802 4803 while self._match(sep): 4804 self._add_comments(parse_result) 4805 parse_result = parse_method() 4806 if parse_result is not None: 4807 items.append(parse_result) 4808 4809 return items 4810 4811 def _parse_tokens( 4812 self, parse_method: t.Callable, expressions: t.Dict 4813 ) -> 
t.Optional[exp.Expression]: 4814 this = parse_method() 4815 4816 while self._match_set(expressions): 4817 this = self.expression( 4818 expressions[self._prev.token_type], 4819 this=this, 4820 comments=self._prev_comments, 4821 expression=parse_method(), 4822 ) 4823 4824 return this 4825 4826 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4827 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4828 4829 def _parse_wrapped_csv( 4830 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4831 ) -> t.List[exp.Expression]: 4832 return self._parse_wrapped( 4833 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4834 ) 4835 4836 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4837 wrapped = self._match(TokenType.L_PAREN) 4838 if not wrapped and not optional: 4839 self.raise_error("Expecting (") 4840 parse_result = parse_method() 4841 if wrapped: 4842 self._match_r_paren() 4843 return parse_result 4844 4845 def _parse_expressions(self) -> t.List[exp.Expression]: 4846 return self._parse_csv(self._parse_expression) 4847 4848 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4849 return self._parse_select() or self._parse_set_operations( 4850 self._parse_expression() if alias else self._parse_conjunction() 4851 ) 4852 4853 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4854 return self._parse_query_modifiers( 4855 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4856 ) 4857 4858 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4859 this = None 4860 if self._match_texts(self.TRANSACTION_KIND): 4861 this = self._prev.text 4862 4863 self._match_texts({"TRANSACTION", "WORK"}) 4864 4865 modes = [] 4866 while True: 4867 mode = [] 4868 while self._match(TokenType.VAR): 4869 mode.append(self._prev.text) 4870 4871 if mode: 4872 modes.append(" 
".join(mode)) 4873 if not self._match(TokenType.COMMA): 4874 break 4875 4876 return self.expression(exp.Transaction, this=this, modes=modes) 4877 4878 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4879 chain = None 4880 savepoint = None 4881 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4882 4883 self._match_texts({"TRANSACTION", "WORK"}) 4884 4885 if self._match_text_seq("TO"): 4886 self._match_text_seq("SAVEPOINT") 4887 savepoint = self._parse_id_var() 4888 4889 if self._match(TokenType.AND): 4890 chain = not self._match_text_seq("NO") 4891 self._match_text_seq("CHAIN") 4892 4893 if is_rollback: 4894 return self.expression(exp.Rollback, savepoint=savepoint) 4895 4896 return self.expression(exp.Commit, chain=chain) 4897 4898 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4899 if not self._match_text_seq("ADD"): 4900 return None 4901 4902 self._match(TokenType.COLUMN) 4903 exists_column = self._parse_exists(not_=True) 4904 expression = self._parse_field_def() 4905 4906 if expression: 4907 expression.set("exists", exists_column) 4908 4909 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4910 if self._match_texts(("FIRST", "AFTER")): 4911 position = self._prev.text 4912 column_position = self.expression( 4913 exp.ColumnPosition, this=self._parse_column(), position=position 4914 ) 4915 expression.set("position", column_position) 4916 4917 return expression 4918 4919 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4920 drop = self._match(TokenType.DROP) and self._parse_drop() 4921 if drop and not isinstance(drop, exp.Command): 4922 drop.set("kind", drop.args.get("kind", "COLUMN")) 4923 return drop 4924 4925 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4926 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4927 return self.expression( 4928 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4929 ) 4930 4931 def _parse_add_constraint(self) -> exp.AddConstraint: 4932 this = None 4933 kind = self._prev.token_type 4934 4935 if kind == TokenType.CONSTRAINT: 4936 this = self._parse_id_var() 4937 4938 if self._match_text_seq("CHECK"): 4939 expression = self._parse_wrapped(self._parse_conjunction) 4940 enforced = self._match_text_seq("ENFORCED") 4941 4942 return self.expression( 4943 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4944 ) 4945 4946 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4947 expression = self._parse_foreign_key() 4948 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4949 expression = self._parse_primary_key() 4950 else: 4951 expression = None 4952 4953 return self.expression(exp.AddConstraint, this=this, expression=expression) 4954 4955 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4956 index = self._index - 1 4957 4958 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4959 return self._parse_csv(self._parse_add_constraint) 4960 4961 self._retreat(index) 4962 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4963 return self._parse_csv(self._parse_field_def) 4964 4965 return self._parse_csv(self._parse_add_column) 4966 4967 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4968 self._match(TokenType.COLUMN) 4969 column = self._parse_field(any_token=True) 4970 4971 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4972 return self.expression(exp.AlterColumn, this=column, drop=True) 4973 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4974 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4975 4976 self._match_text_seq("SET", "DATA") 4977 return self.expression( 4978 exp.AlterColumn, 4979 this=column, 4980 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4981 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4982 using=self._match(TokenType.USING) and self._parse_conjunction(), 4983 ) 4984 4985 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4986 index = self._index - 1 4987 4988 partition_exists = self._parse_exists() 4989 if self._match(TokenType.PARTITION, advance=False): 4990 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4991 4992 self._retreat(index) 4993 return self._parse_csv(self._parse_drop_column) 4994 4995 def _parse_alter_table_rename(self) -> exp.RenameTable: 4996 self._match_text_seq("TO") 4997 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4998 4999 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5000 start = self._prev 5001 5002 if not self._match(TokenType.TABLE): 5003 return self._parse_as_command(start) 5004 5005 exists = self._parse_exists() 5006 only = self._match_text_seq("ONLY") 5007 this = self._parse_table(schema=True) 5008 5009 if self._next: 5010 self._advance() 5011 5012 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5013 if parser: 5014 actions = ensure_list(parser(self)) 5015 5016 if not self._curr: 5017 return self.expression( 5018 exp.AlterTable, 5019 this=this, 5020 exists=exists, 5021 actions=actions, 5022 only=only, 5023 ) 5024 5025 return self._parse_as_command(start) 5026 5027 def _parse_merge(self) -> exp.Merge: 5028 self._match(TokenType.INTO) 5029 target = self._parse_table() 5030 5031 if target and self._match(TokenType.ALIAS, advance=False): 5032 target.set("alias", self._parse_table_alias()) 5033 5034 self._match(TokenType.USING) 5035 using = self._parse_table() 5036 5037 self._match(TokenType.ON) 5038 on = self._parse_conjunction() 5039 5040 return self.expression( 5041 exp.Merge, 5042 this=target, 5043 using=using, 5044 on=on, 5045 expressions=self._parse_when_matched(), 5046 ) 5047 5048 def _parse_when_matched(self) -> t.List[exp.When]: 5049 whens = [] 5050 5051 while 
self._match(TokenType.WHEN): 5052 matched = not self._match(TokenType.NOT) 5053 self._match_text_seq("MATCHED") 5054 source = ( 5055 False 5056 if self._match_text_seq("BY", "TARGET") 5057 else self._match_text_seq("BY", "SOURCE") 5058 ) 5059 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5060 5061 self._match(TokenType.THEN) 5062 5063 if self._match(TokenType.INSERT): 5064 _this = self._parse_star() 5065 if _this: 5066 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5067 else: 5068 then = self.expression( 5069 exp.Insert, 5070 this=self._parse_value(), 5071 expression=self._match(TokenType.VALUES) and self._parse_value(), 5072 ) 5073 elif self._match(TokenType.UPDATE): 5074 expressions = self._parse_star() 5075 if expressions: 5076 then = self.expression(exp.Update, expressions=expressions) 5077 else: 5078 then = self.expression( 5079 exp.Update, 5080 expressions=self._match(TokenType.SET) 5081 and self._parse_csv(self._parse_equality), 5082 ) 5083 elif self._match(TokenType.DELETE): 5084 then = self.expression(exp.Var, this=self._prev.text) 5085 else: 5086 then = None 5087 5088 whens.append( 5089 self.expression( 5090 exp.When, 5091 matched=matched, 5092 source=source, 5093 condition=condition, 5094 then=then, 5095 ) 5096 ) 5097 return whens 5098 5099 def _parse_show(self) -> t.Optional[exp.Expression]: 5100 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5101 if parser: 5102 return parser(self) 5103 return self._parse_as_command(self._prev) 5104 5105 def _parse_set_item_assignment( 5106 self, kind: t.Optional[str] = None 5107 ) -> t.Optional[exp.Expression]: 5108 index = self._index 5109 5110 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5111 return self._parse_set_transaction(global_=kind == "GLOBAL") 5112 5113 left = self._parse_primary() or self._parse_id_var() 5114 assignment_delimiter = self._match_texts(("=", "TO")) 5115 5116 if not left or 
(self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5117 self._retreat(index) 5118 return None 5119 5120 right = self._parse_statement() or self._parse_id_var() 5121 this = self.expression(exp.EQ, this=left, expression=right) 5122 5123 return self.expression(exp.SetItem, this=this, kind=kind) 5124 5125 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5126 self._match_text_seq("TRANSACTION") 5127 characteristics = self._parse_csv( 5128 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5129 ) 5130 return self.expression( 5131 exp.SetItem, 5132 expressions=characteristics, 5133 kind="TRANSACTION", 5134 **{"global": global_}, # type: ignore 5135 ) 5136 5137 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5138 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5139 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5140 5141 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5142 index = self._index 5143 set_ = self.expression( 5144 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5145 ) 5146 5147 if self._curr: 5148 self._retreat(index) 5149 return self._parse_as_command(self._prev) 5150 5151 return set_ 5152 5153 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5154 for option in options: 5155 if self._match_text_seq(*option.split(" ")): 5156 return exp.var(option) 5157 return None 5158 5159 def _parse_as_command(self, start: Token) -> exp.Command: 5160 while self._curr: 5161 self._advance() 5162 text = self._find_sql(start, self._prev) 5163 size = len(start.text) 5164 return exp.Command(this=text[:size], expression=text[size:]) 5165 5166 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5167 settings = [] 5168 5169 self._match_l_paren() 5170 kind = self._parse_id_var() 5171 5172 if self._match(TokenType.L_PAREN): 5173 while True: 5174 key = 
self._parse_id_var() 5175 value = self._parse_primary() 5176 5177 if not key and value is None: 5178 break 5179 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5180 self._match(TokenType.R_PAREN) 5181 5182 self._match_r_paren() 5183 5184 return self.expression( 5185 exp.DictProperty, 5186 this=this, 5187 kind=kind.this if kind else None, 5188 settings=settings, 5189 ) 5190 5191 def _parse_dict_range(self, this: str) -> exp.DictRange: 5192 self._match_l_paren() 5193 has_min = self._match_text_seq("MIN") 5194 if has_min: 5195 min = self._parse_var() or self._parse_primary() 5196 self._match_text_seq("MAX") 5197 max = self._parse_var() or self._parse_primary() 5198 else: 5199 max = self._parse_var() or self._parse_primary() 5200 min = exp.Literal.number(0) 5201 self._match_r_paren() 5202 return self.expression(exp.DictRange, this=this, min=min, max=max) 5203 5204 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5205 index = self._index 5206 expression = self._parse_column() 5207 if not self._match(TokenType.IN): 5208 self._retreat(index - 1) 5209 return None 5210 iterator = self._parse_column() 5211 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5212 return self.expression( 5213 exp.Comprehension, 5214 this=this, 5215 expression=expression, 5216 iterator=iterator, 5217 condition=condition, 5218 ) 5219 5220 def _find_parser( 5221 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5222 ) -> t.Optional[t.Callable]: 5223 if not self._curr: 5224 return None 5225 5226 index = self._index 5227 this = [] 5228 while True: 5229 # The current token might be multiple words 5230 curr = self._curr.text.upper() 5231 key = curr.split(" ") 5232 this.append(curr) 5233 5234 self._advance() 5235 result, trie = in_trie(trie, key) 5236 if result == TrieResult.FAILED: 5237 break 5238 5239 if result == TrieResult.EXISTS: 5240 subparser = parsers[" ".join(this)] 5241 return subparser 5242 5243 
self._retreat(index) 5244 return None 5245 5246 def _match(self, token_type, advance=True, expression=None): 5247 if not self._curr: 5248 return None 5249 5250 if self._curr.token_type == token_type: 5251 if advance: 5252 self._advance() 5253 self._add_comments(expression) 5254 return True 5255 5256 return None 5257 5258 def _match_set(self, types, advance=True): 5259 if not self._curr: 5260 return None 5261 5262 if self._curr.token_type in types: 5263 if advance: 5264 self._advance() 5265 return True 5266 5267 return None 5268 5269 def _match_pair(self, token_type_a, token_type_b, advance=True): 5270 if not self._curr or not self._next: 5271 return None 5272 5273 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5274 if advance: 5275 self._advance(2) 5276 return True 5277 5278 return None 5279 5280 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5281 if not self._match(TokenType.L_PAREN, expression=expression): 5282 self.raise_error("Expecting (") 5283 5284 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5285 if not self._match(TokenType.R_PAREN, expression=expression): 5286 self.raise_error("Expecting )") 5287 5288 def _match_texts(self, texts, advance=True): 5289 if self._curr and self._curr.text.upper() in texts: 5290 if advance: 5291 self._advance() 5292 return True 5293 return False 5294 5295 def _match_text_seq(self, *texts, advance=True): 5296 index = self._index 5297 for text in texts: 5298 if self._curr and self._curr.text.upper() == text: 5299 self._advance() 5300 else: 5301 self._retreat(index) 5302 return False 5303 5304 if not advance: 5305 self._retreat(index) 5306 5307 return True 5308 5309 @t.overload 5310 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5311 ... 5312 5313 @t.overload 5314 def _replace_columns_with_dots( 5315 self, this: t.Optional[exp.Expression] 5316 ) -> t.Optional[exp.Expression]: 5317 ... 
5318 5319 def _replace_columns_with_dots(self, this): 5320 if isinstance(this, exp.Dot): 5321 exp.replace_children(this, self._replace_columns_with_dots) 5322 elif isinstance(this, exp.Column): 5323 exp.replace_children(this, self._replace_columns_with_dots) 5324 table = this.args.get("table") 5325 this = ( 5326 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5327 ) 5328 5329 return this 5330 5331 def _replace_lambda( 5332 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5333 ) -> t.Optional[exp.Expression]: 5334 if not node: 5335 return node 5336 5337 for column in node.find_all(exp.Column): 5338 if column.parts[0].name in lambda_variables: 5339 dot_or_id = column.to_dot() if column.table else column.this 5340 parent = column.parent 5341 5342 while isinstance(parent, exp.Dot): 5343 if not isinstance(parent.parent, exp.Dot): 5344 parent.replace(dot_or_id) 5345 break 5346 parent = parent.parent 5347 else: 5348 if column is node: 5349 node = dot_or_id 5350 else: 5351 column.replace(dot_or_id) 5352 return node 5353 5354 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5355 return [ 5356 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5357 for value in values 5358 if value 5359 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
950 def __init__( 951 self, 952 error_level: t.Optional[ErrorLevel] = None, 953 error_message_context: int = 100, 954 max_errors: int = 3, 955 ): 956 self.error_level = error_level or ErrorLevel.IMMEDIATE 957 self.error_message_context = error_message_context 958 self.max_errors = max_errors 959 self._tokenizer = self.TOKENIZER_CLASS() 960 self.reset()
972 def parse( 973 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 974 ) -> t.List[t.Optional[exp.Expression]]: 975 """ 976 Parses a list of tokens and returns a list of syntax trees, one tree 977 per parsed SQL statement. 978 979 Args: 980 raw_tokens: The list of tokens. 981 sql: The original SQL string, used to produce helpful debug messages. 982 983 Returns: 984 The list of the produced syntax trees. 985 """ 986 return self._parse( 987 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 988 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
990 def parse_into( 991 self, 992 expression_types: exp.IntoType, 993 raw_tokens: t.List[Token], 994 sql: t.Optional[str] = None, 995 ) -> t.List[t.Optional[exp.Expression]]: 996 """ 997 Parses a list of tokens into a given Expression type. If a collection of Expression 998 types is given instead, this method will try to parse the token list into each one 999 of them, stopping at the first for which the parsing succeeds. 1000 1001 Args: 1002 expression_types: The expression type(s) to try and parse the token list into. 1003 raw_tokens: The list of tokens. 1004 sql: The original SQL string, used to produce helpful debug messages. 1005 1006 Returns: 1007 The target Expression. 1008 """ 1009 errors = [] 1010 for expression_type in ensure_list(expression_types): 1011 parser = self.EXPRESSION_PARSERS.get(expression_type) 1012 if not parser: 1013 raise TypeError(f"No parser registered for {expression_type}") 1014 1015 try: 1016 return self._parse(parser, raw_tokens, sql) 1017 except ParseError as e: 1018 e.errors[0]["into_expression"] = expression_type 1019 errors.append(e) 1020 1021 raise ParseError( 1022 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1023 errors=merge_errors(errors), 1024 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1061 def check_errors(self) -> None: 1062 """Logs or raises any found errors, depending on the chosen error level setting.""" 1063 if self.error_level == ErrorLevel.WARN: 1064 for error in self.errors: 1065 logger.error(str(error)) 1066 elif self.error_level == ErrorLevel.RAISE and self.errors: 1067 raise ParseError( 1068 concat_messages(self.errors, self.max_errors), 1069 errors=merge_errors(self.errors), 1070 )
Logs or raises any found errors, depending on the chosen error level setting.
1072 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1073 """ 1074 Appends an error in the list of recorded errors or raises it, depending on the chosen 1075 error level setting. 1076 """ 1077 token = token or self._curr or self._prev or Token.string("") 1078 start = token.start 1079 end = token.end + 1 1080 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1081 highlight = self.sql[start:end] 1082 end_context = self.sql[end : end + self.error_message_context] 1083 1084 error = ParseError.new( 1085 f"{message}. Line {token.line}, Col: {token.col}.\n" 1086 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1087 description=message, 1088 line=token.line, 1089 col=token.col, 1090 start_context=start_context, 1091 highlight=highlight, 1092 end_context=end_context, 1093 ) 1094 1095 if self.error_level == ErrorLevel.IMMEDIATE: 1096 raise error 1097 1098 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1100 def expression( 1101 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1102 ) -> E: 1103 """ 1104 Creates a new, validated Expression. 1105 1106 Args: 1107 exp_class: The expression class to instantiate. 1108 comments: An optional list of comments to attach to the expression. 1109 kwargs: The arguments to set for the expression along with their respective values. 1110 1111 Returns: 1112 The target expression. 1113 """ 1114 instance = exp_class(**kwargs) 1115 instance.add_comments(comments) if comments else self._add_comments(instance) 1116 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1123 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1124 """ 1125 Validates an Expression, making sure that all its mandatory arguments are set. 1126 1127 Args: 1128 expression: The expression to validate. 1129 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1130 1131 Returns: 1132 The validated expression. 1133 """ 1134 if self.error_level != ErrorLevel.IGNORE: 1135 for error_message in expression.error_messages(args): 1136 self.raise_error(error_message) 1137 1138 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.