sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21 22def parse_var_map(args): 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34class _Parser(type): 35 def __new__(cls, clsname, bases, attrs): 36 klass = super().__new__(cls, clsname, bases, attrs) 37 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 38 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 39 return klass 40 41 42class Parser(metaclass=_Parser): 43 """ 44 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 45 a parsed syntax tree. 46 47 Args: 48 error_level: the desired error level. 49 Default: ErrorLevel.RAISE 50 error_message_context: determines the amount of context to capture from a 51 query string when displaying the error message (in number of characters). 52 Default: 50. 53 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 54 Default: 0 55 alias_post_tablesample: If the table alias comes after tablesample. 56 Default: False 57 max_errors: Maximum number of error messages to include in a raised ParseError. 58 This is only relevant if error_level is ErrorLevel.RAISE. 59 Default: 3 60 null_ordering: Indicates the default null ordering method to use if not explicitly set. 61 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 62 Default: "nulls_are_small" 63 """ 64 65 FUNCTIONS: t.Dict[str, t.Callable] = { 66 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 67 "DATE_TO_DATE_STR": lambda args: exp.Cast( 68 this=seq_get(args, 0), 69 to=exp.DataType(this=exp.DataType.Type.TEXT), 70 ), 71 "TIME_TO_TIME_STR": lambda args: exp.Cast( 72 this=seq_get(args, 0), 73 to=exp.DataType(this=exp.DataType.Type.TEXT), 74 ), 75 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 76 this=exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 start=exp.Literal.number(1), 81 length=exp.Literal.number(10), 82 ), 83 "VAR_MAP": parse_var_map, 84 "IFNULL": exp.Coalesce.from_arg_list, 85 } 86 87 NO_PAREN_FUNCTIONS = { 88 TokenType.CURRENT_DATE: exp.CurrentDate, 89 TokenType.CURRENT_DATETIME: exp.CurrentDate, 90 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 91 } 92 93 NESTED_TYPE_TOKENS = { 94 TokenType.ARRAY, 95 TokenType.MAP, 96 TokenType.STRUCT, 97 TokenType.NULLABLE, 98 } 99 100 TYPE_TOKENS = { 101 TokenType.BOOLEAN, 102 TokenType.TINYINT, 103 TokenType.SMALLINT, 104 TokenType.INT, 105 TokenType.BIGINT, 106 TokenType.FLOAT, 107 TokenType.DOUBLE, 108 TokenType.CHAR, 109 TokenType.NCHAR, 110 TokenType.VARCHAR, 111 TokenType.NVARCHAR, 112 TokenType.TEXT, 113 TokenType.MEDIUMTEXT, 114 TokenType.LONGTEXT, 115 TokenType.MEDIUMBLOB, 116 TokenType.LONGBLOB, 117 TokenType.BINARY, 118 TokenType.VARBINARY, 119 TokenType.JSON, 120 TokenType.JSONB, 121 TokenType.INTERVAL, 122 TokenType.TIME, 123 TokenType.TIMESTAMP, 124 TokenType.TIMESTAMPTZ, 125 TokenType.TIMESTAMPLTZ, 126 TokenType.DATETIME, 127 TokenType.DATE, 128 TokenType.DECIMAL, 129 TokenType.UUID, 130 TokenType.GEOGRAPHY, 131 TokenType.GEOMETRY, 132 TokenType.HLLSKETCH, 133 TokenType.HSTORE, 134 TokenType.PSEUDO_TYPE, 135 TokenType.SUPER, 136 TokenType.SERIAL, 137 TokenType.SMALLSERIAL, 138 TokenType.BIGSERIAL, 139 TokenType.XML, 140 TokenType.UNIQUEIDENTIFIER, 141 TokenType.MONEY, 142 TokenType.SMALLMONEY, 143 TokenType.ROWVERSION, 144 TokenType.IMAGE, 145 TokenType.VARIANT, 146 TokenType.OBJECT, 147 TokenType.INET, 148 *NESTED_TYPE_TOKENS, 149 } 150 151 SUBQUERY_PREDICATES = { 152 TokenType.ANY: exp.Any, 153 TokenType.ALL: exp.All, 154 TokenType.EXISTS: exp.Exists, 155 TokenType.SOME: exp.Any, 156 } 157 158 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 159 160 DB_CREATABLES = { 161 TokenType.DATABASE, 162 TokenType.SCHEMA, 163 TokenType.TABLE, 164 TokenType.VIEW, 165 } 166 167 CREATABLES = { 168 TokenType.COLUMN, 169 TokenType.FUNCTION, 170 TokenType.INDEX, 171 TokenType.PROCEDURE, 172 *DB_CREATABLES, 173 } 174 175 ID_VAR_TOKENS = { 176 TokenType.VAR, 177 TokenType.ANTI, 178 TokenType.APPLY, 179 TokenType.AUTO_INCREMENT, 180 TokenType.BEGIN, 181 TokenType.BOTH, 182 TokenType.BUCKET, 183 TokenType.CACHE, 184 TokenType.CASCADE, 185 TokenType.COLLATE, 186 TokenType.COMMAND, 187 TokenType.COMMENT, 188 TokenType.COMMIT, 189 TokenType.COMPOUND, 190 TokenType.CONSTRAINT, 191 TokenType.CURRENT_TIME, 192 TokenType.DEFAULT, 193 TokenType.DELETE, 194 TokenType.DESCRIBE, 195 TokenType.DIV, 196 TokenType.END, 197 TokenType.EXECUTE, 198 TokenType.ESCAPE, 199 TokenType.FALSE, 200 TokenType.FIRST, 201 TokenType.FILTER, 202 TokenType.FOLLOWING, 203 TokenType.FORMAT, 204 TokenType.IF, 205 TokenType.ISNULL, 206 TokenType.INTERVAL, 207 TokenType.LAZY, 208 TokenType.LEADING, 209 TokenType.LEFT, 210 TokenType.LOCAL, 211 TokenType.MATERIALIZED, 212 TokenType.MERGE, 213 TokenType.NATURAL, 214 TokenType.NEXT, 215 TokenType.OFFSET, 216 TokenType.ONLY, 217 TokenType.OPTIONS, 218 TokenType.ORDINALITY, 219 TokenType.PERCENT, 220 TokenType.PIVOT, 221 TokenType.PRECEDING, 222 TokenType.RANGE, 223 TokenType.REFERENCES, 224 TokenType.RIGHT, 225 TokenType.ROW, 226 TokenType.ROWS, 227 TokenType.SEED, 228 TokenType.SEMI, 229 TokenType.SET, 230 TokenType.SHOW, 231 TokenType.SORTKEY, 232 TokenType.TEMPORARY, 233 TokenType.TOP, 234 TokenType.TRAILING, 235 TokenType.TRUE, 236 TokenType.UNBOUNDED, 237 TokenType.UNIQUE, 238 TokenType.UNLOGGED, 239 TokenType.UNPIVOT, 240 TokenType.VOLATILE, 241 TokenType.WINDOW, 242 *CREATABLES, 243 *SUBQUERY_PREDICATES, 244 *TYPE_TOKENS, 245 *NO_PAREN_FUNCTIONS, 246 } 247 248 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 249 TokenType.APPLY, 250 TokenType.LEFT, 251 TokenType.NATURAL, 252 TokenType.OFFSET, 253 TokenType.RIGHT, 254 TokenType.WINDOW, 255 } 256 257 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 258 259 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 260 261 FUNC_TOKENS = { 262 TokenType.COMMAND, 263 TokenType.CURRENT_DATE, 264 TokenType.CURRENT_DATETIME, 265 TokenType.CURRENT_TIMESTAMP, 266 TokenType.CURRENT_TIME, 267 TokenType.FILTER, 268 TokenType.FIRST, 269 TokenType.FORMAT, 270 TokenType.IDENTIFIER, 271 TokenType.INDEX, 272 TokenType.ISNULL, 273 TokenType.ILIKE, 274 TokenType.LIKE, 275 TokenType.MERGE, 276 TokenType.OFFSET, 277 TokenType.PRIMARY_KEY, 278 TokenType.REPLACE, 279 TokenType.ROW, 280 TokenType.UNNEST, 281 TokenType.VAR, 282 TokenType.LEFT, 283 TokenType.RIGHT, 284 TokenType.DATE, 285 TokenType.DATETIME, 286 TokenType.TABLE, 287 TokenType.TIMESTAMP, 288 TokenType.TIMESTAMPTZ, 289 TokenType.WINDOW, 290 *TYPE_TOKENS, 291 *SUBQUERY_PREDICATES, 292 } 293 294 CONJUNCTION = { 295 TokenType.AND: exp.And, 296 TokenType.OR: exp.Or, 297 } 298 299 EQUALITY = { 300 TokenType.EQ: exp.EQ, 301 TokenType.NEQ: exp.NEQ, 302 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 303 } 304 305 COMPARISON = { 306 TokenType.GT: exp.GT, 307 TokenType.GTE: exp.GTE, 308 TokenType.LT: exp.LT, 309 TokenType.LTE: exp.LTE, 310 } 311 312 BITWISE = { 313 TokenType.AMP: exp.BitwiseAnd, 314 TokenType.CARET: exp.BitwiseXor, 315 TokenType.PIPE: exp.BitwiseOr, 316 TokenType.DPIPE: exp.DPipe, 317 } 318 319 TERM = { 320 TokenType.DASH: exp.Sub, 321 TokenType.PLUS: exp.Add, 322 TokenType.MOD: exp.Mod, 323 TokenType.COLLATE: exp.Collate, 324 } 325 326 FACTOR = { 327 TokenType.DIV: exp.IntDiv, 328 TokenType.LR_ARROW: exp.Distance, 329 TokenType.SLASH: exp.Div, 330 TokenType.STAR: exp.Mul, 331 } 332 333 TIMESTAMPS = { 334 TokenType.TIME, 335 TokenType.TIMESTAMP, 336 TokenType.TIMESTAMPTZ, 337 TokenType.TIMESTAMPLTZ, 338 } 339 340 SET_OPERATIONS = { 341 TokenType.UNION, 342 TokenType.INTERSECT, 343 TokenType.EXCEPT, 344 } 345 346 JOIN_SIDES = { 347 TokenType.LEFT, 348 TokenType.RIGHT, 349 TokenType.FULL, 350 } 351 352 JOIN_KINDS = { 353 TokenType.INNER, 354 TokenType.OUTER, 355 TokenType.CROSS, 356 TokenType.SEMI, 357 TokenType.ANTI, 358 } 359 360 LAMBDAS = { 361 TokenType.ARROW: lambda self, expressions: self.expression( 362 exp.Lambda, 363 this=self._parse_conjunction().transform( 364 self._replace_lambda, {node.name for node in expressions} 365 ), 366 expressions=expressions, 367 ), 368 TokenType.FARROW: lambda self, expressions: self.expression( 369 exp.Kwarg, 370 this=exp.Var(this=expressions[0].name), 371 expression=self._parse_conjunction(), 372 ), 373 } 374 375 COLUMN_OPERATORS = { 376 TokenType.DOT: None, 377 TokenType.DCOLON: lambda self, this, to: self.expression( 378 exp.Cast, 379 this=this, 380 to=to, 381 ), 382 TokenType.ARROW: lambda self, this, path: self.expression( 383 exp.JSONExtract, 384 this=this, 385 expression=path, 386 ), 387 TokenType.DARROW: lambda self, this, path: self.expression( 388 exp.JSONExtractScalar, 389 this=this, 390 expression=path, 391 ), 392 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 393 exp.JSONBExtract, 394 this=this, 395 expression=path, 396 ), 397 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 398 exp.JSONBExtractScalar, 399 this=this, 400 expression=path, 401 ), 402 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 403 exp.JSONBContains, 404 this=this, 405 expression=key, 406 ), 407 } 408 409 EXPRESSION_PARSERS = { 410 exp.Column: lambda self: self._parse_column(), 411 exp.DataType: lambda self: self._parse_types(), 412 exp.From: lambda self: self._parse_from(), 413 exp.Group: lambda self: self._parse_group(), 414 exp.Identifier: lambda self: self._parse_id_var(), 415 exp.Lateral: lambda self: self._parse_lateral(), 416 exp.Join: lambda self: self._parse_join(), 417 exp.Order: lambda self: self._parse_order(), 418 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 419 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 420 exp.Lambda: lambda self: self._parse_lambda(), 421 exp.Limit: lambda self: self._parse_limit(), 422 exp.Offset: lambda self: self._parse_offset(), 423 exp.TableAlias: lambda self: self._parse_table_alias(), 424 exp.Table: lambda self: self._parse_table(), 425 exp.Condition: lambda self: self._parse_conjunction(), 426 exp.Expression: lambda self: self._parse_statement(), 427 exp.Properties: lambda self: self._parse_properties(), 428 exp.Where: lambda self: self._parse_where(), 429 exp.Ordered: lambda self: self._parse_ordered(), 430 exp.Having: lambda self: self._parse_having(), 431 exp.With: lambda self: self._parse_with(), 432 exp.Window: lambda self: self._parse_named_window(), 433 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 434 } 435 436 STATEMENT_PARSERS = { 437 TokenType.ALTER: lambda self: self._parse_alter(), 438 TokenType.BEGIN: lambda self: self._parse_transaction(), 439 TokenType.CACHE: lambda self: self._parse_cache(), 440 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 441 TokenType.COMMENT: lambda self: self._parse_comment(), 442 TokenType.CREATE: lambda self: self._parse_create(), 443 TokenType.DELETE: lambda self: self._parse_delete(), 444 TokenType.DESC: lambda self: self._parse_describe(), 445 TokenType.DESCRIBE: lambda self: self._parse_describe(), 446 TokenType.DROP: lambda self: self._parse_drop(), 447 TokenType.END: lambda self: self._parse_commit_or_rollback(), 448 TokenType.INSERT: lambda self: self._parse_insert(), 449 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 450 TokenType.MERGE: lambda self: self._parse_merge(), 451 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 452 TokenType.UNCACHE: lambda self: self._parse_uncache(), 453 TokenType.UPDATE: lambda self: self._parse_update(), 454 TokenType.USE: lambda self: self.expression( 455 exp.Use, 456 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 457 and exp.Var(this=self._prev.text), 458 this=self._parse_table(schema=False), 459 ), 460 } 461 462 UNARY_PARSERS = { 463 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 464 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 465 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 466 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 467 } 468 469 PRIMARY_PARSERS = { 470 TokenType.STRING: lambda self, token: self.expression( 471 exp.Literal, this=token.text, is_string=True 472 ), 473 TokenType.NUMBER: lambda self, token: self.expression( 474 exp.Literal, this=token.text, is_string=False 475 ), 476 TokenType.STAR: lambda self, _: self.expression( 477 exp.Star, 478 **{"except": self._parse_except(), "replace": self._parse_replace()}, 479 ), 480 TokenType.NULL: lambda self, _: self.expression(exp.Null), 481 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 482 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 483 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 484 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 485 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 486 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 487 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 488 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 489 } 490 491 PLACEHOLDER_PARSERS = { 492 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 493 TokenType.PARAMETER: lambda self: self._parse_parameter(), 494 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 495 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 496 else None, 497 } 498 499 RANGE_PARSERS = { 500 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 501 TokenType.GLOB: lambda self, this: self._parse_escape( 502 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 503 ), 504 TokenType.OVERLAPS: lambda self, this: self._parse_escape( 505 self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) 506 ), 507 TokenType.IN: lambda self, this: self._parse_in(this), 508 TokenType.IS: lambda self, this: self._parse_is(this), 509 TokenType.LIKE: lambda self, this: self._parse_escape( 510 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) 511 ), 512 TokenType.ILIKE: lambda self, this: self._parse_escape( 513 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) 514 ), 515 TokenType.IRLIKE: lambda self, this: self.expression( 516 exp.RegexpILike, this=this, expression=self._parse_bitwise() 517 ), 518 TokenType.RLIKE: lambda self, this: self.expression( 519 exp.RegexpLike, this=this, expression=self._parse_bitwise() 520 ), 521 TokenType.SIMILAR_TO: lambda self, this: self.expression( 522 exp.SimilarTo, this=this, expression=self._parse_bitwise() 523 ), 524 } 525 526 PROPERTY_PARSERS = { 527 "AFTER": lambda self: self._parse_afterjournal( 528 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 529 ), 530 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 531 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 532 "BEFORE": lambda self: self._parse_journal( 533 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 534 ), 535 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 536 "CHARACTER SET": lambda self: self._parse_character_set(), 537 "CHECKSUM": lambda self: self._parse_checksum(), 538 "CLUSTER BY": lambda self: self.expression( 539 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 540 ), 541 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 542 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 543 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 544 default=self._prev.text.upper() == "DEFAULT" 545 ), 546 "DEFINER": lambda self: self._parse_definer(), 547 "DETERMINISTIC": lambda self: self.expression( 548 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 549 ), 550 "DISTKEY": lambda self: self._parse_distkey(), 551 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 552 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 553 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 554 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 555 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 556 "FREESPACE": lambda self: self._parse_freespace(), 557 "GLOBAL": lambda self: self._parse_temporary(global_=True), 558 "IMMUTABLE": lambda self: self.expression( 559 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 560 ), 561 "JOURNAL": lambda self: self._parse_journal( 562 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 563 ), 564 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 565 "LIKE": lambda self: self._parse_create_like(), 566 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 567 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 568 "LOCK": lambda self: self._parse_locking(), 569 "LOCKING": lambda self: self._parse_locking(), 570 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 571 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 572 "MAX": lambda self: self._parse_datablocksize(), 573 "MAXIMUM": lambda self: self._parse_datablocksize(), 574 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 575 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 576 ), 577 "MIN": lambda self: self._parse_datablocksize(), 578 "MINIMUM": lambda self: self._parse_datablocksize(), 579 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 580 "NO": lambda self: self._parse_noprimaryindex(), 581 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 582 "ON": lambda self: self._parse_oncommit(), 583 "PARTITION BY": lambda self: self._parse_partitioned_by(), 584 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 585 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 586 "RETURNS": lambda self: self._parse_returns(), 587 "ROW": lambda self: self._parse_row(), 588 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 589 "SORTKEY": lambda self: self._parse_sortkey(), 590 "STABLE": lambda self: self.expression( 591 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 592 ), 593 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 594 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 595 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 596 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 597 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 598 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 599 "VOLATILE": lambda self: self.expression( 600 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 601 ), 602 "WITH": lambda self: self._parse_with_property(), 603 } 604 605 CONSTRAINT_PARSERS = { 606 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 607 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 608 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 609 "CHARACTER SET": lambda self: self.expression( 610 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 611 ), 612 "CHECK": lambda self: self.expression( 613 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 614 ), 615 "COLLATE": lambda self: self.expression( 616 exp.CollateColumnConstraint, this=self._parse_var() 617 ), 618 "COMMENT": lambda self: self.expression( 619 exp.CommentColumnConstraint, this=self._parse_string() 620 ), 621 "COMPRESS": lambda self: self._parse_compress(), 622 "DEFAULT": lambda self: self.expression( 623 exp.DefaultColumnConstraint, this=self._parse_bitwise() 624 ), 625 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 626 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 627 "FORMAT": lambda self: self.expression( 628 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 629 ), 630 "GENERATED": lambda self: self._parse_generated_as_identity(), 631 "IDENTITY": lambda self: self._parse_auto_increment(), 632 "INLINE": lambda self: self._parse_inline(), 633 "LIKE": lambda self: self._parse_create_like(), 634 "NOT": lambda self: self._parse_not_constraint(), 635 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 636 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 637 "PRIMARY KEY": lambda self: self._parse_primary_key(), 638 "TITLE": lambda self: self.expression( 639 exp.TitleColumnConstraint, this=self._parse_var_or_string() 640 ), 641 "UNIQUE": lambda self: self._parse_unique(), 642 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 643 } 644 645 ALTER_PARSERS = { 646 "ADD": lambda self: self._parse_alter_table_add(), 647 "ALTER": lambda self: self._parse_alter_table_alter(), 648 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 649 "DROP": lambda self: self._parse_alter_table_drop(), 650 "RENAME": lambda self: self._parse_alter_table_rename(), 651 } 652 653 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 654 655 NO_PAREN_FUNCTION_PARSERS = { 656 TokenType.CASE: lambda self: self._parse_case(), 657 TokenType.IF: lambda self: self._parse_if(), 658 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 659 } 660 661 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 662 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 663 "TRY_CONVERT": lambda self: self._parse_convert(False), 664 "EXTRACT": lambda self: self._parse_extract(), 665 "POSITION": lambda self: self._parse_position(), 666 "SUBSTRING": lambda self: self._parse_substring(), 667 "TRIM": lambda self: self._parse_trim(), 668 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 669 "TRY_CAST": lambda self: self._parse_cast(False), 670 "STRING_AGG": lambda self: self._parse_string_agg(), 671 } 672 673 QUERY_MODIFIER_PARSERS = { 674 "match": lambda self: self._parse_match_recognize(), 675 "where": lambda self: self._parse_where(), 676 "group": lambda self: self._parse_group(), 677 "having": lambda self: self._parse_having(), 678 "qualify": lambda self: self._parse_qualify(), 679 "windows": lambda self: self._parse_window_clause(), 680 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 681 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 682 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 683 "order": lambda self: self._parse_order(), 684 "limit": lambda self: self._parse_limit(), 685 "offset": lambda self: self._parse_offset(), 686 "lock": lambda self: self._parse_lock(), 687 } 688 689 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 690 SET_PARSERS: t.Dict[str, t.Callable] = {} 691 692 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 693 694 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 695 696 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 697 698 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 699 700 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 701 702 STRICT_CAST = True 703 704 __slots__ = ( 705 "error_level", 706 "error_message_context", 707 "sql", 708 "errors", 709 "index_offset", 710 "unnest_column_only", 711 "alias_post_tablesample", 712 "max_errors", 713 "null_ordering", 714 "_tokens", 715 "_index", 716 "_curr", 717 "_next", 718 "_prev", 719 "_prev_comments", 720 "_show_trie", 721 "_set_trie", 722 ) 723 724 def __init__( 725 self, 726 error_level: t.Optional[ErrorLevel] = None, 727 error_message_context: int = 100, 728 index_offset: int = 0, 729 unnest_column_only: bool = False, 730 alias_post_tablesample: bool = False, 731 max_errors: int = 3, 732 null_ordering: t.Optional[str] = None, 733 ): 734 self.error_level = error_level or ErrorLevel.IMMEDIATE 735 self.error_message_context = error_message_context 736 self.index_offset = index_offset 737 self.unnest_column_only = unnest_column_only 738 self.alias_post_tablesample = alias_post_tablesample 739 self.max_errors = max_errors 740 self.null_ordering = null_ordering 741 self.reset() 742 743 def reset(self): 744 self.sql = "" 745 self.errors = [] 746 self._tokens = [] 747 self._index = 0 748 self._curr = None 749 self._next = None 750 self._prev = None 751 self._prev_comments = None 752 753 def parse( 754 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 755 ) -> t.List[t.Optional[exp.Expression]]: 756 """ 757 Parses a list of tokens and returns a list of syntax trees, one tree 758 per parsed SQL statement. 759 760 Args: 761 raw_tokens: the list of tokens. 762 sql: the original SQL string, used to produce helpful debug messages. 763 764 Returns: 765 The list of syntax trees. 766 """ 767 return self._parse( 768 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 769 ) 770 771 def parse_into( 772 self, 773 expression_types: exp.IntoType, 774 raw_tokens: t.List[Token], 775 sql: t.Optional[str] = None, 776 ) -> t.List[t.Optional[exp.Expression]]: 777 """ 778 Parses a list of tokens into a given Expression type. If a collection of Expression 779 types is given instead, this method will try to parse the token list into each one 780 of them, stopping at the first for which the parsing succeeds. 781 782 Args: 783 expression_types: the expression type(s) to try and parse the token list into. 784 raw_tokens: the list of tokens. 785 sql: the original SQL string, used to produce helpful debug messages. 786 787 Returns: 788 The target Expression. 789 """ 790 errors = [] 791 for expression_type in ensure_collection(expression_types): 792 parser = self.EXPRESSION_PARSERS.get(expression_type) 793 if not parser: 794 raise TypeError(f"No parser registered for {expression_type}") 795 try: 796 return self._parse(parser, raw_tokens, sql) 797 except ParseError as e: 798 e.errors[0]["into_expression"] = expression_type 799 errors.append(e) 800 raise ParseError( 801 f"Failed to parse into {expression_types}", 802 errors=merge_errors(errors), 803 ) from errors[-1] 804 805 def _parse( 806 self, 807 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 808 raw_tokens: t.List[Token], 809 sql: t.Optional[str] = None, 810 ) -> t.List[t.Optional[exp.Expression]]: 811 self.reset() 812 self.sql = sql or "" 813 total = len(raw_tokens) 814 chunks: t.List[t.List[Token]] = [[]] 815 816 for i, token in enumerate(raw_tokens): 817 if token.token_type == TokenType.SEMICOLON: 818 if i < total - 1: 819 chunks.append([]) 820 else: 821 chunks[-1].append(token) 822 823 expressions = [] 824 825 for tokens in chunks: 826 self._index = -1 827 self._tokens = tokens 828 self._advance() 829 830 expressions.append(parse_method(self)) 831 832 if self._index < len(self._tokens): 833 self.raise_error("Invalid expression / Unexpected token") 834 835 self.check_errors() 836 837 return expressions 838 839 def check_errors(self) -> None: 840 """ 841 Logs or raises any found errors, depending on the chosen error level setting. 842 """ 843 if self.error_level == ErrorLevel.WARN: 844 for error in self.errors: 845 logger.error(str(error)) 846 elif self.error_level == ErrorLevel.RAISE and self.errors: 847 raise ParseError( 848 concat_messages(self.errors, self.max_errors), 849 errors=merge_errors(self.errors), 850 ) 851 852 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 853 """ 854 Appends an error in the list of recorded errors or raises it, depending on the chosen 855 error level setting. 856 """ 857 token = token or self._curr or self._prev or Token.string("") 858 start = self._find_token(token) 859 end = start + len(token.text) 860 start_context = self.sql[max(start - self.error_message_context, 0) : start] 861 highlight = self.sql[start:end] 862 end_context = self.sql[end : end + self.error_message_context] 863 864 error = ParseError.new( 865 f"{message}. Line {token.line}, Col: {token.col}.\n" 866 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 867 description=message, 868 line=token.line, 869 col=token.col, 870 start_context=start_context, 871 highlight=highlight, 872 end_context=end_context, 873 ) 874 875 if self.error_level == ErrorLevel.IMMEDIATE: 876 raise error 877 878 self.errors.append(error) 879 880 def expression( 881 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 882 ) -> exp.Expression: 883 """ 884 Creates a new, validated Expression. 885 886 Args: 887 exp_class: the expression class to instantiate. 888 comments: an optional list of comments to attach to the expression. 889 kwargs: the arguments to set for the expression along with their respective values. 890 891 Returns: 892 The target expression. 893 """ 894 instance = exp_class(**kwargs) 895 if self._prev_comments: 896 instance.comments = self._prev_comments 897 self._prev_comments = None 898 if comments: 899 instance.comments = comments 900 self.validate_expression(instance) 901 return instance 902 903 def validate_expression( 904 self, expression: exp.Expression, args: t.Optional[t.List] = None 905 ) -> None: 906 """ 907 Validates an already instantiated expression, making sure that all its mandatory arguments 908 are set. 909 910 Args: 911 expression: the expression to validate. 912 args: an optional list of items that was used to instantiate the expression, if it's a Func. 913 """ 914 if self.error_level == ErrorLevel.IGNORE: 915 return 916 917 for error_message in expression.error_messages(args): 918 self.raise_error(error_message) 919 920 def _find_sql(self, start: Token, end: Token) -> str: 921 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 922 923 def _find_token(self, token: Token) -> int: 924 line = 1 925 col = 1 926 index = 0 927 928 while line < token.line or col < token.col: 929 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 930 line += 1 931 col = 1 932 else: 933 col += 1 934 index += 1 935 936 return index 937 938 def _advance(self, times: int = 1) -> None: 939 self._index += times 940 self._curr = seq_get(self._tokens, self._index) 941 self._next = seq_get(self._tokens, self._index + 1) 942 if self._index > 0: 943 self._prev = self._tokens[self._index - 1] 944 self._prev_comments = self._prev.comments 945 else: 946 self._prev = None 947 self._prev_comments = None 948 949 def _retreat(self, index: int) -> None: 950 self._advance(index - self._index) 951 952 def _parse_command(self) -> exp.Expression: 953 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 954 955 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 956 start = self._prev 957 exists = self._parse_exists() if allow_exists else None 958 959 self._match(TokenType.ON) 960 961 kind = self._match_set(self.CREATABLES) and self._prev 962 963 if not kind: 964 return self._parse_as_command(start) 965 966 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 967 this = self._parse_user_defined_function(kind=kind.token_type) 968 elif kind.token_type == TokenType.TABLE: 969 this = self._parse_table() 970 elif kind.token_type == TokenType.COLUMN: 971 this = self._parse_column() 972 else: 973 this = self._parse_id_var() 974 975 self._match(TokenType.IS) 976 977 return self.expression( 978 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 979 ) 980 981 def _parse_statement(self) -> t.Optional[exp.Expression]: 982 if self._curr is None: 983 return None 984 985 if self._match_set(self.STATEMENT_PARSERS): 986 return self.STATEMENT_PARSERS[self._prev.token_type](self) 987 988 if self._match_set(Tokenizer.COMMANDS): 989 return self._parse_command() 990 991 expression = self._parse_expression() 992 expression = self._parse_set_operations(expression) if expression else self._parse_select() 993 994 self._parse_query_modifiers(expression) 995 return expression 996 997 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 998 start = self._prev 999 temporary = self._match(TokenType.TEMPORARY) 1000 materialized = self._match(TokenType.MATERIALIZED) 1001 kind = self._match_set(self.CREATABLES) and self._prev.text 1002 if not kind: 1003 if default_kind: 1004 kind = default_kind 1005 else: 1006 return self._parse_as_command(start) 1007 1008 return self.expression( 1009 exp.Drop, 1010 exists=self._parse_exists(), 1011 this=self._parse_table(schema=True), 1012 kind=kind, 1013 temporary=temporary, 1014 materialized=materialized, 1015 cascade=self._match(TokenType.CASCADE), 1016 ) 1017 1018 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1019 return ( 1020 self._match(TokenType.IF) 1021 and (not not_ or self._match(TokenType.NOT)) 1022 and self._match(TokenType.EXISTS) 1023 ) 1024 1025 def _parse_create(self) -> t.Optional[exp.Expression]: 1026 start = self._prev 1027 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1028 TokenType.OR, TokenType.REPLACE 1029 ) 1030 unique = self._match(TokenType.UNIQUE) 1031 volatile = self._match(TokenType.VOLATILE) 1032 1033 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1034 self._match(TokenType.TABLE) 1035 1036 properties = None 1037 create_token = self._match_set(self.CREATABLES) and self._prev 1038 1039 if not create_token: 1040 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1041 create_token = self._match_set(self.CREATABLES) and self._prev 1042 1043 if not properties or not create_token: 1044 return self._parse_as_command(start) 1045 1046 exists = self._parse_exists(not_=True) 1047 this = None 1048 expression = None 1049 indexes = None 1050 no_schema_binding = None 1051 begin = None 1052 1053 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=create_token.token_type) 1055 temp_properties = self._parse_properties() 1056 if properties and temp_properties: 1057 properties.expressions.extend(temp_properties.expressions) 1058 elif temp_properties: 1059 properties = temp_properties 1060 1061 self._match(TokenType.ALIAS) 1062 begin = self._match(TokenType.BEGIN) 1063 return_ = self._match_text_seq("RETURN") 1064 expression = self._parse_statement() 1065 1066 if return_: 1067 expression = self.expression(exp.Return, this=expression) 1068 elif create_token.token_type == TokenType.INDEX: 1069 this = self._parse_index() 1070 elif create_token.token_type in self.DB_CREATABLES: 1071 table_parts = self._parse_table_parts(schema=True) 1072 1073 # exp.Properties.Location.POST_NAME 1074 if self._match(TokenType.COMMA): 1075 temp_properties = self._parse_properties(before=True) 1076 if properties and temp_properties: 1077 properties.expressions.extend(temp_properties.expressions) 1078 elif temp_properties: 1079 properties = temp_properties 1080 1081 this = self._parse_schema(this=table_parts) 1082 1083 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1084 temp_properties = self._parse_properties() 1085 if properties and temp_properties: 1086 properties.expressions.extend(temp_properties.expressions) 1087 elif temp_properties: 1088 properties = temp_properties 1089 1090 self._match(TokenType.ALIAS) 1091 1092 # exp.Properties.Location.POST_ALIAS 1093 if not ( 1094 self._match(TokenType.SELECT, advance=False) 1095 or self._match(TokenType.WITH, advance=False) 1096 or self._match(TokenType.L_PAREN, advance=False) 1097 ): 1098 temp_properties = self._parse_properties() 1099 if properties and temp_properties: 1100 properties.expressions.extend(temp_properties.expressions) 1101 elif temp_properties: 1102 properties = temp_properties 1103 1104 expression = self._parse_ddl_select() 1105 1106 if create_token.token_type == TokenType.TABLE: 1107 # exp.Properties.Location.POST_EXPRESSION 1108 temp_properties = self._parse_properties() 1109 if properties and temp_properties: 1110 properties.expressions.extend(temp_properties.expressions) 1111 elif temp_properties: 1112 properties = temp_properties 1113 1114 indexes = [] 1115 while True: 1116 index = self._parse_create_table_index() 1117 1118 # exp.Properties.Location.POST_INDEX 1119 if self._match(TokenType.PARTITION_BY, advance=False): 1120 temp_properties = self._parse_properties() 1121 if properties and temp_properties: 1122 properties.expressions.extend(temp_properties.expressions) 1123 elif temp_properties: 1124 properties = temp_properties 1125 1126 if not index: 1127 break 1128 else: 1129 indexes.append(index) 1130 elif create_token.token_type == TokenType.VIEW: 1131 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1132 no_schema_binding = True 1133 1134 return self.expression( 1135 exp.Create, 1136 this=this, 1137 kind=create_token.text, 1138 replace=replace, 1139 unique=unique, 1140 volatile=volatile, 1141 expression=expression, 1142 exists=exists, 1143 properties=properties, 1144 indexes=indexes, 1145 no_schema_binding=no_schema_binding, 1146 begin=begin, 1147 ) 1148 1149 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1150 self._match(TokenType.COMMA) 1151 1152 # parsers look to _prev for no/dual/default, so need to consume first 1153 self._match_text_seq("NO") 1154 self._match_text_seq("DUAL") 1155 self._match_text_seq("DEFAULT") 1156 1157 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1158 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1159 1160 return None 1161 1162 def _parse_property(self) -> t.Optional[exp.Expression]: 1163 if self._match_texts(self.PROPERTY_PARSERS): 1164 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1165 1166 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1167 return self._parse_character_set(default=True) 1168 1169 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1170 return self._parse_sortkey(compound=True) 1171 1172 if self._match_text_seq("SQL", "SECURITY"): 1173 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1174 1175 assignment = self._match_pair( 1176 TokenType.VAR, TokenType.EQ, advance=False 1177 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1178 1179 if assignment: 1180 key = self._parse_var_or_string() 1181 self._match(TokenType.EQ) 1182 return self.expression(exp.Property, this=key, value=self._parse_column()) 1183 1184 return None 1185 1186 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1187 self._match(TokenType.EQ) 1188 self._match(TokenType.ALIAS) 1189 return self.expression( 1190 exp_class, 1191 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1192 ) 1193 1194 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1195 properties = [] 1196 1197 while True: 1198 if before: 1199 identified_property = self._parse_property_before() 1200 else: 1201 identified_property = self._parse_property() 1202 1203 if not identified_property: 1204 break 1205 for p in ensure_collection(identified_property): 1206 properties.append(p) 1207 1208 if properties: 1209 return self.expression(exp.Properties, expressions=properties) 1210 1211 return None 1212 1213 def _parse_fallback(self, no=False) -> exp.Expression: 1214 self._match_text_seq("FALLBACK") 1215 return self.expression( 1216 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1217 ) 1218 1219 def _parse_with_property( 1220 self, 1221 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1222 self._match(TokenType.WITH) 1223 if self._match(TokenType.L_PAREN, advance=False): 1224 return self._parse_wrapped_csv(self._parse_property) 1225 1226 if self._match_text_seq("JOURNAL"): 1227 return self._parse_withjournaltable() 1228 1229 if self._match_text_seq("DATA"): 1230 return self._parse_withdata(no=False) 1231 elif self._match_text_seq("NO", "DATA"): 1232 return self._parse_withdata(no=True) 1233 1234 if not self._next: 1235 return None 1236 1237 return self._parse_withisolatedloading() 1238 1239 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1240 def _parse_definer(self) -> t.Optional[exp.Expression]: 1241 self._match(TokenType.EQ) 1242 1243 user = self._parse_id_var() 1244 self._match(TokenType.PARAMETER) 1245 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1246 1247 if not user or not host: 1248 return None 1249 1250 return exp.DefinerProperty(this=f"{user}@{host}") 1251 1252 def _parse_withjournaltable(self) -> exp.Expression: 1253 self._match(TokenType.TABLE) 1254 self._match(TokenType.EQ) 1255 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1256 1257 def _parse_log(self, no=False) -> exp.Expression: 1258 self._match_text_seq("LOG") 1259 return self.expression(exp.LogProperty, no=no) 1260 1261 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1262 before = self._match_text_seq("BEFORE") 1263 self._match_text_seq("JOURNAL") 1264 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1265 1266 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1267 self._match_text_seq("NOT") 1268 self._match_text_seq("LOCAL") 1269 self._match_text_seq("AFTER", "JOURNAL") 1270 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1271 1272 def _parse_checksum(self) -> exp.Expression: 1273 self._match_text_seq("CHECKSUM") 1274 self._match(TokenType.EQ) 1275 1276 on = None 1277 if self._match(TokenType.ON): 1278 on = True 1279 elif self._match_text_seq("OFF"): 1280 on = False 1281 default = self._match(TokenType.DEFAULT) 1282 1283 return self.expression( 1284 exp.ChecksumProperty, 1285 on=on, 1286 default=default, 1287 ) 1288 1289 def _parse_freespace(self) -> exp.Expression: 1290 self._match_text_seq("FREESPACE") 1291 self._match(TokenType.EQ) 1292 return self.expression( 1293 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1294 ) 1295 1296 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1297 self._match_text_seq("MERGEBLOCKRATIO") 1298 if self._match(TokenType.EQ): 1299 return self.expression( 1300 exp.MergeBlockRatioProperty, 1301 this=self._parse_number(), 1302 percent=self._match(TokenType.PERCENT), 1303 ) 1304 else: 1305 return self.expression( 1306 exp.MergeBlockRatioProperty, 1307 no=no, 1308 default=default, 1309 ) 1310 1311 def _parse_datablocksize(self, default=None) -> exp.Expression: 1312 if default: 1313 self._match_text_seq("DATABLOCKSIZE") 1314 return self.expression(exp.DataBlocksizeProperty, default=True) 1315 elif self._match_texts(("MIN", "MINIMUM")): 1316 self._match_text_seq("DATABLOCKSIZE") 1317 return self.expression(exp.DataBlocksizeProperty, min=True) 1318 elif self._match_texts(("MAX", "MAXIMUM")): 1319 self._match_text_seq("DATABLOCKSIZE") 1320 return self.expression(exp.DataBlocksizeProperty, min=False) 1321 1322 self._match_text_seq("DATABLOCKSIZE") 1323 self._match(TokenType.EQ) 1324 size = self._parse_number() 1325 units = None 1326 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1327 units = self._prev.text 1328 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1329 1330 def _parse_blockcompression(self) -> exp.Expression: 1331 self._match_text_seq("BLOCKCOMPRESSION") 1332 self._match(TokenType.EQ) 1333 always = self._match_text_seq("ALWAYS") 1334 manual = self._match_text_seq("MANUAL") 1335 never = self._match_text_seq("NEVER") 1336 default = self._match_text_seq("DEFAULT") 1337 autotemp = None 1338 if self._match_text_seq("AUTOTEMP"): 1339 autotemp = self._parse_schema() 1340 1341 return self.expression( 1342 exp.BlockCompressionProperty, 1343 always=always, 1344 manual=manual, 1345 never=never, 1346 default=default, 1347 autotemp=autotemp, 1348 ) 1349 1350 def _parse_withisolatedloading(self) -> exp.Expression: 1351 no = self._match_text_seq("NO") 1352 concurrent = self._match_text_seq("CONCURRENT") 1353 self._match_text_seq("ISOLATED", "LOADING") 1354 for_all = self._match_text_seq("FOR", "ALL") 1355 for_insert = self._match_text_seq("FOR", "INSERT") 1356 for_none = self._match_text_seq("FOR", "NONE") 1357 return self.expression( 1358 exp.IsolatedLoadingProperty, 1359 no=no, 1360 concurrent=concurrent, 1361 for_all=for_all, 1362 for_insert=for_insert, 1363 for_none=for_none, 1364 ) 1365 1366 def _parse_locking(self) -> exp.Expression: 1367 if self._match(TokenType.TABLE): 1368 kind = "TABLE" 1369 elif self._match(TokenType.VIEW): 1370 kind = "VIEW" 1371 elif self._match(TokenType.ROW): 1372 kind = "ROW" 1373 elif self._match_text_seq("DATABASE"): 1374 kind = "DATABASE" 1375 else: 1376 kind = None 1377 1378 if kind in ("DATABASE", "TABLE", "VIEW"): 1379 this = self._parse_table_parts() 1380 else: 1381 this = None 1382 1383 if self._match(TokenType.FOR): 1384 for_or_in = "FOR" 1385 elif self._match(TokenType.IN): 1386 for_or_in = "IN" 1387 else: 1388 for_or_in = None 1389 1390 if self._match_text_seq("ACCESS"): 1391 lock_type = "ACCESS" 1392 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1393 lock_type = "EXCLUSIVE" 1394 elif self._match_text_seq("SHARE"): 1395 lock_type = "SHARE" 1396 elif self._match_text_seq("READ"): 1397 lock_type = "READ" 1398 elif self._match_text_seq("WRITE"): 1399 lock_type = "WRITE" 1400 elif self._match_text_seq("CHECKSUM"): 1401 lock_type = "CHECKSUM" 1402 else: 1403 lock_type = None 1404 1405 override = self._match_text_seq("OVERRIDE") 1406 1407 return self.expression( 1408 exp.LockingProperty, 1409 this=this, 1410 kind=kind, 1411 for_or_in=for_or_in, 1412 lock_type=lock_type, 1413 override=override, 1414 ) 1415 1416 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1417 if self._match(TokenType.PARTITION_BY): 1418 return self._parse_csv(self._parse_conjunction) 1419 return [] 1420 1421 def _parse_partitioned_by(self) -> exp.Expression: 1422 self._match(TokenType.EQ) 1423 return self.expression( 1424 exp.PartitionedByProperty, 1425 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1426 ) 1427 1428 def _parse_withdata(self, no=False) -> exp.Expression: 1429 if self._match_text_seq("AND", "STATISTICS"): 1430 statistics = True 1431 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1432 statistics = False 1433 else: 1434 statistics = None 1435 1436 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1437 1438 def _parse_noprimaryindex(self) -> exp.Expression: 1439 self._match_text_seq("PRIMARY", "INDEX") 1440 return exp.NoPrimaryIndexProperty() 1441 1442 def _parse_oncommit(self) -> exp.Expression: 1443 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1444 return exp.OnCommitProperty() 1445 1446 def _parse_distkey(self) -> exp.Expression: 1447 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1448 1449 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1450 table = self._parse_table(schema=True) 1451 options = [] 1452 while self._match_texts(("INCLUDING", "EXCLUDING")): 1453 this = self._prev.text.upper() 1454 id_var = self._parse_id_var() 1455 1456 if not id_var: 1457 return None 1458 1459 options.append( 1460 self.expression( 1461 exp.Property, 1462 this=this, 1463 value=exp.Var(this=id_var.this.upper()), 1464 ) 1465 ) 1466 return self.expression(exp.LikeProperty, this=table, expressions=options) 1467 1468 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1469 return self.expression( 1470 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1471 ) 1472 1473 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1474 self._match(TokenType.EQ) 1475 return self.expression( 1476 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1477 ) 1478 1479 def _parse_returns(self) -> exp.Expression: 1480 value: t.Optional[exp.Expression] 1481 is_table = self._match(TokenType.TABLE) 1482 1483 if is_table: 1484 if self._match(TokenType.LT): 1485 value = self.expression( 1486 exp.Schema, 1487 this="TABLE", 1488 expressions=self._parse_csv(self._parse_struct_kwargs), 1489 ) 1490 if not self._match(TokenType.GT): 1491 self.raise_error("Expecting >") 1492 else: 1493 value = self._parse_schema(exp.Var(this="TABLE")) 1494 else: 1495 value = self._parse_types() 1496 1497 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1498 1499 def _parse_temporary(self, global_=False) -> exp.Expression: 1500 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1501 return self.expression(exp.TemporaryProperty, global_=global_) 1502 1503 def _parse_describe(self) -> exp.Expression: 1504 kind = self._match_set(self.CREATABLES) and self._prev.text 1505 this = self._parse_table() 1506 1507 return self.expression(exp.Describe, this=this, kind=kind) 1508 1509 def _parse_insert(self) -> exp.Expression: 1510 overwrite = self._match(TokenType.OVERWRITE) 1511 local = self._match(TokenType.LOCAL) 1512 1513 this: t.Optional[exp.Expression] 1514 1515 alternative = None 1516 if self._match_text_seq("DIRECTORY"): 1517 this = self.expression( 1518 exp.Directory, 1519 this=self._parse_var_or_string(), 1520 local=local, 1521 row_format=self._parse_row_format(match_row=True), 1522 ) 1523 else: 1524 if self._match(TokenType.OR): 1525 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1526 1527 self._match(TokenType.INTO) 1528 self._match(TokenType.TABLE) 1529 this = self._parse_table(schema=True) 1530 1531 return self.expression( 1532 exp.Insert, 1533 this=this, 1534 exists=self._parse_exists(), 1535 partition=self._parse_partition(), 1536 expression=self._parse_ddl_select(), 1537 overwrite=overwrite, 1538 alternative=alternative, 1539 ) 1540 1541 def _parse_row(self) -> t.Optional[exp.Expression]: 1542 if not self._match(TokenType.FORMAT): 1543 return None 1544 return self._parse_row_format() 1545 1546 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1547 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1548 return None 1549 1550 if self._match_text_seq("SERDE"): 1551 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1552 1553 self._match_text_seq("DELIMITED") 1554 1555 kwargs = {} 1556 1557 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1558 kwargs["fields"] = self._parse_string() 1559 if self._match_text_seq("ESCAPED", "BY"): 1560 kwargs["escaped"] = self._parse_string() 1561 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1562 kwargs["collection_items"] = self._parse_string() 1563 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1564 kwargs["map_keys"] = self._parse_string() 1565 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1566 kwargs["lines"] = self._parse_string() 1567 if self._match_text_seq("NULL", "DEFINED", "AS"): 1568 kwargs["null"] = self._parse_string() 1569 1570 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1571 1572 def _parse_load_data(self) -> exp.Expression: 1573 local = self._match(TokenType.LOCAL) 1574 self._match_text_seq("INPATH") 1575 inpath = self._parse_string() 1576 overwrite = self._match(TokenType.OVERWRITE) 1577 self._match_pair(TokenType.INTO, TokenType.TABLE) 1578 1579 return self.expression( 1580 exp.LoadData, 1581 this=self._parse_table(schema=True), 1582 local=local, 1583 overwrite=overwrite, 1584 inpath=inpath, 1585 partition=self._parse_partition(), 1586 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1587 serde=self._match_text_seq("SERDE") and self._parse_string(), 1588 ) 1589 1590 def _parse_delete(self) -> exp.Expression: 1591 self._match(TokenType.FROM) 1592 1593 return self.expression( 1594 exp.Delete, 1595 this=self._parse_table(schema=True), 1596 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1597 where=self._parse_where(), 1598 ) 1599 1600 def _parse_update(self) -> exp.Expression: 1601 return self.expression( 1602 exp.Update, 1603 **{ # type: ignore 1604 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1605 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1606 "from": self._parse_from(), 1607 "where": self._parse_where(), 1608 }, 1609 ) 1610 1611 def _parse_uncache(self) -> exp.Expression: 1612 if not self._match(TokenType.TABLE): 1613 self.raise_error("Expecting TABLE after UNCACHE") 1614 1615 return self.expression( 1616 exp.Uncache, 1617 exists=self._parse_exists(), 1618 this=self._parse_table(schema=True), 1619 ) 1620 1621 def _parse_cache(self) -> exp.Expression: 1622 lazy = self._match(TokenType.LAZY) 1623 self._match(TokenType.TABLE) 1624 table = self._parse_table(schema=True) 1625 options = [] 1626 1627 if self._match(TokenType.OPTIONS): 1628 self._match_l_paren() 1629 k = self._parse_string() 1630 self._match(TokenType.EQ) 1631 v = self._parse_string() 1632 options = [k, v] 1633 self._match_r_paren() 1634 1635 self._match(TokenType.ALIAS) 1636 return self.expression( 1637 exp.Cache, 1638 this=table, 1639 lazy=lazy, 1640 options=options, 1641 expression=self._parse_select(nested=True), 1642 ) 1643 1644 def _parse_partition(self) -> t.Optional[exp.Expression]: 1645 if not self._match(TokenType.PARTITION): 1646 return None 1647 1648 return self.expression( 1649 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1650 ) 1651 1652 def _parse_value(self) -> exp.Expression: 1653 if self._match(TokenType.L_PAREN): 1654 expressions = self._parse_csv(self._parse_conjunction) 1655 self._match_r_paren() 1656 return self.expression(exp.Tuple, expressions=expressions) 1657 1658 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1659 # Source: https://prestodb.io/docs/current/sql/values.html 1660 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1661 1662 def _parse_select( 1663 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1664 ) -> t.Optional[exp.Expression]: 1665 cte = self._parse_with() 1666 if cte: 1667 this = self._parse_statement() 1668 1669 if not this: 1670 self.raise_error("Failed to parse any statement following CTE") 1671 return cte 1672 1673 if "with" in this.arg_types: 1674 this.set("with", cte) 1675 else: 1676 self.raise_error(f"{this.key} does not support CTE") 1677 this = cte 1678 elif self._match(TokenType.SELECT): 1679 comments = self._prev_comments 1680 1681 hint = self._parse_hint() 1682 all_ = self._match(TokenType.ALL) 1683 distinct = self._match(TokenType.DISTINCT) 1684 1685 if distinct: 1686 distinct = self.expression( 1687 exp.Distinct, 1688 on=self._parse_value() if self._match(TokenType.ON) else None, 1689 ) 1690 1691 if all_ and distinct: 1692 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1693 1694 limit = self._parse_limit(top=True) 1695 expressions = self._parse_csv(self._parse_expression) 1696 1697 this = self.expression( 1698 exp.Select, 1699 hint=hint, 1700 distinct=distinct, 1701 expressions=expressions, 1702 limit=limit, 1703 ) 1704 this.comments = comments 1705 1706 into = self._parse_into() 1707 if into: 1708 this.set("into", into) 1709 1710 from_ = self._parse_from() 1711 if from_: 1712 this.set("from", from_) 1713 1714 self._parse_query_modifiers(this) 1715 elif (table or nested) and self._match(TokenType.L_PAREN): 1716 this = self._parse_table() if table else self._parse_select(nested=True) 1717 self._parse_query_modifiers(this) 1718 this = self._parse_set_operations(this) 1719 self._match_r_paren() 1720 1721 # early return so that subquery unions aren't parsed again 1722 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1723 # Union ALL should be a property of the top select node, not the subquery 1724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1725 elif self._match(TokenType.VALUES): 1726 this = self.expression( 1727 exp.Values, 1728 expressions=self._parse_csv(self._parse_value), 1729 alias=self._parse_table_alias(), 1730 ) 1731 else: 1732 this = None 1733 1734 return self._parse_set_operations(this) 1735 1736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1737 if not skip_with_token and not self._match(TokenType.WITH): 1738 return None 1739 1740 recursive = self._match(TokenType.RECURSIVE) 1741 1742 expressions = [] 1743 while True: 1744 expressions.append(self._parse_cte()) 1745 1746 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1747 break 1748 else: 1749 self._match(TokenType.WITH) 1750 1751 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1752 1753 def _parse_cte(self) -> exp.Expression: 1754 alias = self._parse_table_alias() 1755 if not alias or not alias.this: 1756 self.raise_error("Expected CTE to have alias") 1757 1758 self._match(TokenType.ALIAS) 1759 1760 return self.expression( 1761 exp.CTE, 1762 this=self._parse_wrapped(self._parse_statement), 1763 alias=alias, 1764 ) 1765 1766 def _parse_table_alias( 1767 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1768 ) -> t.Optional[exp.Expression]: 1769 any_token = self._match(TokenType.ALIAS) 1770 alias = self._parse_id_var( 1771 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1772 ) 1773 index = self._index 1774 1775 if self._match(TokenType.L_PAREN): 1776 columns = self._parse_csv(self._parse_function_parameter) 1777 self._match_r_paren() if columns else self._retreat(index) 1778 else: 1779 columns = None 1780 1781 if not alias and not columns: 1782 return None 1783 1784 return self.expression(exp.TableAlias, this=alias, columns=columns) 1785 1786 def _parse_subquery( 1787 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1788 ) -> exp.Expression: 1789 return self.expression( 1790 exp.Subquery, 1791 this=this, 1792 pivots=self._parse_pivots(), 1793 alias=self._parse_table_alias() if parse_alias else None, 1794 ) 1795 1796 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1797 if not isinstance(this, self.MODIFIABLES): 1798 return 1799 1800 table = isinstance(this, exp.Table) 1801 1802 while True: 1803 lateral = self._parse_lateral() 1804 join = self._parse_join() 1805 comma = None if table else self._match(TokenType.COMMA) 1806 if lateral: 1807 this.append("laterals", lateral) 1808 if join: 1809 this.append("joins", join) 1810 if comma: 1811 this.args["from"].append("expressions", self._parse_table()) 1812 if not (lateral or join or comma): 1813 break 1814 1815 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1816 expression = parser(self) 1817 1818 if expression: 1819 this.set(key, expression) 1820 1821 def _parse_hint(self) -> t.Optional[exp.Expression]: 1822 if self._match(TokenType.HINT): 1823 hints = self._parse_csv(self._parse_function) 1824 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1825 self.raise_error("Expected */ after HINT") 1826 return self.expression(exp.Hint, expressions=hints) 1827 1828 return None 1829 1830 def _parse_into(self) -> t.Optional[exp.Expression]: 1831 if not self._match(TokenType.INTO): 1832 return None 1833 1834 temp = self._match(TokenType.TEMPORARY) 1835 unlogged = self._match(TokenType.UNLOGGED) 1836 self._match(TokenType.TABLE) 1837 1838 return self.expression( 1839 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1840 ) 1841 1842 def _parse_from(self) -> t.Optional[exp.Expression]: 1843 if not self._match(TokenType.FROM): 1844 return None 1845 1846 return self.expression( 1847 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1848 ) 1849 1850 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1851 if not self._match(TokenType.MATCH_RECOGNIZE): 1852 return None 1853 self._match_l_paren() 1854 1855 partition = self._parse_partition_by() 1856 order = self._parse_order() 1857 measures = ( 1858 self._parse_alias(self._parse_conjunction()) 1859 if self._match_text_seq("MEASURES") 1860 else None 1861 ) 1862 1863 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1864 rows = exp.Var(this="ONE ROW PER MATCH") 1865 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1866 text = "ALL ROWS PER MATCH" 1867 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1868 text += f" SHOW EMPTY MATCHES" 1869 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1870 text += f" OMIT EMPTY MATCHES" 1871 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1872 text += f" WITH UNMATCHED ROWS" 1873 rows = exp.Var(this=text) 1874 else: 1875 rows = None 1876 1877 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1878 text = "AFTER MATCH SKIP" 1879 if self._match_text_seq("PAST", "LAST", "ROW"): 1880 text += f" PAST LAST ROW" 1881 elif self._match_text_seq("TO", "NEXT", "ROW"): 1882 text += f" TO NEXT ROW" 1883 elif self._match_text_seq("TO", "FIRST"): 1884 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1885 elif self._match_text_seq("TO", "LAST"): 1886 text += f" TO LAST {self._advance_any().text}" # type: ignore 1887 after = exp.Var(this=text) 1888 else: 1889 after = None 1890 1891 if self._match_text_seq("PATTERN"): 1892 self._match_l_paren() 1893 1894 if not self._curr: 1895 self.raise_error("Expecting )", self._curr) 1896 1897 paren = 1 1898 start = self._curr 1899 1900 while self._curr and paren > 0: 1901 if self._curr.token_type == TokenType.L_PAREN: 1902 paren += 1 1903 if self._curr.token_type == TokenType.R_PAREN: 1904 paren -= 1 1905 end = self._prev 1906 self._advance() 1907 if paren > 0: 1908 self.raise_error("Expecting )", self._curr) 1909 pattern = exp.Var(this=self._find_sql(start, end)) 1910 else: 1911 pattern = None 1912 1913 define = ( 1914 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1915 ) 1916 self._match_r_paren() 1917 1918 return self.expression( 1919 exp.MatchRecognize, 1920 partition_by=partition, 1921 order=order, 1922 measures=measures, 1923 rows=rows, 1924 after=after, 1925 pattern=pattern, 1926 define=define, 1927 ) 1928 1929 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1930 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1931 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1932 1933 if outer_apply or cross_apply: 1934 this = self._parse_select(table=True) 1935 view = None 1936 outer = not cross_apply 1937 elif self._match(TokenType.LATERAL): 1938 this = self._parse_select(table=True) 1939 view = self._match(TokenType.VIEW) 1940 outer = self._match(TokenType.OUTER) 1941 else: 1942 return None 1943 1944 if not this: 1945 this = self._parse_function() or self._parse_id_var(any_token=False) 1946 while self._match(TokenType.DOT): 1947 this = exp.Dot( 1948 this=this, 1949 expression=self._parse_function() or self._parse_id_var(any_token=False), 1950 ) 1951 1952 table_alias: t.Optional[exp.Expression] 1953 1954 if view: 1955 table = self._parse_id_var(any_token=False) 1956 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1957 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1958 else: 1959 table_alias = self._parse_table_alias() 1960 1961 expression = self.expression( 1962 exp.Lateral, 1963 this=this, 1964 view=view, 1965 outer=outer, 1966 alias=table_alias, 1967 ) 1968 1969 if outer_apply or cross_apply: 1970 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1971 1972 return expression 1973 1974 def _parse_join_side_and_kind( 1975 self, 1976 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1977 return ( 1978 self._match(TokenType.NATURAL) and self._prev, 1979 self._match_set(self.JOIN_SIDES) and self._prev, 1980 self._match_set(self.JOIN_KINDS) and self._prev, 1981 ) 1982 1983 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1984 natural, side, kind = self._parse_join_side_and_kind() 1985 1986 if not skip_join_token and not self._match(TokenType.JOIN): 1987 return None 1988 1989 kwargs: t.Dict[ 1990 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1991 ] = {"this": self._parse_table()} 1992 1993 if natural: 1994 kwargs["natural"] = True 1995 if side: 1996 kwargs["side"] = side.text 1997 if kind: 1998 kwargs["kind"] = kind.text 1999 2000 if self._match(TokenType.ON): 2001 kwargs["on"] = self._parse_conjunction() 2002 elif self._match(TokenType.USING): 2003 kwargs["using"] = self._parse_wrapped_id_vars() 2004 2005 return self.expression(exp.Join, **kwargs) # type: ignore 2006 2007 def _parse_index(self) -> exp.Expression: 2008 index = self._parse_id_var() 2009 self._match(TokenType.ON) 2010 self._match(TokenType.TABLE) # hive 2011 2012 return self.expression( 2013 exp.Index, 2014 this=index, 2015 table=self.expression(exp.Table, this=self._parse_id_var()), 2016 columns=self._parse_expression(), 2017 ) 2018 2019 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2020 unique = self._match(TokenType.UNIQUE) 2021 primary = self._match_text_seq("PRIMARY") 2022 amp = self._match_text_seq("AMP") 2023 if not self._match(TokenType.INDEX): 2024 return None 2025 index = self._parse_id_var() 2026 columns = None 2027 if self._match(TokenType.L_PAREN, advance=False): 2028 columns = self._parse_wrapped_csv(self._parse_column) 2029 return self.expression( 2030 exp.Index, 2031 this=index, 2032 columns=columns, 2033 unique=unique, 2034 primary=primary, 2035 amp=amp, 2036 ) 2037 2038 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2039 catalog = None 2040 db = None 2041 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 2042 2043 while self._match(TokenType.DOT): 2044 if catalog: 2045 # This allows nesting the table in arbitrarily many dot expressions if needed 2046 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2047 else: 2048 catalog = db 2049 db = table 2050 table = self._parse_id_var() 2051 2052 if not table: 2053 self.raise_error(f"Expected table name but got {self._curr}") 2054 2055 return self.expression( 2056 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2057 ) 2058 2059 def _parse_table( 2060 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2061 ) -> t.Optional[exp.Expression]: 2062 lateral = self._parse_lateral() 2063 2064 if lateral: 2065 return lateral 2066 2067 unnest = self._parse_unnest() 2068 2069 if unnest: 2070 return unnest 2071 2072 values = self._parse_derived_table_values() 2073 2074 if values: 2075 return values 2076 2077 subquery = self._parse_select(table=True) 2078 2079 if subquery: 2080 return subquery 2081 2082 this = self._parse_table_parts(schema=schema) 2083 2084 if schema: 2085 return self._parse_schema(this=this) 2086 2087 if self.alias_post_tablesample: 2088 table_sample = self._parse_table_sample() 2089 2090 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2091 2092 if alias: 2093 this.set("alias", alias) 2094 2095 if not this.args.get("pivots"): 2096 this.set("pivots", self._parse_pivots()) 2097 2098 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2099 this.set( 2100 "hints", 2101 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2102 ) 2103 self._match_r_paren() 2104 2105 if not self.alias_post_tablesample: 2106 table_sample = self._parse_table_sample() 2107 2108 if table_sample: 2109 table_sample.set("this", this) 2110 this = table_sample 2111 2112 return this 2113 2114 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2115 if not self._match(TokenType.UNNEST): 2116 return None 2117 2118 expressions = self._parse_wrapped_csv(self._parse_column) 2119 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2120 alias = self._parse_table_alias() 2121 2122 if alias and self.unnest_column_only: 2123 if alias.args.get("columns"): 2124 self.raise_error("Unexpected extra column alias in unnest.") 2125 alias.set("columns", [alias.this]) 2126 alias.set("this", None) 2127 2128 offset = None 2129 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2130 self._match(TokenType.ALIAS) 2131 offset = self._parse_conjunction() 2132 2133 return self.expression( 2134 exp.Unnest, 2135 expressions=expressions, 2136 ordinality=ordinality, 2137 alias=alias, 2138 offset=offset, 2139 ) 2140 2141 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2142 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2143 if not is_derived and not self._match(TokenType.VALUES): 2144 return None 2145 2146 expressions = self._parse_csv(self._parse_value) 2147 2148 if is_derived: 2149 self._match_r_paren() 2150 2151 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2152 2153 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2154 if not self._match(TokenType.TABLE_SAMPLE): 2155 return None 2156 2157 method = self._parse_var() 2158 bucket_numerator = None 2159 bucket_denominator = None 2160 bucket_field = None 2161 percent = None 2162 rows = None 2163 size = None 2164 seed = None 2165 2166 self._match_l_paren() 2167 2168 if self._match(TokenType.BUCKET): 2169 bucket_numerator = self._parse_number() 2170 self._match(TokenType.OUT_OF) 2171 bucket_denominator = bucket_denominator = self._parse_number() 2172 self._match(TokenType.ON) 2173 bucket_field = self._parse_field() 2174 else: 2175 num = self._parse_number() 2176 2177 if self._match(TokenType.PERCENT): 2178 percent = num 2179 elif self._match(TokenType.ROWS): 2180 rows = num 2181 else: 2182 size = num 2183 2184 self._match_r_paren() 2185 2186 if self._match(TokenType.SEED): 2187 seed = self._parse_wrapped(self._parse_number) 2188 2189 return self.expression( 2190 exp.TableSample, 2191 method=method, 2192 bucket_numerator=bucket_numerator, 2193 bucket_denominator=bucket_denominator, 2194 bucket_field=bucket_field, 2195 percent=percent, 2196 rows=rows, 2197 size=size, 2198 seed=seed, 2199 ) 2200 2201 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2202 return list(iter(self._parse_pivot, None)) 2203 2204 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2205 index = self._index 2206 2207 if self._match(TokenType.PIVOT): 2208 unpivot = False 2209 elif self._match(TokenType.UNPIVOT): 2210 unpivot = True 2211 else: 2212 return None 2213 2214 expressions = [] 2215 field = None 2216 2217 if not self._match(TokenType.L_PAREN): 2218 self._retreat(index) 2219 return None 2220 2221 if unpivot: 2222 expressions = self._parse_csv(self._parse_column) 2223 else: 2224 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2225 2226 if not self._match(TokenType.FOR): 2227 self.raise_error("Expecting FOR") 2228 2229 value = self._parse_column() 2230 2231 if not self._match(TokenType.IN): 2232 self.raise_error("Expecting IN") 2233 2234 field = self._parse_in(value) 2235 2236 self._match_r_paren() 2237 2238 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2239 2240 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2241 pivot.set("alias", self._parse_table_alias()) 2242 2243 return pivot 2244 2245 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2246 if not skip_where_token and not self._match(TokenType.WHERE): 2247 return None 2248 2249 return self.expression( 2250 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2251 ) 2252 2253 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2254 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2255 return None 2256 2257 elements = defaultdict(list) 2258 2259 while True: 2260 expressions = self._parse_csv(self._parse_conjunction) 2261 if expressions: 2262 elements["expressions"].extend(expressions) 2263 2264 grouping_sets = self._parse_grouping_sets() 2265 if grouping_sets: 2266 elements["grouping_sets"].extend(grouping_sets) 2267 2268 rollup = None 2269 cube = None 2270 2271 with_ = self._match(TokenType.WITH) 2272 if self._match(TokenType.ROLLUP): 2273 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2274 elements["rollup"].extend(ensure_list(rollup)) 2275 2276 if self._match(TokenType.CUBE): 2277 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2278 elements["cube"].extend(ensure_list(cube)) 2279 2280 if not (expressions or grouping_sets or rollup or cube): 2281 break 2282 2283 return self.expression(exp.Group, **elements) # type: ignore 2284 2285 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2286 if not self._match(TokenType.GROUPING_SETS): 2287 return None 2288 2289 return self._parse_wrapped_csv(self._parse_grouping_set) 2290 2291 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2292 if self._match(TokenType.L_PAREN): 2293 grouping_set = self._parse_csv(self._parse_column) 2294 self._match_r_paren() 2295 return self.expression(exp.Tuple, expressions=grouping_set) 2296 2297 return self._parse_column() 2298 2299 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2300 if not skip_having_token and not self._match(TokenType.HAVING): 2301 return None 2302 return self.expression(exp.Having, this=self._parse_conjunction()) 2303 2304 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2305 if not self._match(TokenType.QUALIFY): 2306 return None 2307 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2308 2309 def _parse_order( 2310 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2311 ) -> t.Optional[exp.Expression]: 2312 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2313 return this 2314 2315 return self.expression( 2316 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2317 ) 2318 2319 def _parse_sort( 2320 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2321 ) -> t.Optional[exp.Expression]: 2322 if not self._match(token_type): 2323 return None 2324 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2325 2326 def _parse_ordered(self) -> exp.Expression: 2327 this = self._parse_conjunction() 2328 self._match(TokenType.ASC) 2329 is_desc = self._match(TokenType.DESC) 2330 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2331 is_nulls_last = self._match(TokenType.NULLS_LAST) 2332 desc = is_desc or False 2333 asc = not desc 2334 nulls_first = is_nulls_first or False 2335 explicitly_null_ordered = is_nulls_first or is_nulls_last 2336 if ( 2337 not explicitly_null_ordered 2338 and ( 2339 (asc and self.null_ordering == "nulls_are_small") 2340 or (desc and self.null_ordering != "nulls_are_small") 2341 ) 2342 and self.null_ordering != "nulls_are_last" 2343 ): 2344 nulls_first = True 2345 2346 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2347 2348 def _parse_limit( 2349 self, this: t.Optional[exp.Expression] = None, top: bool = False 2350 ) -> t.Optional[exp.Expression]: 2351 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2352 limit_paren = self._match(TokenType.L_PAREN) 2353 limit_exp = self.expression( 2354 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2355 ) 2356 2357 if limit_paren: 2358 self._match_r_paren() 2359 2360 return limit_exp 2361 2362 if self._match(TokenType.FETCH): 2363 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2364 direction = self._prev.text if direction else "FIRST" 2365 count = self._parse_number() 2366 self._match_set((TokenType.ROW, TokenType.ROWS)) 2367 self._match(TokenType.ONLY) 2368 return self.expression(exp.Fetch, direction=direction, count=count) 2369 2370 return this 2371 2372 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2373 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2374 return this 2375 2376 count = self._parse_number() 2377 self._match_set((TokenType.ROW, TokenType.ROWS)) 2378 return self.expression(exp.Offset, this=this, expression=count) 2379 2380 def _parse_lock(self) -> t.Optional[exp.Expression]: 2381 if self._match_text_seq("FOR", "UPDATE"): 2382 return self.expression(exp.Lock, update=True) 2383 if self._match_text_seq("FOR", "SHARE"): 2384 return self.expression(exp.Lock, update=False) 2385 2386 return None 2387 2388 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2389 if not self._match_set(self.SET_OPERATIONS): 2390 return this 2391 2392 token_type = self._prev.token_type 2393 2394 if token_type == TokenType.UNION: 2395 expression = exp.Union 2396 elif token_type == TokenType.EXCEPT: 2397 expression = exp.Except 2398 else: 2399 expression = exp.Intersect 2400 2401 return self.expression( 2402 expression, 2403 this=this, 2404 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2405 expression=self._parse_set_operations(self._parse_select(nested=True)), 2406 ) 2407 2408 def _parse_expression(self) -> t.Optional[exp.Expression]: 2409 return self._parse_alias(self._parse_conjunction()) 2410 2411 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2412 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2413 2414 def _parse_equality(self) -> t.Optional[exp.Expression]: 2415 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2416 2417 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2418 return self._parse_tokens(self._parse_range, self.COMPARISON) 2419 2420 def _parse_range(self) -> t.Optional[exp.Expression]: 2421 this = self._parse_bitwise() 2422 negate = self._match(TokenType.NOT) 2423 2424 if self._match_set(self.RANGE_PARSERS): 2425 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2426 elif self._match(TokenType.ISNULL): 2427 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2428 2429 # Postgres supports ISNULL and NOTNULL for conditions. 2430 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2431 if self._match(TokenType.NOTNULL): 2432 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2433 this = self.expression(exp.Not, this=this) 2434 2435 if negate: 2436 this = self.expression(exp.Not, this=this) 2437 2438 if self._match(TokenType.IS): 2439 this = self._parse_is(this) 2440 2441 return this 2442 2443 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2444 negate = self._match(TokenType.NOT) 2445 if self._match(TokenType.DISTINCT_FROM): 2446 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2447 return self.expression(klass, this=this, expression=self._parse_expression()) 2448 2449 this = self.expression( 2450 exp.Is, 2451 this=this, 2452 expression=self._parse_null() or self._parse_boolean(), 2453 ) 2454 return self.expression(exp.Not, this=this) if negate else this 2455 2456 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2457 unnest = self._parse_unnest() 2458 if unnest: 2459 this = self.expression(exp.In, this=this, unnest=unnest) 2460 elif self._match(TokenType.L_PAREN): 2461 expressions = self._parse_csv(self._parse_select_or_expression) 2462 2463 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2464 this = self.expression(exp.In, this=this, query=expressions[0]) 2465 else: 2466 this = self.expression(exp.In, this=this, expressions=expressions) 2467 2468 self._match_r_paren() 2469 else: 2470 this = self.expression(exp.In, this=this, field=self._parse_field()) 2471 2472 return this 2473 2474 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2475 low = self._parse_bitwise() 2476 self._match(TokenType.AND) 2477 high = self._parse_bitwise() 2478 return self.expression(exp.Between, this=this, low=low, high=high) 2479 2480 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2481 if not self._match(TokenType.ESCAPE): 2482 return this 2483 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2484 2485 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2486 this = self._parse_term() 2487 2488 while True: 2489 if self._match_set(self.BITWISE): 2490 this = self.expression( 2491 self.BITWISE[self._prev.token_type], 2492 this=this, 2493 expression=self._parse_term(), 2494 ) 2495 elif self._match_pair(TokenType.LT, TokenType.LT): 2496 this = self.expression( 2497 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2498 ) 2499 elif self._match_pair(TokenType.GT, TokenType.GT): 2500 this = self.expression( 2501 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2502 ) 2503 else: 2504 break 2505 2506 return this 2507 2508 def _parse_term(self) -> t.Optional[exp.Expression]: 2509 return self._parse_tokens(self._parse_factor, self.TERM) 2510 2511 def _parse_factor(self) -> t.Optional[exp.Expression]: 2512 return self._parse_tokens(self._parse_unary, self.FACTOR) 2513 2514 def _parse_unary(self) -> t.Optional[exp.Expression]: 2515 if self._match_set(self.UNARY_PARSERS): 2516 return self.UNARY_PARSERS[self._prev.token_type](self) 2517 return self._parse_at_time_zone(self._parse_type()) 2518 2519 def _parse_type(self) -> t.Optional[exp.Expression]: 2520 if self._match(TokenType.INTERVAL): 2521 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2522 2523 index = self._index 2524 type_token = self._parse_types(check_func=True) 2525 this = self._parse_column() 2526 2527 if type_token: 2528 if this and not isinstance(this, exp.Star): 2529 return self.expression(exp.Cast, this=this, to=type_token) 2530 if not type_token.args.get("expressions"): 2531 self._retreat(index) 2532 return self._parse_column() 2533 return type_token 2534 2535 return this 2536 2537 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2538 index = self._index 2539 2540 prefix = self._match_text_seq("SYSUDTLIB", ".") 2541 2542 if not self._match_set(self.TYPE_TOKENS): 2543 return None 2544 2545 type_token = self._prev.token_type 2546 2547 if type_token == TokenType.PSEUDO_TYPE: 2548 return self.expression(exp.PseudoType, this=self._prev.text) 2549 2550 nested = type_token in self.NESTED_TYPE_TOKENS 2551 is_struct = type_token == TokenType.STRUCT 2552 expressions = None 2553 maybe_func = False 2554 2555 if self._match(TokenType.L_PAREN): 2556 if is_struct: 2557 expressions = self._parse_csv(self._parse_struct_kwargs) 2558 elif nested: 2559 expressions = self._parse_csv(self._parse_types) 2560 else: 2561 expressions = self._parse_csv(self._parse_conjunction) 2562 2563 if not expressions: 2564 self._retreat(index) 2565 return None 2566 2567 self._match_r_paren() 2568 maybe_func = True 2569 2570 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2571 this = exp.DataType( 2572 this=exp.DataType.Type.ARRAY, 2573 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2574 nested=True, 2575 ) 2576 2577 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2578 this = exp.DataType( 2579 this=exp.DataType.Type.ARRAY, 2580 expressions=[this], 2581 nested=True, 2582 ) 2583 2584 return this 2585 2586 if self._match(TokenType.L_BRACKET): 2587 self._retreat(index) 2588 return None 2589 2590 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2591 if nested and self._match(TokenType.LT): 2592 if is_struct: 2593 expressions = self._parse_csv(self._parse_struct_kwargs) 2594 else: 2595 expressions = self._parse_csv(self._parse_types) 2596 2597 if not self._match(TokenType.GT): 2598 self.raise_error("Expecting >") 2599 2600 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2601 values = self._parse_csv(self._parse_conjunction) 2602 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2603 2604 value: t.Optional[exp.Expression] = None 2605 if type_token in self.TIMESTAMPS: 2606 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2607 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2608 elif ( 2609 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2610 ): 2611 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2612 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2613 if type_token == TokenType.TIME: 2614 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2615 else: 2616 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2617 2618 maybe_func = maybe_func and value is None 2619 2620 if value is None: 2621 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2622 elif type_token == TokenType.INTERVAL: 2623 value = self.expression(exp.Interval, unit=self._parse_var()) 2624 2625 if maybe_func and check_func: 2626 index2 = self._index 2627 peek = self._parse_string() 2628 2629 if not peek: 2630 self._retreat(index) 2631 return None 2632 2633 self._retreat(index2) 2634 2635 if value: 2636 return value 2637 2638 return exp.DataType( 2639 this=exp.DataType.Type[type_token.value.upper()], 2640 expressions=expressions, 2641 nested=nested, 2642 values=values, 2643 prefix=prefix, 2644 ) 2645 2646 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2647 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2648 return self._parse_types() 2649 2650 this = self._parse_id_var() 2651 self._match(TokenType.COLON) 2652 data_type = self._parse_types() 2653 2654 if not data_type: 2655 return None 2656 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2657 2658 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2659 if not self._match(TokenType.AT_TIME_ZONE): 2660 return this 2661 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2662 2663 def _parse_column(self) -> t.Optional[exp.Expression]: 2664 this = self._parse_field() 2665 if isinstance(this, exp.Identifier): 2666 this = self.expression(exp.Column, this=this) 2667 elif not this: 2668 return self._parse_bracket(this) 2669 this = self._parse_bracket(this) 2670 2671 while self._match_set(self.COLUMN_OPERATORS): 2672 op_token = self._prev.token_type 2673 op = self.COLUMN_OPERATORS.get(op_token) 2674 2675 if op_token == TokenType.DCOLON: 2676 field = self._parse_types() 2677 if not field: 2678 self.raise_error("Expected type") 2679 elif op: 2680 self._advance() 2681 value = self._prev.text 2682 field = ( 2683 exp.Literal.number(value) 2684 if self._prev.token_type == TokenType.NUMBER 2685 else exp.Literal.string(value) 2686 ) 2687 else: 2688 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2689 2690 if isinstance(field, exp.Func): 2691 # bigquery allows function calls like x.y.count(...) 2692 # SAFE.SUBSTR(...) 2693 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2694 this = self._replace_columns_with_dots(this) 2695 2696 if op: 2697 this = op(self, this, field) 2698 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2699 this = self.expression( 2700 exp.Column, 2701 this=field, 2702 table=this.this, 2703 db=this.args.get("table"), 2704 catalog=this.args.get("db"), 2705 ) 2706 else: 2707 this = self.expression(exp.Dot, this=this, expression=field) 2708 this = self._parse_bracket(this) 2709 2710 return this 2711 2712 def _parse_primary(self) -> t.Optional[exp.Expression]: 2713 if self._match_set(self.PRIMARY_PARSERS): 2714 token_type = self._prev.token_type 2715 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2716 2717 if token_type == TokenType.STRING: 2718 expressions = [primary] 2719 while self._match(TokenType.STRING): 2720 expressions.append(exp.Literal.string(self._prev.text)) 2721 if len(expressions) > 1: 2722 return self.expression(exp.Concat, expressions=expressions) 2723 return primary 2724 2725 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2726 return exp.Literal.number(f"0.{self._prev.text}") 2727 2728 if self._match(TokenType.L_PAREN): 2729 comments = self._prev_comments 2730 query = self._parse_select() 2731 2732 if query: 2733 expressions = [query] 2734 else: 2735 expressions = self._parse_csv( 2736 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2737 ) 2738 2739 this = seq_get(expressions, 0) 2740 self._parse_query_modifiers(this) 2741 self._match_r_paren() 2742 2743 if isinstance(this, exp.Subqueryable): 2744 this = self._parse_set_operations( 2745 self._parse_subquery(this=this, parse_alias=False) 2746 ) 2747 elif len(expressions) > 1: 2748 this = self.expression(exp.Tuple, expressions=expressions) 2749 else: 2750 this = self.expression(exp.Paren, this=this) 2751 2752 if this and comments: 2753 this.comments = comments 2754 2755 return this 2756 2757 return None 2758 2759 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2760 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2761 2762 def _parse_function( 2763 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2764 ) -> t.Optional[exp.Expression]: 2765 if not self._curr: 2766 return None 2767 2768 token_type = self._curr.token_type 2769 2770 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2771 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2772 2773 if not self._next or self._next.token_type != TokenType.L_PAREN: 2774 if token_type in self.NO_PAREN_FUNCTIONS: 2775 self._advance() 2776 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2777 2778 return None 2779 2780 if token_type not in self.FUNC_TOKENS: 2781 return None 2782 2783 this = self._curr.text 2784 upper = this.upper() 2785 self._advance(2) 2786 2787 parser = self.FUNCTION_PARSERS.get(upper) 2788 2789 if parser: 2790 this = parser(self) 2791 else: 2792 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2793 2794 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2795 this = self.expression(subquery_predicate, this=self._parse_select()) 2796 self._match_r_paren() 2797 return this 2798 2799 if functions is None: 2800 functions = self.FUNCTIONS 2801 2802 function = functions.get(upper) 2803 args = self._parse_csv(self._parse_lambda) 2804 2805 if function: 2806 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2807 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 2808 if count_params(function) == 2: 2809 params = None 2810 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2811 params = self._parse_csv(self._parse_lambda) 2812 2813 this = function(args, params) 2814 else: 2815 this = function(args) 2816 2817 self.validate_expression(this, args) 2818 else: 2819 this = self.expression(exp.Anonymous, this=this, expressions=args) 2820 2821 self._match_r_paren(this) 2822 return self._parse_window(this) 2823 2824 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2825 return self._parse_column_def(self._parse_id_var()) 2826 2827 def _parse_user_defined_function( 2828 self, kind: t.Optional[TokenType] = None 2829 ) -> t.Optional[exp.Expression]: 2830 this = self._parse_id_var() 2831 2832 while self._match(TokenType.DOT): 2833 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2834 2835 if not self._match(TokenType.L_PAREN): 2836 return this 2837 2838 expressions = self._parse_csv(self._parse_function_parameter) 2839 self._match_r_paren() 2840 return self.expression( 2841 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2842 ) 2843 2844 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2845 literal = self._parse_primary() 2846 if literal: 2847 return self.expression(exp.Introducer, this=token.text, expression=literal) 2848 2849 return self.expression(exp.Identifier, this=token.text) 2850 2851 def _parse_national(self, token: Token) -> exp.Expression: 2852 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2853 2854 def _parse_session_parameter(self) -> exp.Expression: 2855 kind = None 2856 this = self._parse_id_var() or self._parse_primary() 2857 2858 if this and self._match(TokenType.DOT): 2859 kind = this.name 2860 this = self._parse_var() or self._parse_primary() 2861 2862 return self.expression(exp.SessionParameter, this=this, kind=kind) 2863 2864 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2865 index = self._index 2866 2867 if self._match(TokenType.L_PAREN): 2868 expressions = self._parse_csv(self._parse_id_var) 2869 2870 if not self._match(TokenType.R_PAREN): 2871 self._retreat(index) 2872 else: 2873 expressions = [self._parse_id_var()] 2874 2875 if self._match_set(self.LAMBDAS): 2876 return self.LAMBDAS[self._prev.token_type](self, expressions) 2877 2878 self._retreat(index) 2879 2880 this: t.Optional[exp.Expression] 2881 2882 if self._match(TokenType.DISTINCT): 2883 this = self.expression( 2884 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2885 ) 2886 else: 2887 this = self._parse_select_or_expression() 2888 2889 if self._match(TokenType.IGNORE_NULLS): 2890 this = self.expression(exp.IgnoreNulls, this=this) 2891 else: 2892 self._match(TokenType.RESPECT_NULLS) 2893 2894 return self._parse_limit(self._parse_order(this)) 2895 2896 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2897 index = self._index 2898 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2899 self._retreat(index) 2900 return this 2901 2902 args = self._parse_csv( 2903 lambda: self._parse_constraint() 2904 or self._parse_column_def(self._parse_field(any_token=True)) 2905 ) 2906 self._match_r_paren() 2907 return self.expression(exp.Schema, this=this, expressions=args) 2908 2909 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2910 kind = self._parse_types() 2911 2912 if self._match_text_seq("FOR", "ORDINALITY"): 2913 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2914 2915 constraints = [] 2916 while True: 2917 constraint = self._parse_column_constraint() 2918 if not constraint: 2919 break 2920 constraints.append(constraint) 2921 2922 if not kind and not constraints: 2923 return this 2924 2925 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2926 2927 def _parse_auto_increment(self) -> exp.Expression: 2928 start = None 2929 increment = None 2930 2931 if self._match(TokenType.L_PAREN, advance=False): 2932 args = self._parse_wrapped_csv(self._parse_bitwise) 2933 start = seq_get(args, 0) 2934 increment = seq_get(args, 1) 2935 elif self._match_text_seq("START"): 2936 start = self._parse_bitwise() 2937 self._match_text_seq("INCREMENT") 2938 increment = self._parse_bitwise() 2939 2940 if start and increment: 2941 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2942 2943 return exp.AutoIncrementColumnConstraint() 2944 2945 def _parse_compress(self) -> exp.Expression: 2946 if self._match(TokenType.L_PAREN, advance=False): 2947 return self.expression( 2948 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2949 ) 2950 2951 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2952 2953 def _parse_generated_as_identity(self) -> exp.Expression: 2954 if self._match(TokenType.BY_DEFAULT): 2955 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2956 else: 2957 self._match_text_seq("ALWAYS") 2958 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2959 2960 self._match_text_seq("AS", "IDENTITY") 2961 if self._match(TokenType.L_PAREN): 2962 if self._match_text_seq("START", "WITH"): 2963 this.set("start", self._parse_bitwise()) 2964 if self._match_text_seq("INCREMENT", "BY"): 2965 this.set("increment", self._parse_bitwise()) 2966 if self._match_text_seq("MINVALUE"): 2967 this.set("minvalue", self._parse_bitwise()) 2968 if self._match_text_seq("MAXVALUE"): 2969 this.set("maxvalue", self._parse_bitwise()) 2970 2971 if self._match_text_seq("CYCLE"): 2972 this.set("cycle", True) 2973 elif self._match_text_seq("NO", "CYCLE"): 2974 this.set("cycle", False) 2975 2976 self._match_r_paren() 2977 2978 return this 2979 2980 def _parse_inline(self) -> t.Optional[exp.Expression]: 2981 self._match_text_seq("LENGTH") 2982 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 2983 2984 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2985 if self._match_text_seq("NULL"): 2986 return self.expression(exp.NotNullColumnConstraint) 2987 if self._match_text_seq("CASESPECIFIC"): 2988 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2989 return None 2990 2991 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2992 this = self._parse_references() 2993 if this: 2994 return this 2995 2996 if self._match(TokenType.CONSTRAINT): 2997 this = self._parse_id_var() 2998 2999 if self._match_texts(self.CONSTRAINT_PARSERS): 3000 return self.expression( 3001 exp.ColumnConstraint, 3002 this=this, 3003 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3004 ) 3005 3006 return this 3007 3008 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3009 if not self._match(TokenType.CONSTRAINT): 3010 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3011 3012 this = self._parse_id_var() 3013 expressions = [] 3014 3015 while True: 3016 constraint = self._parse_unnamed_constraint() or self._parse_function() 3017 if not constraint: 3018 break 3019 expressions.append(constraint) 3020 3021 return self.expression(exp.Constraint, this=this, expressions=expressions) 3022 3023 def _parse_unnamed_constraint( 3024 self, constraints: t.Optional[t.Collection[str]] = None 3025 ) -> t.Optional[exp.Expression]: 3026 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3027 return None 3028 3029 constraint = self._prev.text.upper() 3030 if constraint not in self.CONSTRAINT_PARSERS: 3031 self.raise_error(f"No parser found for schema constraint {constraint}.") 3032 3033 return self.CONSTRAINT_PARSERS[constraint](self) 3034 3035 def _parse_unique(self) -> exp.Expression: 3036 if not self._match(TokenType.L_PAREN, advance=False): 3037 return self.expression(exp.UniqueColumnConstraint) 3038 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3039 3040 def _parse_key_constraint_options(self) -> t.List[str]: 3041 options = [] 3042 while True: 3043 if not self._curr: 3044 break 3045 3046 if self._match(TokenType.ON): 3047 action = None 3048 on = self._advance_any() and self._prev.text 3049 3050 if self._match(TokenType.NO_ACTION): 3051 action = "NO ACTION" 3052 elif self._match(TokenType.CASCADE): 3053 action = "CASCADE" 3054 elif self._match_pair(TokenType.SET, TokenType.NULL): 3055 action = "SET NULL" 3056 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3057 action = "SET DEFAULT" 3058 else: 3059 self.raise_error("Invalid key constraint") 3060 3061 options.append(f"ON {on} {action}") 3062 elif self._match_text_seq("NOT", "ENFORCED"): 3063 options.append("NOT ENFORCED") 3064 elif self._match_text_seq("DEFERRABLE"): 3065 options.append("DEFERRABLE") 3066 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3067 options.append("INITIALLY DEFERRED") 3068 elif self._match_text_seq("NORELY"): 3069 options.append("NORELY") 3070 elif self._match_text_seq("MATCH", "FULL"): 3071 options.append("MATCH FULL") 3072 else: 3073 break 3074 3075 return options 3076 3077 def _parse_references(self) -> t.Optional[exp.Expression]: 3078 if not self._match(TokenType.REFERENCES): 3079 return None 3080 3081 expressions = None 3082 this = self._parse_id_var() 3083 3084 if self._match(TokenType.L_PAREN, advance=False): 3085 expressions = self._parse_wrapped_id_vars() 3086 3087 options = self._parse_key_constraint_options() 3088 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3089 3090 def _parse_foreign_key(self) -> exp.Expression: 3091 expressions = self._parse_wrapped_id_vars() 3092 reference = self._parse_references() 3093 options = {} 3094 3095 while self._match(TokenType.ON): 3096 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3097 self.raise_error("Expected DELETE or UPDATE") 3098 3099 kind = self._prev.text.lower() 3100 3101 if self._match(TokenType.NO_ACTION): 3102 action = "NO ACTION" 3103 elif self._match(TokenType.SET): 3104 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3105 action = "SET " + self._prev.text.upper() 3106 else: 3107 self._advance() 3108 action = self._prev.text.upper() 3109 3110 options[kind] = action 3111 3112 return self.expression( 3113 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3114 ) 3115 3116 def _parse_primary_key(self) -> exp.Expression: 3117 desc = ( 3118 self._match_set((TokenType.ASC, TokenType.DESC)) 3119 and self._prev.token_type == TokenType.DESC 3120 ) 3121 3122 if not self._match(TokenType.L_PAREN, advance=False): 3123 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3124 3125 expressions = self._parse_wrapped_id_vars() 3126 options = self._parse_key_constraint_options() 3127 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3128 3129 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3130 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3131 return this 3132 3133 bracket_kind = self._prev.token_type 3134 expressions: t.List[t.Optional[exp.Expression]] 3135 3136 if self._match(TokenType.COLON): 3137 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3138 else: 3139 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3140 3141 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3142 if bracket_kind == TokenType.L_BRACE: 3143 this = self.expression(exp.Struct, expressions=expressions) 3144 elif not this or this.name.upper() == "ARRAY": 3145 this = self.expression(exp.Array, expressions=expressions) 3146 else: 3147 expressions = apply_index_offset(expressions, -self.index_offset) 3148 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3149 3150 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3151 self.raise_error("Expected ]") 3152 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3153 self.raise_error("Expected }") 3154 3155 this.comments = self._prev_comments 3156 return self._parse_bracket(this) 3157 3158 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3159 if self._match(TokenType.COLON): 3160 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3161 return this 3162 3163 def _parse_case(self) -> t.Optional[exp.Expression]: 3164 ifs = [] 3165 default = None 3166 3167 expression = self._parse_conjunction() 3168 3169 while self._match(TokenType.WHEN): 3170 this = self._parse_conjunction() 3171 self._match(TokenType.THEN) 3172 then = self._parse_conjunction() 3173 ifs.append(self.expression(exp.If, this=this, true=then)) 3174 3175 if self._match(TokenType.ELSE): 3176 default = self._parse_conjunction() 3177 3178 if not self._match(TokenType.END): 3179 self.raise_error("Expected END after CASE", self._prev) 3180 3181 return self._parse_window( 3182 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3183 ) 3184 3185 def _parse_if(self) -> t.Optional[exp.Expression]: 3186 if self._match(TokenType.L_PAREN): 3187 args = self._parse_csv(self._parse_conjunction) 3188 this = exp.If.from_arg_list(args) 3189 self.validate_expression(this, args) 3190 self._match_r_paren() 3191 else: 3192 condition = self._parse_conjunction() 3193 self._match(TokenType.THEN) 3194 true = self._parse_conjunction() 3195 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3196 self._match(TokenType.END) 3197 this = self.expression(exp.If, this=condition, true=true, false=false) 3198 3199 return self._parse_window(this) 3200 3201 def _parse_extract(self) -> exp.Expression: 3202 this = self._parse_function() or self._parse_var() or self._parse_type() 3203 3204 if self._match(TokenType.FROM): 3205 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3206 3207 if not self._match(TokenType.COMMA): 3208 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3209 3210 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3211 3212 def _parse_cast(self, strict: bool) -> exp.Expression: 3213 this = self._parse_conjunction() 3214 3215 if not self._match(TokenType.ALIAS): 3216 self.raise_error("Expected AS after CAST") 3217 3218 to = self._parse_types() 3219 3220 if not to: 3221 self.raise_error("Expected TYPE after CAST") 3222 elif to.this == exp.DataType.Type.CHAR: 3223 if self._match(TokenType.CHARACTER_SET): 3224 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3225 3226 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3227 3228 def _parse_string_agg(self) -> exp.Expression: 3229 expression: t.Optional[exp.Expression] 3230 3231 if self._match(TokenType.DISTINCT): 3232 args = self._parse_csv(self._parse_conjunction) 3233 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3234 else: 3235 args = self._parse_csv(self._parse_conjunction) 3236 expression = seq_get(args, 0) 3237 3238 index = self._index 3239 if not self._match(TokenType.R_PAREN): 3240 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3241 order = self._parse_order(this=expression) 3242 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3243 3244 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3245 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3246 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3247 if not self._match(TokenType.WITHIN_GROUP): 3248 self._retreat(index) 3249 this = exp.GroupConcat.from_arg_list(args) 3250 self.validate_expression(this, args) 3251 return this 3252 3253 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3254 order = self._parse_order(this=expression) 3255 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3256 3257 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3258 to: t.Optional[exp.Expression] 3259 this = self._parse_column() 3260 3261 if self._match(TokenType.USING): 3262 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3263 elif self._match(TokenType.COMMA): 3264 to = self._parse_types() 3265 else: 3266 to = None 3267 3268 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3269 3270 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3271 args = self._parse_csv(self._parse_bitwise) 3272 3273 if self._match(TokenType.IN): 3274 return self.expression( 3275 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3276 ) 3277 3278 if haystack_first: 3279 haystack = seq_get(args, 0) 3280 needle = seq_get(args, 1) 3281 else: 3282 needle = seq_get(args, 0) 3283 haystack = seq_get(args, 1) 3284 3285 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3286 3287 self.validate_expression(this, args) 3288 3289 return this 3290 3291 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3292 args = self._parse_csv(self._parse_table) 3293 return exp.JoinHint(this=func_name.upper(), expressions=args) 3294 3295 def _parse_substring(self) -> exp.Expression: 3296 # Postgres supports the form: substring(string [from int] [for int]) 3297 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3298 3299 args = self._parse_csv(self._parse_bitwise) 3300 3301 if self._match(TokenType.FROM): 3302 args.append(self._parse_bitwise()) 3303 if self._match(TokenType.FOR): 3304 args.append(self._parse_bitwise()) 3305 3306 this = exp.Substring.from_arg_list(args) 3307 self.validate_expression(this, args) 3308 3309 return this 3310 3311 def _parse_trim(self) -> exp.Expression: 3312 # https://www.w3resource.com/sql/character-functions/trim.php 3313 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3314 3315 position = None 3316 collation = None 3317 3318 if self._match_set(self.TRIM_TYPES): 3319 position = self._prev.text.upper() 3320 3321 expression = self._parse_term() 3322 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3323 this = self._parse_term() 3324 else: 3325 this = expression 3326 expression = None 3327 3328 if self._match(TokenType.COLLATE): 3329 collation = self._parse_term() 3330 3331 return self.expression( 3332 exp.Trim, 3333 this=this, 3334 position=position, 3335 expression=expression, 3336 collation=collation, 3337 ) 3338 3339 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3340 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3341 3342 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3343 return self._parse_window(self._parse_id_var(), alias=True) 3344 3345 def _parse_window( 3346 self, this: t.Optional[exp.Expression], alias: bool = False 3347 ) -> t.Optional[exp.Expression]: 3348 if self._match(TokenType.FILTER): 3349 where = self._parse_wrapped(self._parse_where) 3350 this = self.expression(exp.Filter, this=this, expression=where) 3351 3352 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3353 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3354 if self._match(TokenType.WITHIN_GROUP): 3355 order = self._parse_wrapped(self._parse_order) 3356 this = self.expression(exp.WithinGroup, this=this, expression=order) 3357 3358 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3359 # Some dialects choose to implement and some do not. 3360 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3361 3362 # There is some code above in _parse_lambda that handles 3363 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3364 3365 # The below changes handle 3366 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3367 3368 # Oracle allows both formats 3369 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3370 # and Snowflake chose to do the same for familiarity 3371 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3372 if self._match(TokenType.IGNORE_NULLS): 3373 this = self.expression(exp.IgnoreNulls, this=this) 3374 elif self._match(TokenType.RESPECT_NULLS): 3375 this = self.expression(exp.RespectNulls, this=this) 3376 3377 # bigquery select from window x AS (partition by ...) 3378 if alias: 3379 self._match(TokenType.ALIAS) 3380 elif not self._match(TokenType.OVER): 3381 return this 3382 3383 if not self._match(TokenType.L_PAREN): 3384 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3385 3386 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3387 partition = self._parse_partition_by() 3388 order = self._parse_order() 3389 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3390 3391 if kind: 3392 self._match(TokenType.BETWEEN) 3393 start = self._parse_window_spec() 3394 self._match(TokenType.AND) 3395 end = self._parse_window_spec() 3396 3397 spec = self.expression( 3398 exp.WindowSpec, 3399 kind=kind, 3400 start=start["value"], 3401 start_side=start["side"], 3402 end=end["value"], 3403 end_side=end["side"], 3404 ) 3405 else: 3406 spec = None 3407 3408 self._match_r_paren() 3409 3410 return self.expression( 3411 exp.Window, 3412 this=this, 3413 partition_by=partition, 3414 order=order, 3415 spec=spec, 3416 alias=window_alias, 3417 ) 3418 3419 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3420 self._match(TokenType.BETWEEN) 3421 3422 return { 3423 "value": ( 3424 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3425 ) 3426 or self._parse_bitwise(), 3427 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3428 } 3429 3430 def _parse_alias( 3431 self, this: t.Optional[exp.Expression], explicit: bool = False 3432 ) -> t.Optional[exp.Expression]: 3433 any_token = self._match(TokenType.ALIAS) 3434 3435 if explicit and not any_token: 3436 return this 3437 3438 if self._match(TokenType.L_PAREN): 3439 aliases = self.expression( 3440 exp.Aliases, 3441 this=this, 3442 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3443 ) 3444 self._match_r_paren(aliases) 3445 return aliases 3446 3447 alias = self._parse_id_var(any_token) 3448 3449 if alias: 3450 return self.expression(exp.Alias, this=this, alias=alias) 3451 3452 return this 3453 3454 def _parse_id_var( 3455 self, 3456 any_token: bool = True, 3457 tokens: t.Optional[t.Collection[TokenType]] = None, 3458 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3459 ) -> t.Optional[exp.Expression]: 3460 identifier = self._parse_identifier() 3461 3462 if identifier: 3463 return identifier 3464 3465 prefix = "" 3466 3467 if prefix_tokens: 3468 while self._match_set(prefix_tokens): 3469 prefix += self._prev.text 3470 3471 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3472 quoted = self._prev.token_type == TokenType.STRING 3473 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3474 3475 return None 3476 3477 def _parse_string(self) -> t.Optional[exp.Expression]: 3478 if self._match(TokenType.STRING): 3479 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3480 return self._parse_placeholder() 3481 3482 def _parse_number(self) -> t.Optional[exp.Expression]: 3483 if self._match(TokenType.NUMBER): 3484 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3485 return self._parse_placeholder() 3486 3487 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3488 if self._match(TokenType.IDENTIFIER): 3489 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3490 return self._parse_placeholder() 3491 3492 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: 3493 if (any_token and self._advance_any()) or self._match(TokenType.VAR): 3494 return self.expression(exp.Var, this=self._prev.text) 3495 return self._parse_placeholder() 3496 3497 def _advance_any(self) -> t.Optional[Token]: 3498 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3499 self._advance() 3500 return self._prev 3501 return None 3502 3503 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3504 return self._parse_var() or self._parse_string() 3505 3506 def _parse_null(self) -> t.Optional[exp.Expression]: 3507 if self._match(TokenType.NULL): 3508 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3509 return None 3510 3511 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3512 if self._match(TokenType.TRUE): 3513 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3514 if self._match(TokenType.FALSE): 3515 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3516 return None 3517 3518 def _parse_star(self) -> t.Optional[exp.Expression]: 3519 if self._match(TokenType.STAR): 3520 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3521 return None 3522 3523 def _parse_parameter(self) -> exp.Expression: 3524 wrapped = self._match(TokenType.L_BRACE) 3525 this = self._parse_var() or self._parse_primary() 3526 self._match(TokenType.R_BRACE) 3527 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3528 3529 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3530 if self._match_set(self.PLACEHOLDER_PARSERS): 3531 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3532 if placeholder: 3533 return placeholder 3534 self._advance(-1) 3535 return None 3536 3537 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3538 if not self._match(TokenType.EXCEPT): 3539 return None 3540 if self._match(TokenType.L_PAREN, advance=False): 3541 return self._parse_wrapped_csv(self._parse_column) 3542 return self._parse_csv(self._parse_column) 3543 3544 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3545 if not self._match(TokenType.REPLACE): 3546 return None 3547 if self._match(TokenType.L_PAREN, advance=False): 3548 return self._parse_wrapped_csv(self._parse_expression) 3549 return self._parse_csv(self._parse_expression) 3550 3551 def _parse_csv( 3552 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3553 ) -> t.List[t.Optional[exp.Expression]]: 3554 parse_result = parse_method() 3555 items = [parse_result] if parse_result is not None else [] 3556 3557 while self._match(sep): 3558 if parse_result and self._prev_comments: 3559 parse_result.comments = self._prev_comments 3560 3561 parse_result = parse_method() 3562 if parse_result is not None: 3563 items.append(parse_result) 3564 3565 return items 3566 3567 def _parse_tokens( 3568 self, parse_method: t.Callable, expressions: t.Dict 3569 ) -> t.Optional[exp.Expression]: 3570 this = parse_method() 3571 3572 while self._match_set(expressions): 3573 this = self.expression( 3574 expressions[self._prev.token_type], 3575 this=this, 3576 comments=self._prev_comments, 3577 expression=parse_method(), 3578 ) 3579 3580 return this 3581 3582 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3583 return self._parse_wrapped_csv(self._parse_id_var) 3584 3585 def _parse_wrapped_csv( 3586 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3587 ) -> t.List[t.Optional[exp.Expression]]: 3588 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3589 3590 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3591 self._match_l_paren() 3592 parse_result = parse_method() 3593 self._match_r_paren() 3594 return parse_result 3595 3596 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3597 return self._parse_select() or self._parse_expression() 3598 3599 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3600 return self._parse_set_operations( 3601 self._parse_select(nested=True, parse_subquery_alias=False) 3602 ) 3603 3604 def _parse_transaction(self) -> exp.Expression: 3605 this = None 3606 if self._match_texts(self.TRANSACTION_KIND): 3607 this = self._prev.text 3608 3609 self._match_texts({"TRANSACTION", "WORK"}) 3610 3611 modes = [] 3612 while True: 3613 mode = [] 3614 while self._match(TokenType.VAR): 3615 mode.append(self._prev.text) 3616 3617 if mode: 3618 modes.append(" ".join(mode)) 3619 if not self._match(TokenType.COMMA): 3620 break 3621 3622 return self.expression(exp.Transaction, this=this, modes=modes) 3623 3624 def _parse_commit_or_rollback(self) -> exp.Expression: 3625 chain = None 3626 savepoint = None 3627 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3628 3629 self._match_texts({"TRANSACTION", "WORK"}) 3630 3631 if self._match_text_seq("TO"): 3632 self._match_text_seq("SAVEPOINT") 3633 savepoint = self._parse_id_var() 3634 3635 if self._match(TokenType.AND): 3636 chain = not self._match_text_seq("NO") 3637 self._match_text_seq("CHAIN") 3638 3639 if is_rollback: 3640 return self.expression(exp.Rollback, savepoint=savepoint) 3641 return self.expression(exp.Commit, chain=chain) 3642 3643 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3644 if not self._match_text_seq("ADD"): 3645 return None 3646 3647 self._match(TokenType.COLUMN) 3648 exists_column = self._parse_exists(not_=True) 3649 expression = self._parse_column_def(self._parse_field(any_token=True)) 3650 3651 if expression: 3652 expression.set("exists", exists_column) 3653 3654 return expression 3655 3656 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3657 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3658 3659 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3660 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3661 return self.expression( 3662 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3663 ) 3664 3665 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3666 this = None 3667 kind = self._prev.token_type 3668 3669 if kind == TokenType.CONSTRAINT: 3670 this = self._parse_id_var() 3671 3672 if self._match_text_seq("CHECK"): 3673 expression = self._parse_wrapped(self._parse_conjunction) 3674 enforced = self._match_text_seq("ENFORCED") 3675 3676 return self.expression( 3677 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3678 ) 3679 3680 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3681 expression = self._parse_foreign_key() 3682 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3683 expression = self._parse_primary_key() 3684 3685 return self.expression(exp.AddConstraint, this=this, expression=expression) 3686 3687 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3688 index = self._index - 1 3689 3690 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3691 return self._parse_csv(self._parse_add_constraint) 3692 3693 self._retreat(index) 3694 return self._parse_csv(self._parse_add_column) 3695 3696 def _parse_alter_table_alter(self) -> exp.Expression: 3697 self._match(TokenType.COLUMN) 3698 column = self._parse_field(any_token=True) 3699 3700 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3701 return self.expression(exp.AlterColumn, this=column, drop=True) 3702 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3703 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3704 3705 self._match_text_seq("SET", "DATA") 3706 return self.expression( 3707 exp.AlterColumn, 3708 this=column, 3709 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3710 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3711 using=self._match(TokenType.USING) and self._parse_conjunction(), 3712 ) 3713 3714 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3715 index = self._index - 1 3716 3717 partition_exists = self._parse_exists() 3718 if self._match(TokenType.PARTITION, advance=False): 3719 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3720 3721 self._retreat(index) 3722 return self._parse_csv(self._parse_drop_column) 3723 3724 def _parse_alter_table_rename(self) -> exp.Expression: 3725 self._match_text_seq("TO") 3726 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3727 3728 def _parse_alter(self) -> t.Optional[exp.Expression]: 3729 if not self._match(TokenType.TABLE): 3730 return self._parse_as_command(self._prev) 3731 3732 exists = self._parse_exists() 3733 this = self._parse_table(schema=True) 3734 3735 if not self._curr: 3736 return None 3737 3738 parser = self.ALTER_PARSERS.get(self._curr.text.upper()) 3739 actions = ensure_list(self._advance() or parser(self)) if parser else [] # type: ignore 3740 3741 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) 3742 3743 def _parse_show(self) -> t.Optional[exp.Expression]: 3744 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3745 if parser: 3746 return parser(self) 3747 self._advance() 3748 return self.expression(exp.Show, this=self._prev.text.upper()) 3749 3750 def _default_parse_set_item(self) -> exp.Expression: 3751 return self.expression( 3752 exp.SetItem, 3753 this=self._parse_statement(), 3754 ) 3755 3756 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3757 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3758 return parser(self) if parser else self._default_parse_set_item() 3759 3760 def _parse_merge(self) -> exp.Expression: 3761 self._match(TokenType.INTO) 3762 target = self._parse_table() 3763 3764 self._match(TokenType.USING) 3765 using = self._parse_table() 3766 3767 self._match(TokenType.ON) 3768 on = self._parse_conjunction() 3769 3770 whens = [] 3771 while self._match(TokenType.WHEN): 3772 this = self._parse_conjunction() 3773 self._match(TokenType.THEN) 3774 3775 if self._match(TokenType.INSERT): 3776 _this = self._parse_star() 3777 if _this: 3778 then = self.expression(exp.Insert, this=_this) 3779 else: 3780 then = self.expression( 3781 exp.Insert, 3782 this=self._parse_value(), 3783 expression=self._match(TokenType.VALUES) and self._parse_value(), 3784 ) 3785 elif self._match(TokenType.UPDATE): 3786 expressions = self._parse_star() 3787 if expressions: 3788 then = self.expression(exp.Update, expressions=expressions) 3789 else: 3790 then = self.expression( 3791 exp.Update, 3792 expressions=self._match(TokenType.SET) 3793 and self._parse_csv(self._parse_equality), 3794 ) 3795 elif self._match(TokenType.DELETE): 3796 then = self.expression(exp.Var, this=self._prev.text) 3797 3798 whens.append(self.expression(exp.When, this=this, then=then)) 3799 3800 return self.expression( 3801 exp.Merge, 3802 this=target, 3803 using=using, 3804 on=on, 3805 expressions=whens, 3806 ) 3807 3808 def _parse_set(self) -> exp.Expression: 3809 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3810 3811 def _parse_as_command(self, start: Token) -> exp.Command: 3812 while self._curr: 3813 self._advance() 3814 text = self._find_sql(start, self._prev) 3815 size = len(start.text) 3816 return exp.Command(this=text[:size], expression=text[size:]) 3817 3818 def _find_parser( 3819 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3820 ) -> t.Optional[t.Callable]: 3821 index = self._index 3822 this = [] 3823 while True: 3824 # The current token might be multiple words 3825 curr = self._curr.text.upper() 3826 key = curr.split(" ") 3827 this.append(curr) 3828 self._advance() 3829 result, trie = in_trie(trie, key) 3830 if result == 0: 3831 break 3832 if result == 2: 3833 subparser = parsers[" ".join(this)] 3834 return subparser 3835 self._retreat(index) 3836 return None 3837 3838 def _match(self, token_type, advance=True): 3839 if not self._curr: 3840 return None 3841 3842 if self._curr.token_type == token_type: 3843 if advance: 3844 self._advance() 3845 return True 3846 3847 return None 3848 3849 def _match_set(self, types, advance=True): 3850 if not self._curr: 3851 return None 3852 3853 if self._curr.token_type in types: 3854 if advance: 3855 self._advance() 3856 return True 3857 3858 return None 3859 3860 def _match_pair(self, token_type_a, token_type_b, advance=True): 3861 if not self._curr or not self._next: 3862 return None 3863 3864 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3865 if advance: 3866 self._advance(2) 3867 return True 3868 3869 return None 3870 3871 def _match_l_paren(self, expression=None): 3872 if not self._match(TokenType.L_PAREN): 3873 self.raise_error("Expecting (") 3874 if expression and self._prev_comments: 3875 expression.comments = self._prev_comments 3876 3877 def _match_r_paren(self, expression=None): 3878 if not self._match(TokenType.R_PAREN): 3879 self.raise_error("Expecting )") 3880 if expression and self._prev_comments: 3881 expression.comments = self._prev_comments 3882 3883 def _match_texts(self, texts, advance=True): 3884 if self._curr and self._curr.text.upper() in texts: 3885 if advance: 3886 self._advance() 3887 return True 3888 return False 3889 3890 def _match_text_seq(self, *texts, advance=True): 3891 index = self._index 3892 for text in texts: 3893 if self._curr and self._curr.text.upper() == text: 3894 self._advance() 3895 else: 3896 self._retreat(index) 3897 return False 3898 3899 if not advance: 3900 self._retreat(index) 3901 3902 return True 3903 3904 def _replace_columns_with_dots(self, this): 3905 if isinstance(this, exp.Dot): 3906 exp.replace_children(this, self._replace_columns_with_dots) 3907 elif isinstance(this, exp.Column): 3908 exp.replace_children(this, self._replace_columns_with_dots) 3909 table = this.args.get("table") 3910 this = ( 3911 self.expression(exp.Dot, this=table, expression=this.this) 3912 if table 3913 else self.expression(exp.Var, this=this.name) 3914 ) 3915 elif isinstance(this, exp.Identifier): 3916 this = self.expression(exp.Var, this=this.name) 3917 return this 3918 3919 def _replace_lambda(self, node, lambda_variables): 3920 if isinstance(node, exp.Column): 3921 if node.name in lambda_variables: 3922 return node.this 3923 return node
43class Parser(metaclass=_Parser): 44 """ 45 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 46 a parsed syntax tree. 47 48 Args: 49 error_level: the desired error level. 50 Default: ErrorLevel.RAISE 51 error_message_context: determines the amount of context to capture from a 52 query string when displaying the error message (in number of characters). 53 Default: 50. 54 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 55 Default: 0 56 alias_post_tablesample: If the table alias comes after tablesample. 57 Default: False 58 max_errors: Maximum number of error messages to include in a raised ParseError. 59 This is only relevant if error_level is ErrorLevel.RAISE. 60 Default: 3 61 null_ordering: Indicates the default null ordering method to use if not explicitly set. 62 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 63 Default: "nulls_are_small" 64 """ 65 66 FUNCTIONS: t.Dict[str, t.Callable] = { 67 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 68 "DATE_TO_DATE_STR": lambda args: exp.Cast( 69 this=seq_get(args, 0), 70 to=exp.DataType(this=exp.DataType.Type.TEXT), 71 ), 72 "TIME_TO_TIME_STR": lambda args: exp.Cast( 73 this=seq_get(args, 0), 74 to=exp.DataType(this=exp.DataType.Type.TEXT), 75 ), 76 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 77 this=exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 start=exp.Literal.number(1), 82 length=exp.Literal.number(10), 83 ), 84 "VAR_MAP": parse_var_map, 85 "IFNULL": exp.Coalesce.from_arg_list, 86 } 87 88 NO_PAREN_FUNCTIONS = { 89 TokenType.CURRENT_DATE: exp.CurrentDate, 90 TokenType.CURRENT_DATETIME: exp.CurrentDate, 91 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 92 } 93 94 NESTED_TYPE_TOKENS = { 95 TokenType.ARRAY, 96 TokenType.MAP, 97 TokenType.STRUCT, 98 TokenType.NULLABLE, 99 } 100 101 TYPE_TOKENS = { 102 TokenType.BOOLEAN, 103 TokenType.TINYINT, 104 TokenType.SMALLINT, 105 TokenType.INT, 106 TokenType.BIGINT, 107 TokenType.FLOAT, 108 TokenType.DOUBLE, 109 TokenType.CHAR, 110 TokenType.NCHAR, 111 TokenType.VARCHAR, 112 TokenType.NVARCHAR, 113 TokenType.TEXT, 114 TokenType.MEDIUMTEXT, 115 TokenType.LONGTEXT, 116 TokenType.MEDIUMBLOB, 117 TokenType.LONGBLOB, 118 TokenType.BINARY, 119 TokenType.VARBINARY, 120 TokenType.JSON, 121 TokenType.JSONB, 122 TokenType.INTERVAL, 123 TokenType.TIME, 124 TokenType.TIMESTAMP, 125 TokenType.TIMESTAMPTZ, 126 TokenType.TIMESTAMPLTZ, 127 TokenType.DATETIME, 128 TokenType.DATE, 129 TokenType.DECIMAL, 130 TokenType.UUID, 131 TokenType.GEOGRAPHY, 132 TokenType.GEOMETRY, 133 TokenType.HLLSKETCH, 134 TokenType.HSTORE, 135 TokenType.PSEUDO_TYPE, 136 TokenType.SUPER, 137 TokenType.SERIAL, 138 TokenType.SMALLSERIAL, 139 TokenType.BIGSERIAL, 140 TokenType.XML, 141 TokenType.UNIQUEIDENTIFIER, 142 TokenType.MONEY, 143 TokenType.SMALLMONEY, 144 TokenType.ROWVERSION, 145 TokenType.IMAGE, 146 TokenType.VARIANT, 147 TokenType.OBJECT, 148 TokenType.INET, 149 *NESTED_TYPE_TOKENS, 150 } 151 152 SUBQUERY_PREDICATES = { 153 TokenType.ANY: exp.Any, 154 TokenType.ALL: exp.All, 155 TokenType.EXISTS: exp.Exists, 156 TokenType.SOME: exp.Any, 157 } 158 159 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 160 161 DB_CREATABLES = { 162 TokenType.DATABASE, 163 TokenType.SCHEMA, 164 TokenType.TABLE, 165 TokenType.VIEW, 166 } 167 168 CREATABLES = { 169 TokenType.COLUMN, 170 TokenType.FUNCTION, 171 TokenType.INDEX, 172 TokenType.PROCEDURE, 173 *DB_CREATABLES, 174 } 175 176 ID_VAR_TOKENS = { 177 TokenType.VAR, 178 TokenType.ANTI, 179 TokenType.APPLY, 180 TokenType.AUTO_INCREMENT, 181 TokenType.BEGIN, 182 TokenType.BOTH, 183 TokenType.BUCKET, 184 TokenType.CACHE, 185 TokenType.CASCADE, 186 TokenType.COLLATE, 187 TokenType.COMMAND, 188 TokenType.COMMENT, 189 TokenType.COMMIT, 190 TokenType.COMPOUND, 191 TokenType.CONSTRAINT, 192 TokenType.CURRENT_TIME, 193 TokenType.DEFAULT, 194 TokenType.DELETE, 195 TokenType.DESCRIBE, 196 TokenType.DIV, 197 TokenType.END, 198 TokenType.EXECUTE, 199 TokenType.ESCAPE, 200 TokenType.FALSE, 201 TokenType.FIRST, 202 TokenType.FILTER, 203 TokenType.FOLLOWING, 204 TokenType.FORMAT, 205 TokenType.IF, 206 TokenType.ISNULL, 207 TokenType.INTERVAL, 208 TokenType.LAZY, 209 TokenType.LEADING, 210 TokenType.LEFT, 211 TokenType.LOCAL, 212 TokenType.MATERIALIZED, 213 TokenType.MERGE, 214 TokenType.NATURAL, 215 TokenType.NEXT, 216 TokenType.OFFSET, 217 TokenType.ONLY, 218 TokenType.OPTIONS, 219 TokenType.ORDINALITY, 220 TokenType.PERCENT, 221 TokenType.PIVOT, 222 TokenType.PRECEDING, 223 TokenType.RANGE, 224 TokenType.REFERENCES, 225 TokenType.RIGHT, 226 TokenType.ROW, 227 TokenType.ROWS, 228 TokenType.SEED, 229 TokenType.SEMI, 230 TokenType.SET, 231 TokenType.SHOW, 232 TokenType.SORTKEY, 233 TokenType.TEMPORARY, 234 TokenType.TOP, 235 TokenType.TRAILING, 236 TokenType.TRUE, 237 TokenType.UNBOUNDED, 238 TokenType.UNIQUE, 239 TokenType.UNLOGGED, 240 TokenType.UNPIVOT, 241 TokenType.VOLATILE, 242 TokenType.WINDOW, 243 *CREATABLES, 244 *SUBQUERY_PREDICATES, 245 *TYPE_TOKENS, 246 *NO_PAREN_FUNCTIONS, 247 } 248 249 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 250 TokenType.APPLY, 251 TokenType.LEFT, 252 TokenType.NATURAL, 253 TokenType.OFFSET, 254 TokenType.RIGHT, 255 TokenType.WINDOW, 256 } 257 258 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 259 260 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 261 262 FUNC_TOKENS = { 263 TokenType.COMMAND, 264 TokenType.CURRENT_DATE, 265 TokenType.CURRENT_DATETIME, 266 TokenType.CURRENT_TIMESTAMP, 267 TokenType.CURRENT_TIME, 268 TokenType.FILTER, 269 TokenType.FIRST, 270 TokenType.FORMAT, 271 TokenType.IDENTIFIER, 272 TokenType.INDEX, 273 TokenType.ISNULL, 274 TokenType.ILIKE, 275 TokenType.LIKE, 276 TokenType.MERGE, 277 TokenType.OFFSET, 278 TokenType.PRIMARY_KEY, 279 TokenType.REPLACE, 280 TokenType.ROW, 281 TokenType.UNNEST, 282 TokenType.VAR, 283 TokenType.LEFT, 284 TokenType.RIGHT, 285 TokenType.DATE, 286 TokenType.DATETIME, 287 TokenType.TABLE, 288 TokenType.TIMESTAMP, 289 TokenType.TIMESTAMPTZ, 290 TokenType.WINDOW, 291 *TYPE_TOKENS, 292 *SUBQUERY_PREDICATES, 293 } 294 295 CONJUNCTION = { 296 TokenType.AND: exp.And, 297 TokenType.OR: exp.Or, 298 } 299 300 EQUALITY = { 301 TokenType.EQ: exp.EQ, 302 TokenType.NEQ: exp.NEQ, 303 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 304 } 305 306 COMPARISON = { 307 TokenType.GT: exp.GT, 308 TokenType.GTE: exp.GTE, 309 TokenType.LT: exp.LT, 310 TokenType.LTE: exp.LTE, 311 } 312 313 BITWISE = { 314 TokenType.AMP: exp.BitwiseAnd, 315 TokenType.CARET: exp.BitwiseXor, 316 TokenType.PIPE: exp.BitwiseOr, 317 TokenType.DPIPE: exp.DPipe, 318 } 319 320 TERM = { 321 TokenType.DASH: exp.Sub, 322 TokenType.PLUS: exp.Add, 323 TokenType.MOD: exp.Mod, 324 TokenType.COLLATE: exp.Collate, 325 } 326 327 FACTOR = { 328 TokenType.DIV: exp.IntDiv, 329 TokenType.LR_ARROW: exp.Distance, 330 TokenType.SLASH: exp.Div, 331 TokenType.STAR: exp.Mul, 332 } 333 334 TIMESTAMPS = { 335 TokenType.TIME, 336 TokenType.TIMESTAMP, 337 TokenType.TIMESTAMPTZ, 338 TokenType.TIMESTAMPLTZ, 339 } 340 341 SET_OPERATIONS = { 342 TokenType.UNION, 343 TokenType.INTERSECT, 344 TokenType.EXCEPT, 345 } 346 347 JOIN_SIDES = { 348 TokenType.LEFT, 349 TokenType.RIGHT, 350 TokenType.FULL, 351 } 352 353 JOIN_KINDS = { 354 TokenType.INNER, 355 TokenType.OUTER, 356 TokenType.CROSS, 357 TokenType.SEMI, 358 TokenType.ANTI, 359 } 360 361 LAMBDAS = { 362 TokenType.ARROW: lambda self, expressions: self.expression( 363 exp.Lambda, 364 this=self._parse_conjunction().transform( 365 self._replace_lambda, {node.name for node in expressions} 366 ), 367 expressions=expressions, 368 ), 369 TokenType.FARROW: lambda self, expressions: self.expression( 370 exp.Kwarg, 371 this=exp.Var(this=expressions[0].name), 372 expression=self._parse_conjunction(), 373 ), 374 } 375 376 COLUMN_OPERATORS = { 377 TokenType.DOT: None, 378 TokenType.DCOLON: lambda self, this, to: self.expression( 379 exp.Cast, 380 this=this, 381 to=to, 382 ), 383 TokenType.ARROW: lambda self, this, path: self.expression( 384 exp.JSONExtract, 385 this=this, 386 expression=path, 387 ), 388 TokenType.DARROW: lambda self, this, path: self.expression( 389 exp.JSONExtractScalar, 390 this=this, 391 expression=path, 392 ), 393 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 394 exp.JSONBExtract, 395 this=this, 396 expression=path, 397 ), 398 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 399 exp.JSONBExtractScalar, 400 this=this, 401 expression=path, 402 ), 403 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 404 exp.JSONBContains, 405 this=this, 406 expression=key, 407 ), 408 } 409 410 EXPRESSION_PARSERS = { 411 exp.Column: lambda self: self._parse_column(), 412 exp.DataType: lambda self: self._parse_types(), 413 exp.From: lambda self: self._parse_from(), 414 exp.Group: lambda self: self._parse_group(), 415 exp.Identifier: lambda self: self._parse_id_var(), 416 exp.Lateral: lambda self: self._parse_lateral(), 417 exp.Join: lambda self: self._parse_join(), 418 exp.Order: lambda self: self._parse_order(), 419 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 420 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 421 exp.Lambda: lambda self: self._parse_lambda(), 422 exp.Limit: lambda self: self._parse_limit(), 423 exp.Offset: lambda self: self._parse_offset(), 424 exp.TableAlias: lambda self: self._parse_table_alias(), 425 exp.Table: lambda self: self._parse_table(), 426 exp.Condition: lambda self: self._parse_conjunction(), 427 exp.Expression: lambda self: self._parse_statement(), 428 exp.Properties: lambda self: self._parse_properties(), 429 exp.Where: lambda self: self._parse_where(), 430 exp.Ordered: lambda self: self._parse_ordered(), 431 exp.Having: lambda self: self._parse_having(), 432 exp.With: lambda self: self._parse_with(), 433 exp.Window: lambda self: self._parse_named_window(), 434 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 435 } 436 437 STATEMENT_PARSERS = { 438 TokenType.ALTER: lambda self: self._parse_alter(), 439 TokenType.BEGIN: lambda self: self._parse_transaction(), 440 TokenType.CACHE: lambda self: self._parse_cache(), 441 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 442 TokenType.COMMENT: lambda self: self._parse_comment(), 443 TokenType.CREATE: lambda self: self._parse_create(), 444 TokenType.DELETE: lambda self: self._parse_delete(), 445 TokenType.DESC: lambda self: self._parse_describe(), 446 TokenType.DESCRIBE: lambda self: self._parse_describe(), 447 TokenType.DROP: lambda self: self._parse_drop(), 448 TokenType.END: lambda self: self._parse_commit_or_rollback(), 449 TokenType.INSERT: lambda self: self._parse_insert(), 450 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 451 TokenType.MERGE: lambda self: self._parse_merge(), 452 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 453 TokenType.UNCACHE: lambda self: self._parse_uncache(), 454 TokenType.UPDATE: lambda self: self._parse_update(), 455 TokenType.USE: lambda self: self.expression( 456 exp.Use, 457 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 458 and exp.Var(this=self._prev.text), 459 this=self._parse_table(schema=False), 460 ), 461 } 462 463 UNARY_PARSERS = { 464 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 465 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 466 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 467 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 468 } 469 470 PRIMARY_PARSERS = { 471 TokenType.STRING: lambda self, token: self.expression( 472 exp.Literal, this=token.text, is_string=True 473 ), 474 TokenType.NUMBER: lambda self, token: self.expression( 475 exp.Literal, this=token.text, is_string=False 476 ), 477 TokenType.STAR: lambda self, _: self.expression( 478 exp.Star, 479 **{"except": self._parse_except(), "replace": self._parse_replace()}, 480 ), 481 TokenType.NULL: lambda self, _: self.expression(exp.Null), 482 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 483 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 484 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 485 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 486 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 487 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 488 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 489 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 490 } 491 492 PLACEHOLDER_PARSERS = { 493 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 494 TokenType.PARAMETER: lambda self: self._parse_parameter(), 495 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 496 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 497 else None, 498 } 499 500 RANGE_PARSERS = { 501 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 502 TokenType.GLOB: lambda self, this: self._parse_escape( 503 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 504 ), 505 TokenType.OVERLAPS: lambda self, this: self._parse_escape( 506 self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) 507 ), 508 TokenType.IN: lambda self, this: self._parse_in(this), 509 TokenType.IS: lambda self, this: self._parse_is(this), 510 TokenType.LIKE: lambda self, this: self._parse_escape( 511 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) 512 ), 513 TokenType.ILIKE: lambda self, this: self._parse_escape( 514 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) 515 ), 516 TokenType.IRLIKE: lambda self, this: self.expression( 517 exp.RegexpILike, this=this, expression=self._parse_bitwise() 518 ), 519 TokenType.RLIKE: lambda self, this: self.expression( 520 exp.RegexpLike, this=this, expression=self._parse_bitwise() 521 ), 522 TokenType.SIMILAR_TO: lambda self, this: self.expression( 523 exp.SimilarTo, this=this, expression=self._parse_bitwise() 524 ), 525 } 526 527 PROPERTY_PARSERS = { 528 "AFTER": lambda self: self._parse_afterjournal( 529 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 530 ), 531 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 532 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 533 "BEFORE": lambda self: self._parse_journal( 534 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 535 ), 536 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 537 "CHARACTER SET": lambda self: self._parse_character_set(), 538 "CHECKSUM": lambda self: self._parse_checksum(), 539 "CLUSTER BY": lambda self: self.expression( 540 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 541 ), 542 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 543 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 544 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 545 default=self._prev.text.upper() == "DEFAULT" 546 ), 547 "DEFINER": lambda self: self._parse_definer(), 548 "DETERMINISTIC": lambda self: self.expression( 549 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 550 ), 551 "DISTKEY": lambda self: self._parse_distkey(), 552 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 553 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 554 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 555 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 556 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 557 "FREESPACE": lambda self: self._parse_freespace(), 558 "GLOBAL": lambda self: self._parse_temporary(global_=True), 559 "IMMUTABLE": lambda self: self.expression( 560 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 561 ), 562 "JOURNAL": lambda self: self._parse_journal( 563 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 564 ), 565 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 566 "LIKE": lambda self: self._parse_create_like(), 567 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 568 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 569 "LOCK": lambda self: self._parse_locking(), 570 "LOCKING": lambda self: self._parse_locking(), 571 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 572 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 573 "MAX": lambda self: self._parse_datablocksize(), 574 "MAXIMUM": lambda self: self._parse_datablocksize(), 575 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 576 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 577 ), 578 "MIN": lambda self: self._parse_datablocksize(), 579 "MINIMUM": lambda self: self._parse_datablocksize(), 580 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 581 "NO": lambda self: self._parse_noprimaryindex(), 582 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 583 "ON": lambda self: self._parse_oncommit(), 584 "PARTITION BY": lambda self: self._parse_partitioned_by(), 585 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 586 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 587 "RETURNS": lambda self: self._parse_returns(), 588 "ROW": lambda self: self._parse_row(), 589 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 590 "SORTKEY": lambda self: self._parse_sortkey(), 591 "STABLE": lambda self: self.expression( 592 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 593 ), 594 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 595 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 596 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 597 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 598 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 599 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 600 "VOLATILE": lambda self: self.expression( 601 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 602 ), 603 "WITH": lambda self: self._parse_with_property(), 604 } 605 606 CONSTRAINT_PARSERS = { 607 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 608 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 609 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 610 "CHARACTER SET": lambda self: self.expression( 611 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 612 ), 613 "CHECK": lambda self: self.expression( 614 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 615 ), 616 "COLLATE": lambda self: self.expression( 617 exp.CollateColumnConstraint, this=self._parse_var() 618 ), 619 "COMMENT": lambda self: self.expression( 620 exp.CommentColumnConstraint, this=self._parse_string() 621 ), 622 "COMPRESS": lambda self: self._parse_compress(), 623 "DEFAULT": lambda self: self.expression( 624 exp.DefaultColumnConstraint, this=self._parse_bitwise() 625 ), 626 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 627 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 628 "FORMAT": lambda self: self.expression( 629 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 630 ), 631 "GENERATED": lambda self: self._parse_generated_as_identity(), 632 "IDENTITY": lambda self: self._parse_auto_increment(), 633 "INLINE": lambda self: self._parse_inline(), 634 "LIKE": lambda self: self._parse_create_like(), 635 "NOT": lambda self: self._parse_not_constraint(), 636 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 637 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 638 "PRIMARY KEY": lambda self: self._parse_primary_key(), 639 "TITLE": lambda self: self.expression( 640 exp.TitleColumnConstraint, this=self._parse_var_or_string() 641 ), 642 "UNIQUE": lambda self: self._parse_unique(), 643 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 644 } 645 646 ALTER_PARSERS = { 647 "ADD": lambda self: self._parse_alter_table_add(), 648 "ALTER": lambda self: self._parse_alter_table_alter(), 649 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 650 "DROP": lambda self: self._parse_alter_table_drop(), 651 "RENAME": lambda self: self._parse_alter_table_rename(), 652 } 653 654 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 655 656 NO_PAREN_FUNCTION_PARSERS = { 657 TokenType.CASE: lambda self: self._parse_case(), 658 TokenType.IF: lambda self: self._parse_if(), 659 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 660 } 661 662 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 663 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 664 "TRY_CONVERT": lambda self: self._parse_convert(False), 665 "EXTRACT": lambda self: self._parse_extract(), 666 "POSITION": lambda self: self._parse_position(), 667 "SUBSTRING": lambda self: self._parse_substring(), 668 "TRIM": lambda self: self._parse_trim(), 669 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 670 "TRY_CAST": lambda self: self._parse_cast(False), 671 "STRING_AGG": lambda self: self._parse_string_agg(), 672 } 673 674 QUERY_MODIFIER_PARSERS = { 675 "match": lambda self: self._parse_match_recognize(), 676 "where": lambda self: self._parse_where(), 677 "group": lambda self: self._parse_group(), 678 "having": lambda self: self._parse_having(), 679 "qualify": lambda self: self._parse_qualify(), 680 "windows": lambda self: self._parse_window_clause(), 681 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 682 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 683 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 684 "order": lambda self: self._parse_order(), 685 "limit": lambda self: self._parse_limit(), 686 "offset": lambda self: self._parse_offset(), 687 "lock": lambda self: self._parse_lock(), 688 } 689 690 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 691 SET_PARSERS: t.Dict[str, t.Callable] = {} 692 693 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 694 695 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 696 697 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 698 699 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 700 701 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 702 703 STRICT_CAST = True 704 705 __slots__ = ( 706 "error_level", 707 "error_message_context", 708 "sql", 709 "errors", 710 "index_offset", 711 "unnest_column_only", 712 "alias_post_tablesample", 713 "max_errors", 714 "null_ordering", 715 "_tokens", 716 "_index", 717 "_curr", 718 "_next", 719 "_prev", 720 "_prev_comments", 721 "_show_trie", 722 "_set_trie", 723 ) 724 725 def __init__( 726 self, 727 error_level: t.Optional[ErrorLevel] = None, 728 error_message_context: int = 100, 729 index_offset: int = 0, 730 unnest_column_only: bool = False, 731 alias_post_tablesample: bool = False, 732 max_errors: int = 3, 733 null_ordering: t.Optional[str] = None, 734 ): 735 self.error_level = error_level or ErrorLevel.IMMEDIATE 736 self.error_message_context = error_message_context 737 self.index_offset = index_offset 738 self.unnest_column_only = unnest_column_only 739 self.alias_post_tablesample = alias_post_tablesample 740 self.max_errors = max_errors 741 self.null_ordering = null_ordering 742 self.reset() 743 744 def reset(self): 745 self.sql = "" 746 self.errors = [] 747 self._tokens = [] 748 self._index = 0 749 self._curr = None 750 self._next = None 751 self._prev = None 752 self._prev_comments = None 753 754 def parse( 755 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 756 ) -> t.List[t.Optional[exp.Expression]]: 757 """ 758 Parses a list of tokens and returns a list of syntax trees, one tree 759 per parsed SQL statement. 760 761 Args: 762 raw_tokens: the list of tokens. 763 sql: the original SQL string, used to produce helpful debug messages. 764 765 Returns: 766 The list of syntax trees. 767 """ 768 return self._parse( 769 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 770 ) 771 772 def parse_into( 773 self, 774 expression_types: exp.IntoType, 775 raw_tokens: t.List[Token], 776 sql: t.Optional[str] = None, 777 ) -> t.List[t.Optional[exp.Expression]]: 778 """ 779 Parses a list of tokens into a given Expression type. If a collection of Expression 780 types is given instead, this method will try to parse the token list into each one 781 of them, stopping at the first for which the parsing succeeds. 782 783 Args: 784 expression_types: the expression type(s) to try and parse the token list into. 785 raw_tokens: the list of tokens. 786 sql: the original SQL string, used to produce helpful debug messages. 787 788 Returns: 789 The target Expression. 790 """ 791 errors = [] 792 for expression_type in ensure_collection(expression_types): 793 parser = self.EXPRESSION_PARSERS.get(expression_type) 794 if not parser: 795 raise TypeError(f"No parser registered for {expression_type}") 796 try: 797 return self._parse(parser, raw_tokens, sql) 798 except ParseError as e: 799 e.errors[0]["into_expression"] = expression_type 800 errors.append(e) 801 raise ParseError( 802 f"Failed to parse into {expression_types}", 803 errors=merge_errors(errors), 804 ) from errors[-1] 805 806 def _parse( 807 self, 808 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 809 raw_tokens: t.List[Token], 810 sql: t.Optional[str] = None, 811 ) -> t.List[t.Optional[exp.Expression]]: 812 self.reset() 813 self.sql = sql or "" 814 total = len(raw_tokens) 815 chunks: t.List[t.List[Token]] = [[]] 816 817 for i, token in enumerate(raw_tokens): 818 if token.token_type == TokenType.SEMICOLON: 819 if i < total - 1: 820 chunks.append([]) 821 else: 822 chunks[-1].append(token) 823 824 expressions = [] 825 826 for tokens in chunks: 827 self._index = -1 828 self._tokens = tokens 829 self._advance() 830 831 expressions.append(parse_method(self)) 832 833 if self._index < len(self._tokens): 834 self.raise_error("Invalid expression / Unexpected token") 835 836 self.check_errors() 837 838 return expressions 839 840 def check_errors(self) -> None: 841 """ 842 Logs or raises any found errors, depending on the chosen error level setting. 843 """ 844 if self.error_level == ErrorLevel.WARN: 845 for error in self.errors: 846 logger.error(str(error)) 847 elif self.error_level == ErrorLevel.RAISE and self.errors: 848 raise ParseError( 849 concat_messages(self.errors, self.max_errors), 850 errors=merge_errors(self.errors), 851 ) 852 853 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 854 """ 855 Appends an error in the list of recorded errors or raises it, depending on the chosen 856 error level setting. 857 """ 858 token = token or self._curr or self._prev or Token.string("") 859 start = self._find_token(token) 860 end = start + len(token.text) 861 start_context = self.sql[max(start - self.error_message_context, 0) : start] 862 highlight = self.sql[start:end] 863 end_context = self.sql[end : end + self.error_message_context] 864 865 error = ParseError.new( 866 f"{message}. Line {token.line}, Col: {token.col}.\n" 867 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 868 description=message, 869 line=token.line, 870 col=token.col, 871 start_context=start_context, 872 highlight=highlight, 873 end_context=end_context, 874 ) 875 876 if self.error_level == ErrorLevel.IMMEDIATE: 877 raise error 878 879 self.errors.append(error) 880 881 def expression( 882 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 883 ) -> exp.Expression: 884 """ 885 Creates a new, validated Expression. 886 887 Args: 888 exp_class: the expression class to instantiate. 889 comments: an optional list of comments to attach to the expression. 890 kwargs: the arguments to set for the expression along with their respective values. 891 892 Returns: 893 The target expression. 894 """ 895 instance = exp_class(**kwargs) 896 if self._prev_comments: 897 instance.comments = self._prev_comments 898 self._prev_comments = None 899 if comments: 900 instance.comments = comments 901 self.validate_expression(instance) 902 return instance 903 904 def validate_expression( 905 self, expression: exp.Expression, args: t.Optional[t.List] = None 906 ) -> None: 907 """ 908 Validates an already instantiated expression, making sure that all its mandatory arguments 909 are set. 910 911 Args: 912 expression: the expression to validate. 913 args: an optional list of items that was used to instantiate the expression, if it's a Func. 914 """ 915 if self.error_level == ErrorLevel.IGNORE: 916 return 917 918 for error_message in expression.error_messages(args): 919 self.raise_error(error_message) 920 921 def _find_sql(self, start: Token, end: Token) -> str: 922 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 923 924 def _find_token(self, token: Token) -> int: 925 line = 1 926 col = 1 927 index = 0 928 929 while line < token.line or col < token.col: 930 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 931 line += 1 932 col = 1 933 else: 934 col += 1 935 index += 1 936 937 return index 938 939 def _advance(self, times: int = 1) -> None: 940 self._index += times 941 self._curr = seq_get(self._tokens, self._index) 942 self._next = seq_get(self._tokens, self._index + 1) 943 if self._index > 0: 944 self._prev = self._tokens[self._index - 1] 945 self._prev_comments = self._prev.comments 946 else: 947 self._prev = None 948 self._prev_comments = None 949 950 def _retreat(self, index: int) -> None: 951 self._advance(index - self._index) 952 953 def _parse_command(self) -> exp.Expression: 954 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 955 956 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 957 start = self._prev 958 exists = self._parse_exists() if allow_exists else None 959 960 self._match(TokenType.ON) 961 962 kind = self._match_set(self.CREATABLES) and self._prev 963 964 if not kind: 965 return self._parse_as_command(start) 966 967 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 968 this = self._parse_user_defined_function(kind=kind.token_type) 969 elif kind.token_type == TokenType.TABLE: 970 this = self._parse_table() 971 elif kind.token_type == TokenType.COLUMN: 972 this = self._parse_column() 973 else: 974 this = self._parse_id_var() 975 976 self._match(TokenType.IS) 977 978 return self.expression( 979 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 980 ) 981 982 def _parse_statement(self) -> t.Optional[exp.Expression]: 983 if self._curr is None: 984 return None 985 986 if self._match_set(self.STATEMENT_PARSERS): 987 return self.STATEMENT_PARSERS[self._prev.token_type](self) 988 989 if self._match_set(Tokenizer.COMMANDS): 990 return self._parse_command() 991 992 expression = self._parse_expression() 993 expression = self._parse_set_operations(expression) if expression else self._parse_select() 994 995 self._parse_query_modifiers(expression) 996 return expression 997 998 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 999 start = self._prev 1000 temporary = self._match(TokenType.TEMPORARY) 1001 materialized = self._match(TokenType.MATERIALIZED) 1002 kind = self._match_set(self.CREATABLES) and self._prev.text 1003 if not kind: 1004 if default_kind: 1005 kind = default_kind 1006 else: 1007 return self._parse_as_command(start) 1008 1009 return self.expression( 1010 exp.Drop, 1011 exists=self._parse_exists(), 1012 this=self._parse_table(schema=True), 1013 kind=kind, 1014 temporary=temporary, 1015 materialized=materialized, 1016 cascade=self._match(TokenType.CASCADE), 1017 ) 1018 1019 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1020 return ( 1021 self._match(TokenType.IF) 1022 and (not not_ or self._match(TokenType.NOT)) 1023 and self._match(TokenType.EXISTS) 1024 ) 1025 1026 def _parse_create(self) -> t.Optional[exp.Expression]: 1027 start = self._prev 1028 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1029 TokenType.OR, TokenType.REPLACE 1030 ) 1031 unique = self._match(TokenType.UNIQUE) 1032 volatile = self._match(TokenType.VOLATILE) 1033 1034 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1035 self._match(TokenType.TABLE) 1036 1037 properties = None 1038 create_token = self._match_set(self.CREATABLES) and self._prev 1039 1040 if not create_token: 1041 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1042 create_token = self._match_set(self.CREATABLES) and self._prev 1043 1044 if not properties or not create_token: 1045 return self._parse_as_command(start) 1046 1047 exists = self._parse_exists(not_=True) 1048 this = None 1049 expression = None 1050 indexes = None 1051 no_schema_binding = None 1052 begin = None 1053 1054 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1055 this = self._parse_user_defined_function(kind=create_token.token_type) 1056 temp_properties = self._parse_properties() 1057 if properties and temp_properties: 1058 properties.expressions.extend(temp_properties.expressions) 1059 elif temp_properties: 1060 properties = temp_properties 1061 1062 self._match(TokenType.ALIAS) 1063 begin = self._match(TokenType.BEGIN) 1064 return_ = self._match_text_seq("RETURN") 1065 expression = self._parse_statement() 1066 1067 if return_: 1068 expression = self.expression(exp.Return, this=expression) 1069 elif create_token.token_type == TokenType.INDEX: 1070 this = self._parse_index() 1071 elif create_token.token_type in self.DB_CREATABLES: 1072 table_parts = self._parse_table_parts(schema=True) 1073 1074 # exp.Properties.Location.POST_NAME 1075 if self._match(TokenType.COMMA): 1076 temp_properties = self._parse_properties(before=True) 1077 if properties and temp_properties: 1078 properties.expressions.extend(temp_properties.expressions) 1079 elif temp_properties: 1080 properties = temp_properties 1081 1082 this = self._parse_schema(this=table_parts) 1083 1084 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1085 temp_properties = self._parse_properties() 1086 if properties and temp_properties: 1087 properties.expressions.extend(temp_properties.expressions) 1088 elif temp_properties: 1089 properties = temp_properties 1090 1091 self._match(TokenType.ALIAS) 1092 1093 # exp.Properties.Location.POST_ALIAS 1094 if not ( 1095 self._match(TokenType.SELECT, advance=False) 1096 or self._match(TokenType.WITH, advance=False) 1097 or self._match(TokenType.L_PAREN, advance=False) 1098 ): 1099 temp_properties = self._parse_properties() 1100 if properties and temp_properties: 1101 properties.expressions.extend(temp_properties.expressions) 1102 elif temp_properties: 1103 properties = temp_properties 1104 1105 expression = self._parse_ddl_select() 1106 1107 if create_token.token_type == TokenType.TABLE: 1108 # exp.Properties.Location.POST_EXPRESSION 1109 temp_properties = self._parse_properties() 1110 if properties and temp_properties: 1111 properties.expressions.extend(temp_properties.expressions) 1112 elif temp_properties: 1113 properties = temp_properties 1114 1115 indexes = [] 1116 while True: 1117 index = self._parse_create_table_index() 1118 1119 # exp.Properties.Location.POST_INDEX 1120 if self._match(TokenType.PARTITION_BY, advance=False): 1121 temp_properties = self._parse_properties() 1122 if properties and temp_properties: 1123 properties.expressions.extend(temp_properties.expressions) 1124 elif temp_properties: 1125 properties = temp_properties 1126 1127 if not index: 1128 break 1129 else: 1130 indexes.append(index) 1131 elif create_token.token_type == TokenType.VIEW: 1132 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1133 no_schema_binding = True 1134 1135 return self.expression( 1136 exp.Create, 1137 this=this, 1138 kind=create_token.text, 1139 replace=replace, 1140 unique=unique, 1141 volatile=volatile, 1142 expression=expression, 1143 exists=exists, 1144 properties=properties, 1145 indexes=indexes, 1146 no_schema_binding=no_schema_binding, 1147 begin=begin, 1148 ) 1149 1150 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1151 self._match(TokenType.COMMA) 1152 1153 # parsers look to _prev for no/dual/default, so need to consume first 1154 self._match_text_seq("NO") 1155 self._match_text_seq("DUAL") 1156 self._match_text_seq("DEFAULT") 1157 1158 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1159 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1160 1161 return None 1162 1163 def _parse_property(self) -> t.Optional[exp.Expression]: 1164 if self._match_texts(self.PROPERTY_PARSERS): 1165 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1166 1167 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1168 return self._parse_character_set(default=True) 1169 1170 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1171 return self._parse_sortkey(compound=True) 1172 1173 if self._match_text_seq("SQL", "SECURITY"): 1174 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1175 1176 assignment = self._match_pair( 1177 TokenType.VAR, TokenType.EQ, advance=False 1178 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1179 1180 if assignment: 1181 key = self._parse_var_or_string() 1182 self._match(TokenType.EQ) 1183 return self.expression(exp.Property, this=key, value=self._parse_column()) 1184 1185 return None 1186 1187 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1188 self._match(TokenType.EQ) 1189 self._match(TokenType.ALIAS) 1190 return self.expression( 1191 exp_class, 1192 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1193 ) 1194 1195 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1196 properties = [] 1197 1198 while True: 1199 if before: 1200 identified_property = self._parse_property_before() 1201 else: 1202 identified_property = self._parse_property() 1203 1204 if not identified_property: 1205 break 1206 for p in ensure_collection(identified_property): 1207 properties.append(p) 1208 1209 if properties: 1210 return self.expression(exp.Properties, expressions=properties) 1211 1212 return None 1213 1214 def _parse_fallback(self, no=False) -> exp.Expression: 1215 self._match_text_seq("FALLBACK") 1216 return self.expression( 1217 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1218 ) 1219 1220 def _parse_with_property( 1221 self, 1222 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1223 self._match(TokenType.WITH) 1224 if self._match(TokenType.L_PAREN, advance=False): 1225 return self._parse_wrapped_csv(self._parse_property) 1226 1227 if self._match_text_seq("JOURNAL"): 1228 return self._parse_withjournaltable() 1229 1230 if self._match_text_seq("DATA"): 1231 return self._parse_withdata(no=False) 1232 elif self._match_text_seq("NO", "DATA"): 1233 return self._parse_withdata(no=True) 1234 1235 if not self._next: 1236 return None 1237 1238 return self._parse_withisolatedloading() 1239 1240 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1241 def _parse_definer(self) -> t.Optional[exp.Expression]: 1242 self._match(TokenType.EQ) 1243 1244 user = self._parse_id_var() 1245 self._match(TokenType.PARAMETER) 1246 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1247 1248 if not user or not host: 1249 return None 1250 1251 return exp.DefinerProperty(this=f"{user}@{host}") 1252 1253 def _parse_withjournaltable(self) -> exp.Expression: 1254 self._match(TokenType.TABLE) 1255 self._match(TokenType.EQ) 1256 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1257 1258 def _parse_log(self, no=False) -> exp.Expression: 1259 self._match_text_seq("LOG") 1260 return self.expression(exp.LogProperty, no=no) 1261 1262 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1263 before = self._match_text_seq("BEFORE") 1264 self._match_text_seq("JOURNAL") 1265 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1266 1267 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1268 self._match_text_seq("NOT") 1269 self._match_text_seq("LOCAL") 1270 self._match_text_seq("AFTER", "JOURNAL") 1271 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1272 1273 def _parse_checksum(self) -> exp.Expression: 1274 self._match_text_seq("CHECKSUM") 1275 self._match(TokenType.EQ) 1276 1277 on = None 1278 if self._match(TokenType.ON): 1279 on = True 1280 elif self._match_text_seq("OFF"): 1281 on = False 1282 default = self._match(TokenType.DEFAULT) 1283 1284 return self.expression( 1285 exp.ChecksumProperty, 1286 on=on, 1287 default=default, 1288 ) 1289 1290 def _parse_freespace(self) -> exp.Expression: 1291 self._match_text_seq("FREESPACE") 1292 self._match(TokenType.EQ) 1293 return self.expression( 1294 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1295 ) 1296 1297 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1298 self._match_text_seq("MERGEBLOCKRATIO") 1299 if self._match(TokenType.EQ): 1300 return self.expression( 1301 exp.MergeBlockRatioProperty, 1302 this=self._parse_number(), 1303 percent=self._match(TokenType.PERCENT), 1304 ) 1305 else: 1306 return self.expression( 1307 exp.MergeBlockRatioProperty, 1308 no=no, 1309 default=default, 1310 ) 1311 1312 def _parse_datablocksize(self, default=None) -> exp.Expression: 1313 if default: 1314 self._match_text_seq("DATABLOCKSIZE") 1315 return self.expression(exp.DataBlocksizeProperty, default=True) 1316 elif self._match_texts(("MIN", "MINIMUM")): 1317 self._match_text_seq("DATABLOCKSIZE") 1318 return self.expression(exp.DataBlocksizeProperty, min=True) 1319 elif self._match_texts(("MAX", "MAXIMUM")): 1320 self._match_text_seq("DATABLOCKSIZE") 1321 return self.expression(exp.DataBlocksizeProperty, min=False) 1322 1323 self._match_text_seq("DATABLOCKSIZE") 1324 self._match(TokenType.EQ) 1325 size = self._parse_number() 1326 units = None 1327 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1328 units = self._prev.text 1329 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1330 1331 def _parse_blockcompression(self) -> exp.Expression: 1332 self._match_text_seq("BLOCKCOMPRESSION") 1333 self._match(TokenType.EQ) 1334 always = self._match_text_seq("ALWAYS") 1335 manual = self._match_text_seq("MANUAL") 1336 never = self._match_text_seq("NEVER") 1337 default = self._match_text_seq("DEFAULT") 1338 autotemp = None 1339 if self._match_text_seq("AUTOTEMP"): 1340 autotemp = self._parse_schema() 1341 1342 return self.expression( 1343 exp.BlockCompressionProperty, 1344 always=always, 1345 manual=manual, 1346 never=never, 1347 default=default, 1348 autotemp=autotemp, 1349 ) 1350 1351 def _parse_withisolatedloading(self) -> exp.Expression: 1352 no = self._match_text_seq("NO") 1353 concurrent = self._match_text_seq("CONCURRENT") 1354 self._match_text_seq("ISOLATED", "LOADING") 1355 for_all = self._match_text_seq("FOR", "ALL") 1356 for_insert = self._match_text_seq("FOR", "INSERT") 1357 for_none = self._match_text_seq("FOR", "NONE") 1358 return self.expression( 1359 exp.IsolatedLoadingProperty, 1360 no=no, 1361 concurrent=concurrent, 1362 for_all=for_all, 1363 for_insert=for_insert, 1364 for_none=for_none, 1365 ) 1366 1367 def _parse_locking(self) -> exp.Expression: 1368 if self._match(TokenType.TABLE): 1369 kind = "TABLE" 1370 elif self._match(TokenType.VIEW): 1371 kind = "VIEW" 1372 elif self._match(TokenType.ROW): 1373 kind = "ROW" 1374 elif self._match_text_seq("DATABASE"): 1375 kind = "DATABASE" 1376 else: 1377 kind = None 1378 1379 if kind in ("DATABASE", "TABLE", "VIEW"): 1380 this = self._parse_table_parts() 1381 else: 1382 this = None 1383 1384 if self._match(TokenType.FOR): 1385 for_or_in = "FOR" 1386 elif self._match(TokenType.IN): 1387 for_or_in = "IN" 1388 else: 1389 for_or_in = None 1390 1391 if self._match_text_seq("ACCESS"): 1392 lock_type = "ACCESS" 1393 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1394 lock_type = "EXCLUSIVE" 1395 elif self._match_text_seq("SHARE"): 1396 lock_type = "SHARE" 1397 elif self._match_text_seq("READ"): 1398 lock_type = "READ" 1399 elif self._match_text_seq("WRITE"): 1400 lock_type = "WRITE" 1401 elif self._match_text_seq("CHECKSUM"): 1402 lock_type = "CHECKSUM" 1403 else: 1404 lock_type = None 1405 1406 override = self._match_text_seq("OVERRIDE") 1407 1408 return self.expression( 1409 exp.LockingProperty, 1410 this=this, 1411 kind=kind, 1412 for_or_in=for_or_in, 1413 lock_type=lock_type, 1414 override=override, 1415 ) 1416 1417 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1418 if self._match(TokenType.PARTITION_BY): 1419 return self._parse_csv(self._parse_conjunction) 1420 return [] 1421 1422 def _parse_partitioned_by(self) -> exp.Expression: 1423 self._match(TokenType.EQ) 1424 return self.expression( 1425 exp.PartitionedByProperty, 1426 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1427 ) 1428 1429 def _parse_withdata(self, no=False) -> exp.Expression: 1430 if self._match_text_seq("AND", "STATISTICS"): 1431 statistics = True 1432 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1433 statistics = False 1434 else: 1435 statistics = None 1436 1437 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1438 1439 def _parse_noprimaryindex(self) -> exp.Expression: 1440 self._match_text_seq("PRIMARY", "INDEX") 1441 return exp.NoPrimaryIndexProperty() 1442 1443 def _parse_oncommit(self) -> exp.Expression: 1444 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1445 return exp.OnCommitProperty() 1446 1447 def _parse_distkey(self) -> exp.Expression: 1448 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1449 1450 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1451 table = self._parse_table(schema=True) 1452 options = [] 1453 while self._match_texts(("INCLUDING", "EXCLUDING")): 1454 this = self._prev.text.upper() 1455 id_var = self._parse_id_var() 1456 1457 if not id_var: 1458 return None 1459 1460 options.append( 1461 self.expression( 1462 exp.Property, 1463 this=this, 1464 value=exp.Var(this=id_var.this.upper()), 1465 ) 1466 ) 1467 return self.expression(exp.LikeProperty, this=table, expressions=options) 1468 1469 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1470 return self.expression( 1471 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1472 ) 1473 1474 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1475 self._match(TokenType.EQ) 1476 return self.expression( 1477 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1478 ) 1479 1480 def _parse_returns(self) -> exp.Expression: 1481 value: t.Optional[exp.Expression] 1482 is_table = self._match(TokenType.TABLE) 1483 1484 if is_table: 1485 if self._match(TokenType.LT): 1486 value = self.expression( 1487 exp.Schema, 1488 this="TABLE", 1489 expressions=self._parse_csv(self._parse_struct_kwargs), 1490 ) 1491 if not self._match(TokenType.GT): 1492 self.raise_error("Expecting >") 1493 else: 1494 value = self._parse_schema(exp.Var(this="TABLE")) 1495 else: 1496 value = self._parse_types() 1497 1498 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1499 1500 def _parse_temporary(self, global_=False) -> exp.Expression: 1501 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1502 return self.expression(exp.TemporaryProperty, global_=global_) 1503 1504 def _parse_describe(self) -> exp.Expression: 1505 kind = self._match_set(self.CREATABLES) and self._prev.text 1506 this = self._parse_table() 1507 1508 return self.expression(exp.Describe, this=this, kind=kind) 1509 1510 def _parse_insert(self) -> exp.Expression: 1511 overwrite = self._match(TokenType.OVERWRITE) 1512 local = self._match(TokenType.LOCAL) 1513 1514 this: t.Optional[exp.Expression] 1515 1516 alternative = None 1517 if self._match_text_seq("DIRECTORY"): 1518 this = self.expression( 1519 exp.Directory, 1520 this=self._parse_var_or_string(), 1521 local=local, 1522 row_format=self._parse_row_format(match_row=True), 1523 ) 1524 else: 1525 if self._match(TokenType.OR): 1526 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1527 1528 self._match(TokenType.INTO) 1529 self._match(TokenType.TABLE) 1530 this = self._parse_table(schema=True) 1531 1532 return self.expression( 1533 exp.Insert, 1534 this=this, 1535 exists=self._parse_exists(), 1536 partition=self._parse_partition(), 1537 expression=self._parse_ddl_select(), 1538 overwrite=overwrite, 1539 alternative=alternative, 1540 ) 1541 1542 def _parse_row(self) -> t.Optional[exp.Expression]: 1543 if not self._match(TokenType.FORMAT): 1544 return None 1545 return self._parse_row_format() 1546 1547 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1548 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1549 return None 1550 1551 if self._match_text_seq("SERDE"): 1552 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1553 1554 self._match_text_seq("DELIMITED") 1555 1556 kwargs = {} 1557 1558 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1559 kwargs["fields"] = self._parse_string() 1560 if self._match_text_seq("ESCAPED", "BY"): 1561 kwargs["escaped"] = self._parse_string() 1562 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1563 kwargs["collection_items"] = self._parse_string() 1564 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1565 kwargs["map_keys"] = self._parse_string() 1566 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1567 kwargs["lines"] = self._parse_string() 1568 if self._match_text_seq("NULL", "DEFINED", "AS"): 1569 kwargs["null"] = self._parse_string() 1570 1571 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1572 1573 def _parse_load_data(self) -> exp.Expression: 1574 local = self._match(TokenType.LOCAL) 1575 self._match_text_seq("INPATH") 1576 inpath = self._parse_string() 1577 overwrite = self._match(TokenType.OVERWRITE) 1578 self._match_pair(TokenType.INTO, TokenType.TABLE) 1579 1580 return self.expression( 1581 exp.LoadData, 1582 this=self._parse_table(schema=True), 1583 local=local, 1584 overwrite=overwrite, 1585 inpath=inpath, 1586 partition=self._parse_partition(), 1587 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1588 serde=self._match_text_seq("SERDE") and self._parse_string(), 1589 ) 1590 1591 def _parse_delete(self) -> exp.Expression: 1592 self._match(TokenType.FROM) 1593 1594 return self.expression( 1595 exp.Delete, 1596 this=self._parse_table(schema=True), 1597 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1598 where=self._parse_where(), 1599 ) 1600 1601 def _parse_update(self) -> exp.Expression: 1602 return self.expression( 1603 exp.Update, 1604 **{ # type: ignore 1605 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1606 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1607 "from": self._parse_from(), 1608 "where": self._parse_where(), 1609 }, 1610 ) 1611 1612 def _parse_uncache(self) -> exp.Expression: 1613 if not self._match(TokenType.TABLE): 1614 self.raise_error("Expecting TABLE after UNCACHE") 1615 1616 return self.expression( 1617 exp.Uncache, 1618 exists=self._parse_exists(), 1619 this=self._parse_table(schema=True), 1620 ) 1621 1622 def _parse_cache(self) -> exp.Expression: 1623 lazy = self._match(TokenType.LAZY) 1624 self._match(TokenType.TABLE) 1625 table = self._parse_table(schema=True) 1626 options = [] 1627 1628 if self._match(TokenType.OPTIONS): 1629 self._match_l_paren() 1630 k = self._parse_string() 1631 self._match(TokenType.EQ) 1632 v = self._parse_string() 1633 options = [k, v] 1634 self._match_r_paren() 1635 1636 self._match(TokenType.ALIAS) 1637 return self.expression( 1638 exp.Cache, 1639 this=table, 1640 lazy=lazy, 1641 options=options, 1642 expression=self._parse_select(nested=True), 1643 ) 1644 1645 def _parse_partition(self) -> t.Optional[exp.Expression]: 1646 if not self._match(TokenType.PARTITION): 1647 return None 1648 1649 return self.expression( 1650 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1651 ) 1652 1653 def _parse_value(self) -> exp.Expression: 1654 if self._match(TokenType.L_PAREN): 1655 expressions = self._parse_csv(self._parse_conjunction) 1656 self._match_r_paren() 1657 return self.expression(exp.Tuple, expressions=expressions) 1658 1659 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1660 # Source: https://prestodb.io/docs/current/sql/values.html 1661 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1662 1663 def _parse_select( 1664 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1665 ) -> t.Optional[exp.Expression]: 1666 cte = self._parse_with() 1667 if cte: 1668 this = self._parse_statement() 1669 1670 if not this: 1671 self.raise_error("Failed to parse any statement following CTE") 1672 return cte 1673 1674 if "with" in this.arg_types: 1675 this.set("with", cte) 1676 else: 1677 self.raise_error(f"{this.key} does not support CTE") 1678 this = cte 1679 elif self._match(TokenType.SELECT): 1680 comments = self._prev_comments 1681 1682 hint = self._parse_hint() 1683 all_ = self._match(TokenType.ALL) 1684 distinct = self._match(TokenType.DISTINCT) 1685 1686 if distinct: 1687 distinct = self.expression( 1688 exp.Distinct, 1689 on=self._parse_value() if self._match(TokenType.ON) else None, 1690 ) 1691 1692 if all_ and distinct: 1693 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1694 1695 limit = self._parse_limit(top=True) 1696 expressions = self._parse_csv(self._parse_expression) 1697 1698 this = self.expression( 1699 exp.Select, 1700 hint=hint, 1701 distinct=distinct, 1702 expressions=expressions, 1703 limit=limit, 1704 ) 1705 this.comments = comments 1706 1707 into = self._parse_into() 1708 if into: 1709 this.set("into", into) 1710 1711 from_ = self._parse_from() 1712 if from_: 1713 this.set("from", from_) 1714 1715 self._parse_query_modifiers(this) 1716 elif (table or nested) and self._match(TokenType.L_PAREN): 1717 this = self._parse_table() if table else self._parse_select(nested=True) 1718 self._parse_query_modifiers(this) 1719 this = self._parse_set_operations(this) 1720 self._match_r_paren() 1721 1722 # early return so that subquery unions aren't parsed again 1723 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1724 # Union ALL should be a property of the top select node, not the subquery 1725 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1726 elif self._match(TokenType.VALUES): 1727 this = self.expression( 1728 exp.Values, 1729 expressions=self._parse_csv(self._parse_value), 1730 alias=self._parse_table_alias(), 1731 ) 1732 else: 1733 this = None 1734 1735 return self._parse_set_operations(this) 1736 1737 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1738 if not skip_with_token and not self._match(TokenType.WITH): 1739 return None 1740 1741 recursive = self._match(TokenType.RECURSIVE) 1742 1743 expressions = [] 1744 while True: 1745 expressions.append(self._parse_cte()) 1746 1747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1748 break 1749 else: 1750 self._match(TokenType.WITH) 1751 1752 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1753 1754 def _parse_cte(self) -> exp.Expression: 1755 alias = self._parse_table_alias() 1756 if not alias or not alias.this: 1757 self.raise_error("Expected CTE to have alias") 1758 1759 self._match(TokenType.ALIAS) 1760 1761 return self.expression( 1762 exp.CTE, 1763 this=self._parse_wrapped(self._parse_statement), 1764 alias=alias, 1765 ) 1766 1767 def _parse_table_alias( 1768 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1769 ) -> t.Optional[exp.Expression]: 1770 any_token = self._match(TokenType.ALIAS) 1771 alias = self._parse_id_var( 1772 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1773 ) 1774 index = self._index 1775 1776 if self._match(TokenType.L_PAREN): 1777 columns = self._parse_csv(self._parse_function_parameter) 1778 self._match_r_paren() if columns else self._retreat(index) 1779 else: 1780 columns = None 1781 1782 if not alias and not columns: 1783 return None 1784 1785 return self.expression(exp.TableAlias, this=alias, columns=columns) 1786 1787 def _parse_subquery( 1788 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1789 ) -> exp.Expression: 1790 return self.expression( 1791 exp.Subquery, 1792 this=this, 1793 pivots=self._parse_pivots(), 1794 alias=self._parse_table_alias() if parse_alias else None, 1795 ) 1796 1797 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1798 if not isinstance(this, self.MODIFIABLES): 1799 return 1800 1801 table = isinstance(this, exp.Table) 1802 1803 while True: 1804 lateral = self._parse_lateral() 1805 join = self._parse_join() 1806 comma = None if table else self._match(TokenType.COMMA) 1807 if lateral: 1808 this.append("laterals", lateral) 1809 if join: 1810 this.append("joins", join) 1811 if comma: 1812 this.args["from"].append("expressions", self._parse_table()) 1813 if not (lateral or join or comma): 1814 break 1815 1816 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1817 expression = parser(self) 1818 1819 if expression: 1820 this.set(key, expression) 1821 1822 def _parse_hint(self) -> t.Optional[exp.Expression]: 1823 if self._match(TokenType.HINT): 1824 hints = self._parse_csv(self._parse_function) 1825 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1826 self.raise_error("Expected */ after HINT") 1827 return self.expression(exp.Hint, expressions=hints) 1828 1829 return None 1830 1831 def _parse_into(self) -> t.Optional[exp.Expression]: 1832 if not self._match(TokenType.INTO): 1833 return None 1834 1835 temp = self._match(TokenType.TEMPORARY) 1836 unlogged = self._match(TokenType.UNLOGGED) 1837 self._match(TokenType.TABLE) 1838 1839 return self.expression( 1840 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1841 ) 1842 1843 def _parse_from(self) -> t.Optional[exp.Expression]: 1844 if not self._match(TokenType.FROM): 1845 return None 1846 1847 return self.expression( 1848 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1849 ) 1850 1851 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1852 if not self._match(TokenType.MATCH_RECOGNIZE): 1853 return None 1854 self._match_l_paren() 1855 1856 partition = self._parse_partition_by() 1857 order = self._parse_order() 1858 measures = ( 1859 self._parse_alias(self._parse_conjunction()) 1860 if self._match_text_seq("MEASURES") 1861 else None 1862 ) 1863 1864 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1865 rows = exp.Var(this="ONE ROW PER MATCH") 1866 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1867 text = "ALL ROWS PER MATCH" 1868 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1869 text += f" SHOW EMPTY MATCHES" 1870 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1871 text += f" OMIT EMPTY MATCHES" 1872 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1873 text += f" WITH UNMATCHED ROWS" 1874 rows = exp.Var(this=text) 1875 else: 1876 rows = None 1877 1878 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1879 text = "AFTER MATCH SKIP" 1880 if self._match_text_seq("PAST", "LAST", "ROW"): 1881 text += f" PAST LAST ROW" 1882 elif self._match_text_seq("TO", "NEXT", "ROW"): 1883 text += f" TO NEXT ROW" 1884 elif self._match_text_seq("TO", "FIRST"): 1885 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1886 elif self._match_text_seq("TO", "LAST"): 1887 text += f" TO LAST {self._advance_any().text}" # type: ignore 1888 after = exp.Var(this=text) 1889 else: 1890 after = None 1891 1892 if self._match_text_seq("PATTERN"): 1893 self._match_l_paren() 1894 1895 if not self._curr: 1896 self.raise_error("Expecting )", self._curr) 1897 1898 paren = 1 1899 start = self._curr 1900 1901 while self._curr and paren > 0: 1902 if self._curr.token_type == TokenType.L_PAREN: 1903 paren += 1 1904 if self._curr.token_type == TokenType.R_PAREN: 1905 paren -= 1 1906 end = self._prev 1907 self._advance() 1908 if paren > 0: 1909 self.raise_error("Expecting )", self._curr) 1910 pattern = exp.Var(this=self._find_sql(start, end)) 1911 else: 1912 pattern = None 1913 1914 define = ( 1915 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1916 ) 1917 self._match_r_paren() 1918 1919 return self.expression( 1920 exp.MatchRecognize, 1921 partition_by=partition, 1922 order=order, 1923 measures=measures, 1924 rows=rows, 1925 after=after, 1926 pattern=pattern, 1927 define=define, 1928 ) 1929 1930 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1931 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1932 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1933 1934 if outer_apply or cross_apply: 1935 this = self._parse_select(table=True) 1936 view = None 1937 outer = not cross_apply 1938 elif self._match(TokenType.LATERAL): 1939 this = self._parse_select(table=True) 1940 view = self._match(TokenType.VIEW) 1941 outer = self._match(TokenType.OUTER) 1942 else: 1943 return None 1944 1945 if not this: 1946 this = self._parse_function() or self._parse_id_var(any_token=False) 1947 while self._match(TokenType.DOT): 1948 this = exp.Dot( 1949 this=this, 1950 expression=self._parse_function() or self._parse_id_var(any_token=False), 1951 ) 1952 1953 table_alias: t.Optional[exp.Expression] 1954 1955 if view: 1956 table = self._parse_id_var(any_token=False) 1957 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1958 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1959 else: 1960 table_alias = self._parse_table_alias() 1961 1962 expression = self.expression( 1963 exp.Lateral, 1964 this=this, 1965 view=view, 1966 outer=outer, 1967 alias=table_alias, 1968 ) 1969 1970 if outer_apply or cross_apply: 1971 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1972 1973 return expression 1974 1975 def _parse_join_side_and_kind( 1976 self, 1977 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1978 return ( 1979 self._match(TokenType.NATURAL) and self._prev, 1980 self._match_set(self.JOIN_SIDES) and self._prev, 1981 self._match_set(self.JOIN_KINDS) and self._prev, 1982 ) 1983 1984 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1985 natural, side, kind = self._parse_join_side_and_kind() 1986 1987 if not skip_join_token and not self._match(TokenType.JOIN): 1988 return None 1989 1990 kwargs: t.Dict[ 1991 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1992 ] = {"this": self._parse_table()} 1993 1994 if natural: 1995 kwargs["natural"] = True 1996 if side: 1997 kwargs["side"] = side.text 1998 if kind: 1999 kwargs["kind"] = kind.text 2000 2001 if self._match(TokenType.ON): 2002 kwargs["on"] = self._parse_conjunction() 2003 elif self._match(TokenType.USING): 2004 kwargs["using"] = self._parse_wrapped_id_vars() 2005 2006 return self.expression(exp.Join, **kwargs) # type: ignore 2007 2008 def _parse_index(self) -> exp.Expression: 2009 index = self._parse_id_var() 2010 self._match(TokenType.ON) 2011 self._match(TokenType.TABLE) # hive 2012 2013 return self.expression( 2014 exp.Index, 2015 this=index, 2016 table=self.expression(exp.Table, this=self._parse_id_var()), 2017 columns=self._parse_expression(), 2018 ) 2019 2020 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2021 unique = self._match(TokenType.UNIQUE) 2022 primary = self._match_text_seq("PRIMARY") 2023 amp = self._match_text_seq("AMP") 2024 if not self._match(TokenType.INDEX): 2025 return None 2026 index = self._parse_id_var() 2027 columns = None 2028 if self._match(TokenType.L_PAREN, advance=False): 2029 columns = self._parse_wrapped_csv(self._parse_column) 2030 return self.expression( 2031 exp.Index, 2032 this=index, 2033 columns=columns, 2034 unique=unique, 2035 primary=primary, 2036 amp=amp, 2037 ) 2038 2039 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2040 catalog = None 2041 db = None 2042 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 2043 2044 while self._match(TokenType.DOT): 2045 if catalog: 2046 # This allows nesting the table in arbitrarily many dot expressions if needed 2047 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2048 else: 2049 catalog = db 2050 db = table 2051 table = self._parse_id_var() 2052 2053 if not table: 2054 self.raise_error(f"Expected table name but got {self._curr}") 2055 2056 return self.expression( 2057 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2058 ) 2059 2060 def _parse_table( 2061 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2062 ) -> t.Optional[exp.Expression]: 2063 lateral = self._parse_lateral() 2064 2065 if lateral: 2066 return lateral 2067 2068 unnest = self._parse_unnest() 2069 2070 if unnest: 2071 return unnest 2072 2073 values = self._parse_derived_table_values() 2074 2075 if values: 2076 return values 2077 2078 subquery = self._parse_select(table=True) 2079 2080 if subquery: 2081 return subquery 2082 2083 this = self._parse_table_parts(schema=schema) 2084 2085 if schema: 2086 return self._parse_schema(this=this) 2087 2088 if self.alias_post_tablesample: 2089 table_sample = self._parse_table_sample() 2090 2091 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2092 2093 if alias: 2094 this.set("alias", alias) 2095 2096 if not this.args.get("pivots"): 2097 this.set("pivots", self._parse_pivots()) 2098 2099 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2100 this.set( 2101 "hints", 2102 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2103 ) 2104 self._match_r_paren() 2105 2106 if not self.alias_post_tablesample: 2107 table_sample = self._parse_table_sample() 2108 2109 if table_sample: 2110 table_sample.set("this", this) 2111 this = table_sample 2112 2113 return this 2114 2115 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2116 if not self._match(TokenType.UNNEST): 2117 return None 2118 2119 expressions = self._parse_wrapped_csv(self._parse_column) 2120 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2121 alias = self._parse_table_alias() 2122 2123 if alias and self.unnest_column_only: 2124 if alias.args.get("columns"): 2125 self.raise_error("Unexpected extra column alias in unnest.") 2126 alias.set("columns", [alias.this]) 2127 alias.set("this", None) 2128 2129 offset = None 2130 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2131 self._match(TokenType.ALIAS) 2132 offset = self._parse_conjunction() 2133 2134 return self.expression( 2135 exp.Unnest, 2136 expressions=expressions, 2137 ordinality=ordinality, 2138 alias=alias, 2139 offset=offset, 2140 ) 2141 2142 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2143 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2144 if not is_derived and not self._match(TokenType.VALUES): 2145 return None 2146 2147 expressions = self._parse_csv(self._parse_value) 2148 2149 if is_derived: 2150 self._match_r_paren() 2151 2152 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2153 2154 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 2155 if not self._match(TokenType.TABLE_SAMPLE): 2156 return None 2157 2158 method = self._parse_var() 2159 bucket_numerator = None 2160 bucket_denominator = None 2161 bucket_field = None 2162 percent = None 2163 rows = None 2164 size = None 2165 seed = None 2166 2167 self._match_l_paren() 2168 2169 if self._match(TokenType.BUCKET): 2170 bucket_numerator = self._parse_number() 2171 self._match(TokenType.OUT_OF) 2172 bucket_denominator = bucket_denominator = self._parse_number() 2173 self._match(TokenType.ON) 2174 bucket_field = self._parse_field() 2175 else: 2176 num = self._parse_number() 2177 2178 if self._match(TokenType.PERCENT): 2179 percent = num 2180 elif self._match(TokenType.ROWS): 2181 rows = num 2182 else: 2183 size = num 2184 2185 self._match_r_paren() 2186 2187 if self._match(TokenType.SEED): 2188 seed = self._parse_wrapped(self._parse_number) 2189 2190 return self.expression( 2191 exp.TableSample, 2192 method=method, 2193 bucket_numerator=bucket_numerator, 2194 bucket_denominator=bucket_denominator, 2195 bucket_field=bucket_field, 2196 percent=percent, 2197 rows=rows, 2198 size=size, 2199 seed=seed, 2200 ) 2201 2202 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2203 return list(iter(self._parse_pivot, None)) 2204 2205 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2206 index = self._index 2207 2208 if self._match(TokenType.PIVOT): 2209 unpivot = False 2210 elif self._match(TokenType.UNPIVOT): 2211 unpivot = True 2212 else: 2213 return None 2214 2215 expressions = [] 2216 field = None 2217 2218 if not self._match(TokenType.L_PAREN): 2219 self._retreat(index) 2220 return None 2221 2222 if unpivot: 2223 expressions = self._parse_csv(self._parse_column) 2224 else: 2225 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2226 2227 if not self._match(TokenType.FOR): 2228 self.raise_error("Expecting FOR") 2229 2230 value = self._parse_column() 2231 2232 if not self._match(TokenType.IN): 2233 self.raise_error("Expecting IN") 2234 2235 field = self._parse_in(value) 2236 2237 self._match_r_paren() 2238 2239 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2240 2241 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2242 pivot.set("alias", self._parse_table_alias()) 2243 2244 return pivot 2245 2246 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2247 if not skip_where_token and not self._match(TokenType.WHERE): 2248 return None 2249 2250 return self.expression( 2251 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2252 ) 2253 2254 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2255 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2256 return None 2257 2258 elements = defaultdict(list) 2259 2260 while True: 2261 expressions = self._parse_csv(self._parse_conjunction) 2262 if expressions: 2263 elements["expressions"].extend(expressions) 2264 2265 grouping_sets = self._parse_grouping_sets() 2266 if grouping_sets: 2267 elements["grouping_sets"].extend(grouping_sets) 2268 2269 rollup = None 2270 cube = None 2271 2272 with_ = self._match(TokenType.WITH) 2273 if self._match(TokenType.ROLLUP): 2274 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2275 elements["rollup"].extend(ensure_list(rollup)) 2276 2277 if self._match(TokenType.CUBE): 2278 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2279 elements["cube"].extend(ensure_list(cube)) 2280 2281 if not (expressions or grouping_sets or rollup or cube): 2282 break 2283 2284 return self.expression(exp.Group, **elements) # type: ignore 2285 2286 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2287 if not self._match(TokenType.GROUPING_SETS): 2288 return None 2289 2290 return self._parse_wrapped_csv(self._parse_grouping_set) 2291 2292 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2293 if self._match(TokenType.L_PAREN): 2294 grouping_set = self._parse_csv(self._parse_column) 2295 self._match_r_paren() 2296 return self.expression(exp.Tuple, expressions=grouping_set) 2297 2298 return self._parse_column() 2299 2300 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2301 if not skip_having_token and not self._match(TokenType.HAVING): 2302 return None 2303 return self.expression(exp.Having, this=self._parse_conjunction()) 2304 2305 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2306 if not self._match(TokenType.QUALIFY): 2307 return None 2308 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2309 2310 def _parse_order( 2311 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2312 ) -> t.Optional[exp.Expression]: 2313 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2314 return this 2315 2316 return self.expression( 2317 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2318 ) 2319 2320 def _parse_sort( 2321 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2322 ) -> t.Optional[exp.Expression]: 2323 if not self._match(token_type): 2324 return None 2325 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2326 2327 def _parse_ordered(self) -> exp.Expression: 2328 this = self._parse_conjunction() 2329 self._match(TokenType.ASC) 2330 is_desc = self._match(TokenType.DESC) 2331 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2332 is_nulls_last = self._match(TokenType.NULLS_LAST) 2333 desc = is_desc or False 2334 asc = not desc 2335 nulls_first = is_nulls_first or False 2336 explicitly_null_ordered = is_nulls_first or is_nulls_last 2337 if ( 2338 not explicitly_null_ordered 2339 and ( 2340 (asc and self.null_ordering == "nulls_are_small") 2341 or (desc and self.null_ordering != "nulls_are_small") 2342 ) 2343 and self.null_ordering != "nulls_are_last" 2344 ): 2345 nulls_first = True 2346 2347 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2348 2349 def _parse_limit( 2350 self, this: t.Optional[exp.Expression] = None, top: bool = False 2351 ) -> t.Optional[exp.Expression]: 2352 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2353 limit_paren = self._match(TokenType.L_PAREN) 2354 limit_exp = self.expression( 2355 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2356 ) 2357 2358 if limit_paren: 2359 self._match_r_paren() 2360 2361 return limit_exp 2362 2363 if self._match(TokenType.FETCH): 2364 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2365 direction = self._prev.text if direction else "FIRST" 2366 count = self._parse_number() 2367 self._match_set((TokenType.ROW, TokenType.ROWS)) 2368 self._match(TokenType.ONLY) 2369 return self.expression(exp.Fetch, direction=direction, count=count) 2370 2371 return this 2372 2373 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2374 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2375 return this 2376 2377 count = self._parse_number() 2378 self._match_set((TokenType.ROW, TokenType.ROWS)) 2379 return self.expression(exp.Offset, this=this, expression=count) 2380 2381 def _parse_lock(self) -> t.Optional[exp.Expression]: 2382 if self._match_text_seq("FOR", "UPDATE"): 2383 return self.expression(exp.Lock, update=True) 2384 if self._match_text_seq("FOR", "SHARE"): 2385 return self.expression(exp.Lock, update=False) 2386 2387 return None 2388 2389 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2390 if not self._match_set(self.SET_OPERATIONS): 2391 return this 2392 2393 token_type = self._prev.token_type 2394 2395 if token_type == TokenType.UNION: 2396 expression = exp.Union 2397 elif token_type == TokenType.EXCEPT: 2398 expression = exp.Except 2399 else: 2400 expression = exp.Intersect 2401 2402 return self.expression( 2403 expression, 2404 this=this, 2405 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2406 expression=self._parse_set_operations(self._parse_select(nested=True)), 2407 ) 2408 2409 def _parse_expression(self) -> t.Optional[exp.Expression]: 2410 return self._parse_alias(self._parse_conjunction()) 2411 2412 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2413 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2414 2415 def _parse_equality(self) -> t.Optional[exp.Expression]: 2416 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2417 2418 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2419 return self._parse_tokens(self._parse_range, self.COMPARISON) 2420 2421 def _parse_range(self) -> t.Optional[exp.Expression]: 2422 this = self._parse_bitwise() 2423 negate = self._match(TokenType.NOT) 2424 2425 if self._match_set(self.RANGE_PARSERS): 2426 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2427 elif self._match(TokenType.ISNULL): 2428 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2429 2430 # Postgres supports ISNULL and NOTNULL for conditions. 2431 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2432 if self._match(TokenType.NOTNULL): 2433 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2434 this = self.expression(exp.Not, this=this) 2435 2436 if negate: 2437 this = self.expression(exp.Not, this=this) 2438 2439 if self._match(TokenType.IS): 2440 this = self._parse_is(this) 2441 2442 return this 2443 2444 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2445 negate = self._match(TokenType.NOT) 2446 if self._match(TokenType.DISTINCT_FROM): 2447 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2448 return self.expression(klass, this=this, expression=self._parse_expression()) 2449 2450 this = self.expression( 2451 exp.Is, 2452 this=this, 2453 expression=self._parse_null() or self._parse_boolean(), 2454 ) 2455 return self.expression(exp.Not, this=this) if negate else this 2456 2457 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2458 unnest = self._parse_unnest() 2459 if unnest: 2460 this = self.expression(exp.In, this=this, unnest=unnest) 2461 elif self._match(TokenType.L_PAREN): 2462 expressions = self._parse_csv(self._parse_select_or_expression) 2463 2464 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2465 this = self.expression(exp.In, this=this, query=expressions[0]) 2466 else: 2467 this = self.expression(exp.In, this=this, expressions=expressions) 2468 2469 self._match_r_paren() 2470 else: 2471 this = self.expression(exp.In, this=this, field=self._parse_field()) 2472 2473 return this 2474 2475 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2476 low = self._parse_bitwise() 2477 self._match(TokenType.AND) 2478 high = self._parse_bitwise() 2479 return self.expression(exp.Between, this=this, low=low, high=high) 2480 2481 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2482 if not self._match(TokenType.ESCAPE): 2483 return this 2484 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2485 2486 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2487 this = self._parse_term() 2488 2489 while True: 2490 if self._match_set(self.BITWISE): 2491 this = self.expression( 2492 self.BITWISE[self._prev.token_type], 2493 this=this, 2494 expression=self._parse_term(), 2495 ) 2496 elif self._match_pair(TokenType.LT, TokenType.LT): 2497 this = self.expression( 2498 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2499 ) 2500 elif self._match_pair(TokenType.GT, TokenType.GT): 2501 this = self.expression( 2502 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2503 ) 2504 else: 2505 break 2506 2507 return this 2508 2509 def _parse_term(self) -> t.Optional[exp.Expression]: 2510 return self._parse_tokens(self._parse_factor, self.TERM) 2511 2512 def _parse_factor(self) -> t.Optional[exp.Expression]: 2513 return self._parse_tokens(self._parse_unary, self.FACTOR) 2514 2515 def _parse_unary(self) -> t.Optional[exp.Expression]: 2516 if self._match_set(self.UNARY_PARSERS): 2517 return self.UNARY_PARSERS[self._prev.token_type](self) 2518 return self._parse_at_time_zone(self._parse_type()) 2519 2520 def _parse_type(self) -> t.Optional[exp.Expression]: 2521 if self._match(TokenType.INTERVAL): 2522 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2523 2524 index = self._index 2525 type_token = self._parse_types(check_func=True) 2526 this = self._parse_column() 2527 2528 if type_token: 2529 if this and not isinstance(this, exp.Star): 2530 return self.expression(exp.Cast, this=this, to=type_token) 2531 if not type_token.args.get("expressions"): 2532 self._retreat(index) 2533 return self._parse_column() 2534 return type_token 2535 2536 return this 2537 2538 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2539 index = self._index 2540 2541 prefix = self._match_text_seq("SYSUDTLIB", ".") 2542 2543 if not self._match_set(self.TYPE_TOKENS): 2544 return None 2545 2546 type_token = self._prev.token_type 2547 2548 if type_token == TokenType.PSEUDO_TYPE: 2549 return self.expression(exp.PseudoType, this=self._prev.text) 2550 2551 nested = type_token in self.NESTED_TYPE_TOKENS 2552 is_struct = type_token == TokenType.STRUCT 2553 expressions = None 2554 maybe_func = False 2555 2556 if self._match(TokenType.L_PAREN): 2557 if is_struct: 2558 expressions = self._parse_csv(self._parse_struct_kwargs) 2559 elif nested: 2560 expressions = self._parse_csv(self._parse_types) 2561 else: 2562 expressions = self._parse_csv(self._parse_conjunction) 2563 2564 if not expressions: 2565 self._retreat(index) 2566 return None 2567 2568 self._match_r_paren() 2569 maybe_func = True 2570 2571 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2572 this = exp.DataType( 2573 this=exp.DataType.Type.ARRAY, 2574 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2575 nested=True, 2576 ) 2577 2578 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2579 this = exp.DataType( 2580 this=exp.DataType.Type.ARRAY, 2581 expressions=[this], 2582 nested=True, 2583 ) 2584 2585 return this 2586 2587 if self._match(TokenType.L_BRACKET): 2588 self._retreat(index) 2589 return None 2590 2591 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2592 if nested and self._match(TokenType.LT): 2593 if is_struct: 2594 expressions = self._parse_csv(self._parse_struct_kwargs) 2595 else: 2596 expressions = self._parse_csv(self._parse_types) 2597 2598 if not self._match(TokenType.GT): 2599 self.raise_error("Expecting >") 2600 2601 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2602 values = self._parse_csv(self._parse_conjunction) 2603 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2604 2605 value: t.Optional[exp.Expression] = None 2606 if type_token in self.TIMESTAMPS: 2607 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2608 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2609 elif ( 2610 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2611 ): 2612 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2613 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2614 if type_token == TokenType.TIME: 2615 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2616 else: 2617 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2618 2619 maybe_func = maybe_func and value is None 2620 2621 if value is None: 2622 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2623 elif type_token == TokenType.INTERVAL: 2624 value = self.expression(exp.Interval, unit=self._parse_var()) 2625 2626 if maybe_func and check_func: 2627 index2 = self._index 2628 peek = self._parse_string() 2629 2630 if not peek: 2631 self._retreat(index) 2632 return None 2633 2634 self._retreat(index2) 2635 2636 if value: 2637 return value 2638 2639 return exp.DataType( 2640 this=exp.DataType.Type[type_token.value.upper()], 2641 expressions=expressions, 2642 nested=nested, 2643 values=values, 2644 prefix=prefix, 2645 ) 2646 2647 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2648 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2649 return self._parse_types() 2650 2651 this = self._parse_id_var() 2652 self._match(TokenType.COLON) 2653 data_type = self._parse_types() 2654 2655 if not data_type: 2656 return None 2657 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2658 2659 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2660 if not self._match(TokenType.AT_TIME_ZONE): 2661 return this 2662 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2663 2664 def _parse_column(self) -> t.Optional[exp.Expression]: 2665 this = self._parse_field() 2666 if isinstance(this, exp.Identifier): 2667 this = self.expression(exp.Column, this=this) 2668 elif not this: 2669 return self._parse_bracket(this) 2670 this = self._parse_bracket(this) 2671 2672 while self._match_set(self.COLUMN_OPERATORS): 2673 op_token = self._prev.token_type 2674 op = self.COLUMN_OPERATORS.get(op_token) 2675 2676 if op_token == TokenType.DCOLON: 2677 field = self._parse_types() 2678 if not field: 2679 self.raise_error("Expected type") 2680 elif op: 2681 self._advance() 2682 value = self._prev.text 2683 field = ( 2684 exp.Literal.number(value) 2685 if self._prev.token_type == TokenType.NUMBER 2686 else exp.Literal.string(value) 2687 ) 2688 else: 2689 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2690 2691 if isinstance(field, exp.Func): 2692 # bigquery allows function calls like x.y.count(...) 2693 # SAFE.SUBSTR(...) 2694 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2695 this = self._replace_columns_with_dots(this) 2696 2697 if op: 2698 this = op(self, this, field) 2699 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2700 this = self.expression( 2701 exp.Column, 2702 this=field, 2703 table=this.this, 2704 db=this.args.get("table"), 2705 catalog=this.args.get("db"), 2706 ) 2707 else: 2708 this = self.expression(exp.Dot, this=this, expression=field) 2709 this = self._parse_bracket(this) 2710 2711 return this 2712 2713 def _parse_primary(self) -> t.Optional[exp.Expression]: 2714 if self._match_set(self.PRIMARY_PARSERS): 2715 token_type = self._prev.token_type 2716 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2717 2718 if token_type == TokenType.STRING: 2719 expressions = [primary] 2720 while self._match(TokenType.STRING): 2721 expressions.append(exp.Literal.string(self._prev.text)) 2722 if len(expressions) > 1: 2723 return self.expression(exp.Concat, expressions=expressions) 2724 return primary 2725 2726 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2727 return exp.Literal.number(f"0.{self._prev.text}") 2728 2729 if self._match(TokenType.L_PAREN): 2730 comments = self._prev_comments 2731 query = self._parse_select() 2732 2733 if query: 2734 expressions = [query] 2735 else: 2736 expressions = self._parse_csv( 2737 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2738 ) 2739 2740 this = seq_get(expressions, 0) 2741 self._parse_query_modifiers(this) 2742 self._match_r_paren() 2743 2744 if isinstance(this, exp.Subqueryable): 2745 this = self._parse_set_operations( 2746 self._parse_subquery(this=this, parse_alias=False) 2747 ) 2748 elif len(expressions) > 1: 2749 this = self.expression(exp.Tuple, expressions=expressions) 2750 else: 2751 this = self.expression(exp.Paren, this=this) 2752 2753 if this and comments: 2754 this.comments = comments 2755 2756 return this 2757 2758 return None 2759 2760 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2761 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2762 2763 def _parse_function( 2764 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2765 ) -> t.Optional[exp.Expression]: 2766 if not self._curr: 2767 return None 2768 2769 token_type = self._curr.token_type 2770 2771 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2772 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2773 2774 if not self._next or self._next.token_type != TokenType.L_PAREN: 2775 if token_type in self.NO_PAREN_FUNCTIONS: 2776 self._advance() 2777 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2778 2779 return None 2780 2781 if token_type not in self.FUNC_TOKENS: 2782 return None 2783 2784 this = self._curr.text 2785 upper = this.upper() 2786 self._advance(2) 2787 2788 parser = self.FUNCTION_PARSERS.get(upper) 2789 2790 if parser: 2791 this = parser(self) 2792 else: 2793 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2794 2795 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2796 this = self.expression(subquery_predicate, this=self._parse_select()) 2797 self._match_r_paren() 2798 return this 2799 2800 if functions is None: 2801 functions = self.FUNCTIONS 2802 2803 function = functions.get(upper) 2804 args = self._parse_csv(self._parse_lambda) 2805 2806 if function: 2807 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2808 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 2809 if count_params(function) == 2: 2810 params = None 2811 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2812 params = self._parse_csv(self._parse_lambda) 2813 2814 this = function(args, params) 2815 else: 2816 this = function(args) 2817 2818 self.validate_expression(this, args) 2819 else: 2820 this = self.expression(exp.Anonymous, this=this, expressions=args) 2821 2822 self._match_r_paren(this) 2823 return self._parse_window(this) 2824 2825 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2826 return self._parse_column_def(self._parse_id_var()) 2827 2828 def _parse_user_defined_function( 2829 self, kind: t.Optional[TokenType] = None 2830 ) -> t.Optional[exp.Expression]: 2831 this = self._parse_id_var() 2832 2833 while self._match(TokenType.DOT): 2834 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2835 2836 if not self._match(TokenType.L_PAREN): 2837 return this 2838 2839 expressions = self._parse_csv(self._parse_function_parameter) 2840 self._match_r_paren() 2841 return self.expression( 2842 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2843 ) 2844 2845 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2846 literal = self._parse_primary() 2847 if literal: 2848 return self.expression(exp.Introducer, this=token.text, expression=literal) 2849 2850 return self.expression(exp.Identifier, this=token.text) 2851 2852 def _parse_national(self, token: Token) -> exp.Expression: 2853 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2854 2855 def _parse_session_parameter(self) -> exp.Expression: 2856 kind = None 2857 this = self._parse_id_var() or self._parse_primary() 2858 2859 if this and self._match(TokenType.DOT): 2860 kind = this.name 2861 this = self._parse_var() or self._parse_primary() 2862 2863 return self.expression(exp.SessionParameter, this=this, kind=kind) 2864 2865 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2866 index = self._index 2867 2868 if self._match(TokenType.L_PAREN): 2869 expressions = self._parse_csv(self._parse_id_var) 2870 2871 if not self._match(TokenType.R_PAREN): 2872 self._retreat(index) 2873 else: 2874 expressions = [self._parse_id_var()] 2875 2876 if self._match_set(self.LAMBDAS): 2877 return self.LAMBDAS[self._prev.token_type](self, expressions) 2878 2879 self._retreat(index) 2880 2881 this: t.Optional[exp.Expression] 2882 2883 if self._match(TokenType.DISTINCT): 2884 this = self.expression( 2885 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2886 ) 2887 else: 2888 this = self._parse_select_or_expression() 2889 2890 if self._match(TokenType.IGNORE_NULLS): 2891 this = self.expression(exp.IgnoreNulls, this=this) 2892 else: 2893 self._match(TokenType.RESPECT_NULLS) 2894 2895 return self._parse_limit(self._parse_order(this)) 2896 2897 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2898 index = self._index 2899 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2900 self._retreat(index) 2901 return this 2902 2903 args = self._parse_csv( 2904 lambda: self._parse_constraint() 2905 or self._parse_column_def(self._parse_field(any_token=True)) 2906 ) 2907 self._match_r_paren() 2908 return self.expression(exp.Schema, this=this, expressions=args) 2909 2910 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2911 kind = self._parse_types() 2912 2913 if self._match_text_seq("FOR", "ORDINALITY"): 2914 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2915 2916 constraints = [] 2917 while True: 2918 constraint = self._parse_column_constraint() 2919 if not constraint: 2920 break 2921 constraints.append(constraint) 2922 2923 if not kind and not constraints: 2924 return this 2925 2926 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2927 2928 def _parse_auto_increment(self) -> exp.Expression: 2929 start = None 2930 increment = None 2931 2932 if self._match(TokenType.L_PAREN, advance=False): 2933 args = self._parse_wrapped_csv(self._parse_bitwise) 2934 start = seq_get(args, 0) 2935 increment = seq_get(args, 1) 2936 elif self._match_text_seq("START"): 2937 start = self._parse_bitwise() 2938 self._match_text_seq("INCREMENT") 2939 increment = self._parse_bitwise() 2940 2941 if start and increment: 2942 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2943 2944 return exp.AutoIncrementColumnConstraint() 2945 2946 def _parse_compress(self) -> exp.Expression: 2947 if self._match(TokenType.L_PAREN, advance=False): 2948 return self.expression( 2949 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2950 ) 2951 2952 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2953 2954 def _parse_generated_as_identity(self) -> exp.Expression: 2955 if self._match(TokenType.BY_DEFAULT): 2956 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2957 else: 2958 self._match_text_seq("ALWAYS") 2959 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2960 2961 self._match_text_seq("AS", "IDENTITY") 2962 if self._match(TokenType.L_PAREN): 2963 if self._match_text_seq("START", "WITH"): 2964 this.set("start", self._parse_bitwise()) 2965 if self._match_text_seq("INCREMENT", "BY"): 2966 this.set("increment", self._parse_bitwise()) 2967 if self._match_text_seq("MINVALUE"): 2968 this.set("minvalue", self._parse_bitwise()) 2969 if self._match_text_seq("MAXVALUE"): 2970 this.set("maxvalue", self._parse_bitwise()) 2971 2972 if self._match_text_seq("CYCLE"): 2973 this.set("cycle", True) 2974 elif self._match_text_seq("NO", "CYCLE"): 2975 this.set("cycle", False) 2976 2977 self._match_r_paren() 2978 2979 return this 2980 2981 def _parse_inline(self) -> t.Optional[exp.Expression]: 2982 self._match_text_seq("LENGTH") 2983 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 2984 2985 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 2986 if self._match_text_seq("NULL"): 2987 return self.expression(exp.NotNullColumnConstraint) 2988 if self._match_text_seq("CASESPECIFIC"): 2989 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 2990 return None 2991 2992 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2993 this = self._parse_references() 2994 if this: 2995 return this 2996 2997 if self._match(TokenType.CONSTRAINT): 2998 this = self._parse_id_var() 2999 3000 if self._match_texts(self.CONSTRAINT_PARSERS): 3001 return self.expression( 3002 exp.ColumnConstraint, 3003 this=this, 3004 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3005 ) 3006 3007 return this 3008 3009 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3010 if not self._match(TokenType.CONSTRAINT): 3011 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3012 3013 this = self._parse_id_var() 3014 expressions = [] 3015 3016 while True: 3017 constraint = self._parse_unnamed_constraint() or self._parse_function() 3018 if not constraint: 3019 break 3020 expressions.append(constraint) 3021 3022 return self.expression(exp.Constraint, this=this, expressions=expressions) 3023 3024 def _parse_unnamed_constraint( 3025 self, constraints: t.Optional[t.Collection[str]] = None 3026 ) -> t.Optional[exp.Expression]: 3027 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3028 return None 3029 3030 constraint = self._prev.text.upper() 3031 if constraint not in self.CONSTRAINT_PARSERS: 3032 self.raise_error(f"No parser found for schema constraint {constraint}.") 3033 3034 return self.CONSTRAINT_PARSERS[constraint](self) 3035 3036 def _parse_unique(self) -> exp.Expression: 3037 if not self._match(TokenType.L_PAREN, advance=False): 3038 return self.expression(exp.UniqueColumnConstraint) 3039 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3040 3041 def _parse_key_constraint_options(self) -> t.List[str]: 3042 options = [] 3043 while True: 3044 if not self._curr: 3045 break 3046 3047 if self._match(TokenType.ON): 3048 action = None 3049 on = self._advance_any() and self._prev.text 3050 3051 if self._match(TokenType.NO_ACTION): 3052 action = "NO ACTION" 3053 elif self._match(TokenType.CASCADE): 3054 action = "CASCADE" 3055 elif self._match_pair(TokenType.SET, TokenType.NULL): 3056 action = "SET NULL" 3057 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3058 action = "SET DEFAULT" 3059 else: 3060 self.raise_error("Invalid key constraint") 3061 3062 options.append(f"ON {on} {action}") 3063 elif self._match_text_seq("NOT", "ENFORCED"): 3064 options.append("NOT ENFORCED") 3065 elif self._match_text_seq("DEFERRABLE"): 3066 options.append("DEFERRABLE") 3067 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3068 options.append("INITIALLY DEFERRED") 3069 elif self._match_text_seq("NORELY"): 3070 options.append("NORELY") 3071 elif self._match_text_seq("MATCH", "FULL"): 3072 options.append("MATCH FULL") 3073 else: 3074 break 3075 3076 return options 3077 3078 def _parse_references(self) -> t.Optional[exp.Expression]: 3079 if not self._match(TokenType.REFERENCES): 3080 return None 3081 3082 expressions = None 3083 this = self._parse_id_var() 3084 3085 if self._match(TokenType.L_PAREN, advance=False): 3086 expressions = self._parse_wrapped_id_vars() 3087 3088 options = self._parse_key_constraint_options() 3089 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3090 3091 def _parse_foreign_key(self) -> exp.Expression: 3092 expressions = self._parse_wrapped_id_vars() 3093 reference = self._parse_references() 3094 options = {} 3095 3096 while self._match(TokenType.ON): 3097 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3098 self.raise_error("Expected DELETE or UPDATE") 3099 3100 kind = self._prev.text.lower() 3101 3102 if self._match(TokenType.NO_ACTION): 3103 action = "NO ACTION" 3104 elif self._match(TokenType.SET): 3105 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3106 action = "SET " + self._prev.text.upper() 3107 else: 3108 self._advance() 3109 action = self._prev.text.upper() 3110 3111 options[kind] = action 3112 3113 return self.expression( 3114 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3115 ) 3116 3117 def _parse_primary_key(self) -> exp.Expression: 3118 desc = ( 3119 self._match_set((TokenType.ASC, TokenType.DESC)) 3120 and self._prev.token_type == TokenType.DESC 3121 ) 3122 3123 if not self._match(TokenType.L_PAREN, advance=False): 3124 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3125 3126 expressions = self._parse_wrapped_id_vars() 3127 options = self._parse_key_constraint_options() 3128 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3129 3130 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3131 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3132 return this 3133 3134 bracket_kind = self._prev.token_type 3135 expressions: t.List[t.Optional[exp.Expression]] 3136 3137 if self._match(TokenType.COLON): 3138 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3139 else: 3140 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3141 3142 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3143 if bracket_kind == TokenType.L_BRACE: 3144 this = self.expression(exp.Struct, expressions=expressions) 3145 elif not this or this.name.upper() == "ARRAY": 3146 this = self.expression(exp.Array, expressions=expressions) 3147 else: 3148 expressions = apply_index_offset(expressions, -self.index_offset) 3149 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3150 3151 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3152 self.raise_error("Expected ]") 3153 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3154 self.raise_error("Expected }") 3155 3156 this.comments = self._prev_comments 3157 return self._parse_bracket(this) 3158 3159 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3160 if self._match(TokenType.COLON): 3161 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3162 return this 3163 3164 def _parse_case(self) -> t.Optional[exp.Expression]: 3165 ifs = [] 3166 default = None 3167 3168 expression = self._parse_conjunction() 3169 3170 while self._match(TokenType.WHEN): 3171 this = self._parse_conjunction() 3172 self._match(TokenType.THEN) 3173 then = self._parse_conjunction() 3174 ifs.append(self.expression(exp.If, this=this, true=then)) 3175 3176 if self._match(TokenType.ELSE): 3177 default = self._parse_conjunction() 3178 3179 if not self._match(TokenType.END): 3180 self.raise_error("Expected END after CASE", self._prev) 3181 3182 return self._parse_window( 3183 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3184 ) 3185 3186 def _parse_if(self) -> t.Optional[exp.Expression]: 3187 if self._match(TokenType.L_PAREN): 3188 args = self._parse_csv(self._parse_conjunction) 3189 this = exp.If.from_arg_list(args) 3190 self.validate_expression(this, args) 3191 self._match_r_paren() 3192 else: 3193 condition = self._parse_conjunction() 3194 self._match(TokenType.THEN) 3195 true = self._parse_conjunction() 3196 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3197 self._match(TokenType.END) 3198 this = self.expression(exp.If, this=condition, true=true, false=false) 3199 3200 return self._parse_window(this) 3201 3202 def _parse_extract(self) -> exp.Expression: 3203 this = self._parse_function() or self._parse_var() or self._parse_type() 3204 3205 if self._match(TokenType.FROM): 3206 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3207 3208 if not self._match(TokenType.COMMA): 3209 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3210 3211 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3212 3213 def _parse_cast(self, strict: bool) -> exp.Expression: 3214 this = self._parse_conjunction() 3215 3216 if not self._match(TokenType.ALIAS): 3217 self.raise_error("Expected AS after CAST") 3218 3219 to = self._parse_types() 3220 3221 if not to: 3222 self.raise_error("Expected TYPE after CAST") 3223 elif to.this == exp.DataType.Type.CHAR: 3224 if self._match(TokenType.CHARACTER_SET): 3225 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3226 3227 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3228 3229 def _parse_string_agg(self) -> exp.Expression: 3230 expression: t.Optional[exp.Expression] 3231 3232 if self._match(TokenType.DISTINCT): 3233 args = self._parse_csv(self._parse_conjunction) 3234 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3235 else: 3236 args = self._parse_csv(self._parse_conjunction) 3237 expression = seq_get(args, 0) 3238 3239 index = self._index 3240 if not self._match(TokenType.R_PAREN): 3241 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3242 order = self._parse_order(this=expression) 3243 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3244 3245 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3246 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3247 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3248 if not self._match(TokenType.WITHIN_GROUP): 3249 self._retreat(index) 3250 this = exp.GroupConcat.from_arg_list(args) 3251 self.validate_expression(this, args) 3252 return this 3253 3254 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3255 order = self._parse_order(this=expression) 3256 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3257 3258 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3259 to: t.Optional[exp.Expression] 3260 this = self._parse_column() 3261 3262 if self._match(TokenType.USING): 3263 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3264 elif self._match(TokenType.COMMA): 3265 to = self._parse_types() 3266 else: 3267 to = None 3268 3269 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3270 3271 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3272 args = self._parse_csv(self._parse_bitwise) 3273 3274 if self._match(TokenType.IN): 3275 return self.expression( 3276 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3277 ) 3278 3279 if haystack_first: 3280 haystack = seq_get(args, 0) 3281 needle = seq_get(args, 1) 3282 else: 3283 needle = seq_get(args, 0) 3284 haystack = seq_get(args, 1) 3285 3286 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3287 3288 self.validate_expression(this, args) 3289 3290 return this 3291 3292 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3293 args = self._parse_csv(self._parse_table) 3294 return exp.JoinHint(this=func_name.upper(), expressions=args) 3295 3296 def _parse_substring(self) -> exp.Expression: 3297 # Postgres supports the form: substring(string [from int] [for int]) 3298 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3299 3300 args = self._parse_csv(self._parse_bitwise) 3301 3302 if self._match(TokenType.FROM): 3303 args.append(self._parse_bitwise()) 3304 if self._match(TokenType.FOR): 3305 args.append(self._parse_bitwise()) 3306 3307 this = exp.Substring.from_arg_list(args) 3308 self.validate_expression(this, args) 3309 3310 return this 3311 3312 def _parse_trim(self) -> exp.Expression: 3313 # https://www.w3resource.com/sql/character-functions/trim.php 3314 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3315 3316 position = None 3317 collation = None 3318 3319 if self._match_set(self.TRIM_TYPES): 3320 position = self._prev.text.upper() 3321 3322 expression = self._parse_term() 3323 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3324 this = self._parse_term() 3325 else: 3326 this = expression 3327 expression = None 3328 3329 if self._match(TokenType.COLLATE): 3330 collation = self._parse_term() 3331 3332 return self.expression( 3333 exp.Trim, 3334 this=this, 3335 position=position, 3336 expression=expression, 3337 collation=collation, 3338 ) 3339 3340 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3341 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3342 3343 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3344 return self._parse_window(self._parse_id_var(), alias=True) 3345 3346 def _parse_window( 3347 self, this: t.Optional[exp.Expression], alias: bool = False 3348 ) -> t.Optional[exp.Expression]: 3349 if self._match(TokenType.FILTER): 3350 where = self._parse_wrapped(self._parse_where) 3351 this = self.expression(exp.Filter, this=this, expression=where) 3352 3353 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3354 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3355 if self._match(TokenType.WITHIN_GROUP): 3356 order = self._parse_wrapped(self._parse_order) 3357 this = self.expression(exp.WithinGroup, this=this, expression=order) 3358 3359 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3360 # Some dialects choose to implement and some do not. 3361 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3362 3363 # There is some code above in _parse_lambda that handles 3364 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3365 3366 # The below changes handle 3367 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3368 3369 # Oracle allows both formats 3370 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3371 # and Snowflake chose to do the same for familiarity 3372 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3373 if self._match(TokenType.IGNORE_NULLS): 3374 this = self.expression(exp.IgnoreNulls, this=this) 3375 elif self._match(TokenType.RESPECT_NULLS): 3376 this = self.expression(exp.RespectNulls, this=this) 3377 3378 # bigquery select from window x AS (partition by ...) 3379 if alias: 3380 self._match(TokenType.ALIAS) 3381 elif not self._match(TokenType.OVER): 3382 return this 3383 3384 if not self._match(TokenType.L_PAREN): 3385 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3386 3387 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3388 partition = self._parse_partition_by() 3389 order = self._parse_order() 3390 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3391 3392 if kind: 3393 self._match(TokenType.BETWEEN) 3394 start = self._parse_window_spec() 3395 self._match(TokenType.AND) 3396 end = self._parse_window_spec() 3397 3398 spec = self.expression( 3399 exp.WindowSpec, 3400 kind=kind, 3401 start=start["value"], 3402 start_side=start["side"], 3403 end=end["value"], 3404 end_side=end["side"], 3405 ) 3406 else: 3407 spec = None 3408 3409 self._match_r_paren() 3410 3411 return self.expression( 3412 exp.Window, 3413 this=this, 3414 partition_by=partition, 3415 order=order, 3416 spec=spec, 3417 alias=window_alias, 3418 ) 3419 3420 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3421 self._match(TokenType.BETWEEN) 3422 3423 return { 3424 "value": ( 3425 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3426 ) 3427 or self._parse_bitwise(), 3428 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3429 } 3430 3431 def _parse_alias( 3432 self, this: t.Optional[exp.Expression], explicit: bool = False 3433 ) -> t.Optional[exp.Expression]: 3434 any_token = self._match(TokenType.ALIAS) 3435 3436 if explicit and not any_token: 3437 return this 3438 3439 if self._match(TokenType.L_PAREN): 3440 aliases = self.expression( 3441 exp.Aliases, 3442 this=this, 3443 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3444 ) 3445 self._match_r_paren(aliases) 3446 return aliases 3447 3448 alias = self._parse_id_var(any_token) 3449 3450 if alias: 3451 return self.expression(exp.Alias, this=this, alias=alias) 3452 3453 return this 3454 3455 def _parse_id_var( 3456 self, 3457 any_token: bool = True, 3458 tokens: t.Optional[t.Collection[TokenType]] = None, 3459 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3460 ) -> t.Optional[exp.Expression]: 3461 identifier = self._parse_identifier() 3462 3463 if identifier: 3464 return identifier 3465 3466 prefix = "" 3467 3468 if prefix_tokens: 3469 while self._match_set(prefix_tokens): 3470 prefix += self._prev.text 3471 3472 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3473 quoted = self._prev.token_type == TokenType.STRING 3474 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3475 3476 return None 3477 3478 def _parse_string(self) -> t.Optional[exp.Expression]: 3479 if self._match(TokenType.STRING): 3480 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3481 return self._parse_placeholder() 3482 3483 def _parse_number(self) -> t.Optional[exp.Expression]: 3484 if self._match(TokenType.NUMBER): 3485 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3486 return self._parse_placeholder() 3487 3488 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3489 if self._match(TokenType.IDENTIFIER): 3490 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3491 return self._parse_placeholder() 3492 3493 def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]: 3494 if (any_token and self._advance_any()) or self._match(TokenType.VAR): 3495 return self.expression(exp.Var, this=self._prev.text) 3496 return self._parse_placeholder() 3497 3498 def _advance_any(self) -> t.Optional[Token]: 3499 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3500 self._advance() 3501 return self._prev 3502 return None 3503 3504 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3505 return self._parse_var() or self._parse_string() 3506 3507 def _parse_null(self) -> t.Optional[exp.Expression]: 3508 if self._match(TokenType.NULL): 3509 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3510 return None 3511 3512 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3513 if self._match(TokenType.TRUE): 3514 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3515 if self._match(TokenType.FALSE): 3516 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3517 return None 3518 3519 def _parse_star(self) -> t.Optional[exp.Expression]: 3520 if self._match(TokenType.STAR): 3521 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3522 return None 3523 3524 def _parse_parameter(self) -> exp.Expression: 3525 wrapped = self._match(TokenType.L_BRACE) 3526 this = self._parse_var() or self._parse_primary() 3527 self._match(TokenType.R_BRACE) 3528 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3529 3530 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3531 if self._match_set(self.PLACEHOLDER_PARSERS): 3532 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3533 if placeholder: 3534 return placeholder 3535 self._advance(-1) 3536 return None 3537 3538 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3539 if not self._match(TokenType.EXCEPT): 3540 return None 3541 if self._match(TokenType.L_PAREN, advance=False): 3542 return self._parse_wrapped_csv(self._parse_column) 3543 return self._parse_csv(self._parse_column) 3544 3545 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3546 if not self._match(TokenType.REPLACE): 3547 return None 3548 if self._match(TokenType.L_PAREN, advance=False): 3549 return self._parse_wrapped_csv(self._parse_expression) 3550 return self._parse_csv(self._parse_expression) 3551 3552 def _parse_csv( 3553 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3554 ) -> t.List[t.Optional[exp.Expression]]: 3555 parse_result = parse_method() 3556 items = [parse_result] if parse_result is not None else [] 3557 3558 while self._match(sep): 3559 if parse_result and self._prev_comments: 3560 parse_result.comments = self._prev_comments 3561 3562 parse_result = parse_method() 3563 if parse_result is not None: 3564 items.append(parse_result) 3565 3566 return items 3567 3568 def _parse_tokens( 3569 self, parse_method: t.Callable, expressions: t.Dict 3570 ) -> t.Optional[exp.Expression]: 3571 this = parse_method() 3572 3573 while self._match_set(expressions): 3574 this = self.expression( 3575 expressions[self._prev.token_type], 3576 this=this, 3577 comments=self._prev_comments, 3578 expression=parse_method(), 3579 ) 3580 3581 return this 3582 3583 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3584 return self._parse_wrapped_csv(self._parse_id_var) 3585 3586 def _parse_wrapped_csv( 3587 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3588 ) -> t.List[t.Optional[exp.Expression]]: 3589 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3590 3591 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3592 self._match_l_paren() 3593 parse_result = parse_method() 3594 self._match_r_paren() 3595 return parse_result 3596 3597 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3598 return self._parse_select() or self._parse_expression() 3599 3600 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3601 return self._parse_set_operations( 3602 self._parse_select(nested=True, parse_subquery_alias=False) 3603 ) 3604 3605 def _parse_transaction(self) -> exp.Expression: 3606 this = None 3607 if self._match_texts(self.TRANSACTION_KIND): 3608 this = self._prev.text 3609 3610 self._match_texts({"TRANSACTION", "WORK"}) 3611 3612 modes = [] 3613 while True: 3614 mode = [] 3615 while self._match(TokenType.VAR): 3616 mode.append(self._prev.text) 3617 3618 if mode: 3619 modes.append(" ".join(mode)) 3620 if not self._match(TokenType.COMMA): 3621 break 3622 3623 return self.expression(exp.Transaction, this=this, modes=modes) 3624 3625 def _parse_commit_or_rollback(self) -> exp.Expression: 3626 chain = None 3627 savepoint = None 3628 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3629 3630 self._match_texts({"TRANSACTION", "WORK"}) 3631 3632 if self._match_text_seq("TO"): 3633 self._match_text_seq("SAVEPOINT") 3634 savepoint = self._parse_id_var() 3635 3636 if self._match(TokenType.AND): 3637 chain = not self._match_text_seq("NO") 3638 self._match_text_seq("CHAIN") 3639 3640 if is_rollback: 3641 return self.expression(exp.Rollback, savepoint=savepoint) 3642 return self.expression(exp.Commit, chain=chain) 3643 3644 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3645 if not self._match_text_seq("ADD"): 3646 return None 3647 3648 self._match(TokenType.COLUMN) 3649 exists_column = self._parse_exists(not_=True) 3650 expression = self._parse_column_def(self._parse_field(any_token=True)) 3651 3652 if expression: 3653 expression.set("exists", exists_column) 3654 3655 return expression 3656 3657 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3658 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3659 3660 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3661 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3662 return self.expression( 3663 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3664 ) 3665 3666 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3667 this = None 3668 kind = self._prev.token_type 3669 3670 if kind == TokenType.CONSTRAINT: 3671 this = self._parse_id_var() 3672 3673 if self._match_text_seq("CHECK"): 3674 expression = self._parse_wrapped(self._parse_conjunction) 3675 enforced = self._match_text_seq("ENFORCED") 3676 3677 return self.expression( 3678 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3679 ) 3680 3681 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3682 expression = self._parse_foreign_key() 3683 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3684 expression = self._parse_primary_key() 3685 3686 return self.expression(exp.AddConstraint, this=this, expression=expression) 3687 3688 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3689 index = self._index - 1 3690 3691 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3692 return self._parse_csv(self._parse_add_constraint) 3693 3694 self._retreat(index) 3695 return self._parse_csv(self._parse_add_column) 3696 3697 def _parse_alter_table_alter(self) -> exp.Expression: 3698 self._match(TokenType.COLUMN) 3699 column = self._parse_field(any_token=True) 3700 3701 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3702 return self.expression(exp.AlterColumn, this=column, drop=True) 3703 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3704 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3705 3706 self._match_text_seq("SET", "DATA") 3707 return self.expression( 3708 exp.AlterColumn, 3709 this=column, 3710 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3711 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3712 using=self._match(TokenType.USING) and self._parse_conjunction(), 3713 ) 3714 3715 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3716 index = self._index - 1 3717 3718 partition_exists = self._parse_exists() 3719 if self._match(TokenType.PARTITION, advance=False): 3720 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3721 3722 self._retreat(index) 3723 return self._parse_csv(self._parse_drop_column) 3724 3725 def _parse_alter_table_rename(self) -> exp.Expression: 3726 self._match_text_seq("TO") 3727 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3728 3729 def _parse_alter(self) -> t.Optional[exp.Expression]: 3730 if not self._match(TokenType.TABLE): 3731 return self._parse_as_command(self._prev) 3732 3733 exists = self._parse_exists() 3734 this = self._parse_table(schema=True) 3735 3736 if not self._curr: 3737 return None 3738 3739 parser = self.ALTER_PARSERS.get(self._curr.text.upper()) 3740 actions = ensure_list(self._advance() or parser(self)) if parser else [] # type: ignore 3741 3742 return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) 3743 3744 def _parse_show(self) -> t.Optional[exp.Expression]: 3745 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3746 if parser: 3747 return parser(self) 3748 self._advance() 3749 return self.expression(exp.Show, this=self._prev.text.upper()) 3750 3751 def _default_parse_set_item(self) -> exp.Expression: 3752 return self.expression( 3753 exp.SetItem, 3754 this=self._parse_statement(), 3755 ) 3756 3757 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3758 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3759 return parser(self) if parser else self._default_parse_set_item() 3760 3761 def _parse_merge(self) -> exp.Expression: 3762 self._match(TokenType.INTO) 3763 target = self._parse_table() 3764 3765 self._match(TokenType.USING) 3766 using = self._parse_table() 3767 3768 self._match(TokenType.ON) 3769 on = self._parse_conjunction() 3770 3771 whens = [] 3772 while self._match(TokenType.WHEN): 3773 this = self._parse_conjunction() 3774 self._match(TokenType.THEN) 3775 3776 if self._match(TokenType.INSERT): 3777 _this = self._parse_star() 3778 if _this: 3779 then = self.expression(exp.Insert, this=_this) 3780 else: 3781 then = self.expression( 3782 exp.Insert, 3783 this=self._parse_value(), 3784 expression=self._match(TokenType.VALUES) and self._parse_value(), 3785 ) 3786 elif self._match(TokenType.UPDATE): 3787 expressions = self._parse_star() 3788 if expressions: 3789 then = self.expression(exp.Update, expressions=expressions) 3790 else: 3791 then = self.expression( 3792 exp.Update, 3793 expressions=self._match(TokenType.SET) 3794 and self._parse_csv(self._parse_equality), 3795 ) 3796 elif self._match(TokenType.DELETE): 3797 then = self.expression(exp.Var, this=self._prev.text) 3798 3799 whens.append(self.expression(exp.When, this=this, then=then)) 3800 3801 return self.expression( 3802 exp.Merge, 3803 this=target, 3804 using=using, 3805 on=on, 3806 expressions=whens, 3807 ) 3808 3809 def _parse_set(self) -> exp.Expression: 3810 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3811 3812 def _parse_as_command(self, start: Token) -> exp.Command: 3813 while self._curr: 3814 self._advance() 3815 text = self._find_sql(start, self._prev) 3816 size = len(start.text) 3817 return exp.Command(this=text[:size], expression=text[size:]) 3818 3819 def _find_parser( 3820 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3821 ) -> t.Optional[t.Callable]: 3822 index = self._index 3823 this = [] 3824 while True: 3825 # The current token might be multiple words 3826 curr = self._curr.text.upper() 3827 key = curr.split(" ") 3828 this.append(curr) 3829 self._advance() 3830 result, trie = in_trie(trie, key) 3831 if result == 0: 3832 break 3833 if result == 2: 3834 subparser = parsers[" ".join(this)] 3835 return subparser 3836 self._retreat(index) 3837 return None 3838 3839 def _match(self, token_type, advance=True): 3840 if not self._curr: 3841 return None 3842 3843 if self._curr.token_type == token_type: 3844 if advance: 3845 self._advance() 3846 return True 3847 3848 return None 3849 3850 def _match_set(self, types, advance=True): 3851 if not self._curr: 3852 return None 3853 3854 if self._curr.token_type in types: 3855 if advance: 3856 self._advance() 3857 return True 3858 3859 return None 3860 3861 def _match_pair(self, token_type_a, token_type_b, advance=True): 3862 if not self._curr or not self._next: 3863 return None 3864 3865 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3866 if advance: 3867 self._advance(2) 3868 return True 3869 3870 return None 3871 3872 def _match_l_paren(self, expression=None): 3873 if not self._match(TokenType.L_PAREN): 3874 self.raise_error("Expecting (") 3875 if expression and self._prev_comments: 3876 expression.comments = self._prev_comments 3877 3878 def _match_r_paren(self, expression=None): 3879 if not self._match(TokenType.R_PAREN): 3880 self.raise_error("Expecting )") 3881 if expression and self._prev_comments: 3882 expression.comments = self._prev_comments 3883 3884 def _match_texts(self, texts, advance=True): 3885 if self._curr and self._curr.text.upper() in texts: 3886 if advance: 3887 self._advance() 3888 return True 3889 return False 3890 3891 def _match_text_seq(self, *texts, advance=True): 3892 index = self._index 3893 for text in texts: 3894 if self._curr and self._curr.text.upper() == text: 3895 self._advance() 3896 else: 3897 self._retreat(index) 3898 return False 3899 3900 if not advance: 3901 self._retreat(index) 3902 3903 return True 3904 3905 def _replace_columns_with_dots(self, this): 3906 if isinstance(this, exp.Dot): 3907 exp.replace_children(this, self._replace_columns_with_dots) 3908 elif isinstance(this, exp.Column): 3909 exp.replace_children(this, self._replace_columns_with_dots) 3910 table = this.args.get("table") 3911 this = ( 3912 self.expression(exp.Dot, this=table, expression=this.this) 3913 if table 3914 else self.expression(exp.Var, this=this.name) 3915 ) 3916 elif isinstance(this, exp.Identifier): 3917 this = self.expression(exp.Var, this=this.name) 3918 return this 3919 3920 def _replace_lambda(self, node, lambda_variables): 3921 if isinstance(node, exp.Column): 3922 if node.name in lambda_variables: 3923 return node.this 3924 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
725 def __init__( 726 self, 727 error_level: t.Optional[ErrorLevel] = None, 728 error_message_context: int = 100, 729 index_offset: int = 0, 730 unnest_column_only: bool = False, 731 alias_post_tablesample: bool = False, 732 max_errors: int = 3, 733 null_ordering: t.Optional[str] = None, 734 ): 735 self.error_level = error_level or ErrorLevel.IMMEDIATE 736 self.error_message_context = error_message_context 737 self.index_offset = index_offset 738 self.unnest_column_only = unnest_column_only 739 self.alias_post_tablesample = alias_post_tablesample 740 self.max_errors = max_errors 741 self.null_ordering = null_ordering 742 self.reset()
754 def parse( 755 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 756 ) -> t.List[t.Optional[exp.Expression]]: 757 """ 758 Parses a list of tokens and returns a list of syntax trees, one tree 759 per parsed SQL statement. 760 761 Args: 762 raw_tokens: the list of tokens. 763 sql: the original SQL string, used to produce helpful debug messages. 764 765 Returns: 766 The list of syntax trees. 767 """ 768 return self._parse( 769 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 770 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
772 def parse_into( 773 self, 774 expression_types: exp.IntoType, 775 raw_tokens: t.List[Token], 776 sql: t.Optional[str] = None, 777 ) -> t.List[t.Optional[exp.Expression]]: 778 """ 779 Parses a list of tokens into a given Expression type. If a collection of Expression 780 types is given instead, this method will try to parse the token list into each one 781 of them, stopping at the first for which the parsing succeeds. 782 783 Args: 784 expression_types: the expression type(s) to try and parse the token list into. 785 raw_tokens: the list of tokens. 786 sql: the original SQL string, used to produce helpful debug messages. 787 788 Returns: 789 The target Expression. 790 """ 791 errors = [] 792 for expression_type in ensure_collection(expression_types): 793 parser = self.EXPRESSION_PARSERS.get(expression_type) 794 if not parser: 795 raise TypeError(f"No parser registered for {expression_type}") 796 try: 797 return self._parse(parser, raw_tokens, sql) 798 except ParseError as e: 799 e.errors[0]["into_expression"] = expression_type 800 errors.append(e) 801 raise ParseError( 802 f"Failed to parse into {expression_types}", 803 errors=merge_errors(errors), 804 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
840 def check_errors(self) -> None: 841 """ 842 Logs or raises any found errors, depending on the chosen error level setting. 843 """ 844 if self.error_level == ErrorLevel.WARN: 845 for error in self.errors: 846 logger.error(str(error)) 847 elif self.error_level == ErrorLevel.RAISE and self.errors: 848 raise ParseError( 849 concat_messages(self.errors, self.max_errors), 850 errors=merge_errors(self.errors), 851 )
Logs or raises any found errors, depending on the chosen error level setting.
853 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 854 """ 855 Appends an error in the list of recorded errors or raises it, depending on the chosen 856 error level setting. 857 """ 858 token = token or self._curr or self._prev or Token.string("") 859 start = self._find_token(token) 860 end = start + len(token.text) 861 start_context = self.sql[max(start - self.error_message_context, 0) : start] 862 highlight = self.sql[start:end] 863 end_context = self.sql[end : end + self.error_message_context] 864 865 error = ParseError.new( 866 f"{message}. Line {token.line}, Col: {token.col}.\n" 867 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 868 description=message, 869 line=token.line, 870 col=token.col, 871 start_context=start_context, 872 highlight=highlight, 873 end_context=end_context, 874 ) 875 876 if self.error_level == ErrorLevel.IMMEDIATE: 877 raise error 878 879 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
881 def expression( 882 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 883 ) -> exp.Expression: 884 """ 885 Creates a new, validated Expression. 886 887 Args: 888 exp_class: the expression class to instantiate. 889 comments: an optional list of comments to attach to the expression. 890 kwargs: the arguments to set for the expression along with their respective values. 891 892 Returns: 893 The target expression. 894 """ 895 instance = exp_class(**kwargs) 896 if self._prev_comments: 897 instance.comments = self._prev_comments 898 self._prev_comments = None 899 if comments: 900 instance.comments = comments 901 self.validate_expression(instance) 902 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
904 def validate_expression( 905 self, expression: exp.Expression, args: t.Optional[t.List] = None 906 ) -> None: 907 """ 908 Validates an already instantiated expression, making sure that all its mandatory arguments 909 are set. 910 911 Args: 912 expression: the expression to validate. 913 args: an optional list of items that was used to instantiate the expression, if it's a Func. 914 """ 915 if self.error_level == ErrorLevel.IGNORE: 916 return 917 918 for error_message in expression.error_messages(args): 919 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.