sqlglot.optimizer.scope
1from __future__ import annotations 2 3import itertools 4import logging 5import typing as t 6from collections import defaultdict 7from enum import Enum, auto 8 9from sqlglot import exp 10from sqlglot.errors import OptimizeError 11from sqlglot.helper import ensure_collection, find_new_name, seq_get 12 13logger = logging.getLogger("sqlglot") 14 15TRAVERSABLES = (exp.Query, exp.DDL, exp.DML) 16 17 18class ScopeType(Enum): 19 ROOT = auto() 20 SUBQUERY = auto() 21 DERIVED_TABLE = auto() 22 CTE = auto() 23 UNION = auto() 24 UDTF = auto() 25 26 27class Scope: 28 """ 29 Selection scope. 30 31 Attributes: 32 expression (exp.Select|exp.Union): Root expression of this scope 33 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 34 a Table expression or another Scope instance. For example: 35 SELECT * FROM x {"x": Table(this="x")} 36 SELECT * FROM x AS y {"y": Table(this="x")} 37 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 38 lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals 39 For example: 40 SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; 41 The LATERAL VIEW EXPLODE gets x as a source. 42 cte_sources (dict[str, Scope]): Sources from CTES 43 outer_columns (list[str]): If this is a derived table or CTE, and the outer query 44 defines a column list for the alias of this scope, this is that list of columns. 45 For example: 46 SELECT * FROM (SELECT ...) AS y(col1, col2) 47 The inner query would have `["col1", "col2"]` for its `outer_columns` 48 parent (Scope): Parent scope 49 scope_type (ScopeType): Type of this scope, relative to it's parent 50 subquery_scopes (list[Scope]): List of all child scopes for subqueries 51 cte_scopes (list[Scope]): List of all child scopes for CTEs 52 derived_table_scopes (list[Scope]): List of all child scopes for derived_tables 53 udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions 54 table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined 55 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 56 a list of the left and right child scopes. 57 """ 58 59 def __init__( 60 self, 61 expression, 62 sources=None, 63 outer_columns=None, 64 parent=None, 65 scope_type=ScopeType.ROOT, 66 lateral_sources=None, 67 cte_sources=None, 68 ): 69 self.expression = expression 70 self.sources = sources or {} 71 self.lateral_sources = lateral_sources or {} 72 self.cte_sources = cte_sources or {} 73 self.sources.update(self.lateral_sources) 74 self.sources.update(self.cte_sources) 75 self.outer_columns = outer_columns or [] 76 self.parent = parent 77 self.scope_type = scope_type 78 self.subquery_scopes = [] 79 self.derived_table_scopes = [] 80 self.table_scopes = [] 81 self.cte_scopes = [] 82 self.union_scopes = [] 83 self.udtf_scopes = [] 84 self.clear_cache() 85 86 def clear_cache(self): 87 self._collected = False 88 self._raw_columns = None 89 self._stars = None 90 self._derived_tables = None 91 self._udtfs = None 92 self._tables = None 93 self._ctes = None 94 self._subqueries = None 95 self._selected_sources = None 96 self._columns = None 97 self._external_columns = None 98 self._join_hints = None 99 self._pivots = None 100 self._references = None 101 102 def branch( 103 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 104 ): 105 """Branch from the current scope to a new, inner scope""" 106 return Scope( 107 expression=expression.unnest(), 108 sources=sources.copy() if sources else None, 109 parent=self, 110 scope_type=scope_type, 111 cte_sources={**self.cte_sources, **(cte_sources or {})}, 112 lateral_sources=lateral_sources.copy() if lateral_sources else None, 113 **kwargs, 114 ) 115 116 def _collect(self): 117 self._tables = [] 118 self._ctes = [] 119 self._subqueries = [] 120 self._derived_tables = [] 121 self._udtfs = [] 122 self._raw_columns = [] 123 self._stars = [] 124 self._join_hints = [] 125 126 for node in self.walk(bfs=False): 127 if node is self.expression: 128 continue 129 130 if isinstance(node, exp.Dot) and node.is_star: 131 self._stars.append(node) 132 elif isinstance(node, exp.Column): 133 if isinstance(node.this, exp.Star): 134 self._stars.append(node) 135 else: 136 self._raw_columns.append(node) 137 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 138 self._tables.append(node) 139 elif isinstance(node, exp.JoinHint): 140 self._join_hints.append(node) 141 elif isinstance(node, exp.UDTF): 142 self._udtfs.append(node) 143 elif isinstance(node, exp.CTE): 144 self._ctes.append(node) 145 elif _is_derived_table(node) and isinstance( 146 node.parent, (exp.From, exp.Join, exp.Subquery) 147 ): 148 self._derived_tables.append(node) 149 elif isinstance(node, exp.UNWRAPPED_QUERIES): 150 self._subqueries.append(node) 151 152 self._collected = True 153 154 def _ensure_collected(self): 155 if not self._collected: 156 self._collect() 157 158 def walk(self, bfs=True, prune=None): 159 return walk_in_scope(self.expression, bfs=bfs, prune=None) 160 161 def find(self, *expression_types, bfs=True): 162 return find_in_scope(self.expression, expression_types, bfs=bfs) 163 164 def find_all(self, *expression_types, bfs=True): 165 return find_all_in_scope(self.expression, expression_types, bfs=bfs) 166 167 def replace(self, old, new): 168 """ 169 Replace `old` with `new`. 170 171 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 172 173 Args: 174 old (exp.Expression): old node 175 new (exp.Expression): new node 176 """ 177 old.replace(new) 178 self.clear_cache() 179 180 @property 181 def tables(self): 182 """ 183 List of tables in this scope. 184 185 Returns: 186 list[exp.Table]: tables 187 """ 188 self._ensure_collected() 189 return self._tables 190 191 @property 192 def ctes(self): 193 """ 194 List of CTEs in this scope. 195 196 Returns: 197 list[exp.CTE]: ctes 198 """ 199 self._ensure_collected() 200 return self._ctes 201 202 @property 203 def derived_tables(self): 204 """ 205 List of derived tables in this scope. 206 207 For example: 208 SELECT * FROM (SELECT ...) <- that's a derived table 209 210 Returns: 211 list[exp.Subquery]: derived tables 212 """ 213 self._ensure_collected() 214 return self._derived_tables 215 216 @property 217 def udtfs(self): 218 """ 219 List of "User Defined Tabular Functions" in this scope. 220 221 Returns: 222 list[exp.UDTF]: UDTFs 223 """ 224 self._ensure_collected() 225 return self._udtfs 226 227 @property 228 def subqueries(self): 229 """ 230 List of subqueries in this scope. 231 232 For example: 233 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 234 235 Returns: 236 list[exp.Select | exp.Union]: subqueries 237 """ 238 self._ensure_collected() 239 return self._subqueries 240 241 @property 242 def stars(self) -> t.List[exp.Column | exp.Dot]: 243 """ 244 List of star expressions (columns or dots) in this scope. 245 """ 246 self._ensure_collected() 247 return self._stars 248 249 @property 250 def columns(self): 251 """ 252 List of columns in this scope. 253 254 Returns: 255 list[exp.Column]: Column instances in this scope, plus any 256 Columns that reference this scope from correlated subqueries. 257 """ 258 if self._columns is None: 259 self._ensure_collected() 260 columns = self._raw_columns 261 262 external_columns = [ 263 column 264 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 265 for column in scope.external_columns 266 ] 267 268 named_selects = set(self.expression.named_selects) 269 270 self._columns = [] 271 for column in columns + external_columns: 272 ancestor = column.find_ancestor( 273 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 274 ) 275 if ( 276 not ancestor 277 or column.table 278 or isinstance(ancestor, exp.Select) 279 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 280 or ( 281 isinstance(ancestor, exp.Order) 282 and ( 283 isinstance(ancestor.parent, exp.Window) 284 or column.name not in named_selects 285 ) 286 ) 287 ): 288 self._columns.append(column) 289 290 return self._columns 291 292 @property 293 def selected_sources(self): 294 """ 295 Mapping of nodes and sources that are actually selected from in this scope. 296 297 That is, all tables in a schema are selectable at any point. But a 298 table only becomes a selected source if it's included in a FROM or JOIN clause. 299 300 Returns: 301 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 302 """ 303 if self._selected_sources is None: 304 result = {} 305 306 for name, node in self.references: 307 if name in result: 308 raise OptimizeError(f"Alias already used: {name}") 309 if name in self.sources: 310 result[name] = (node, self.sources[name]) 311 312 self._selected_sources = result 313 return self._selected_sources 314 315 @property 316 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 317 if self._references is None: 318 self._references = [] 319 320 for table in self.tables: 321 self._references.append((table.alias_or_name, table)) 322 for expression in itertools.chain(self.derived_tables, self.udtfs): 323 self._references.append( 324 ( 325 expression.alias, 326 expression if expression.args.get("pivots") else expression.unnest(), 327 ) 328 ) 329 330 return self._references 331 332 @property 333 def external_columns(self): 334 """ 335 Columns that appear to reference sources in outer scopes. 336 337 Returns: 338 list[exp.Column]: Column instances that don't reference 339 sources in the current scope. 340 """ 341 if self._external_columns is None: 342 if isinstance(self.expression, exp.Union): 343 left, right = self.union_scopes 344 self._external_columns = left.external_columns + right.external_columns 345 else: 346 self._external_columns = [ 347 c for c in self.columns if c.table not in self.selected_sources 348 ] 349 350 return self._external_columns 351 352 @property 353 def unqualified_columns(self): 354 """ 355 Unqualified columns in the current scope. 356 357 Returns: 358 list[exp.Column]: Unqualified columns 359 """ 360 return [c for c in self.columns if not c.table] 361 362 @property 363 def join_hints(self): 364 """ 365 Hints that exist in the scope that reference tables 366 367 Returns: 368 list[exp.JoinHint]: Join hints that are referenced within the scope 369 """ 370 if self._join_hints is None: 371 return [] 372 return self._join_hints 373 374 @property 375 def pivots(self): 376 if not self._pivots: 377 self._pivots = [ 378 pivot for _, node in self.references for pivot in node.args.get("pivots") or [] 379 ] 380 381 return self._pivots 382 383 def source_columns(self, source_name): 384 """ 385 Get all columns in the current scope for a particular source. 386 387 Args: 388 source_name (str): Name of the source 389 Returns: 390 list[exp.Column]: Column instances that reference `source_name` 391 """ 392 return [column for column in self.columns if column.table == source_name] 393 394 @property 395 def is_subquery(self): 396 """Determine if this scope is a subquery""" 397 return self.scope_type == ScopeType.SUBQUERY 398 399 @property 400 def is_derived_table(self): 401 """Determine if this scope is a derived table""" 402 return self.scope_type == ScopeType.DERIVED_TABLE 403 404 @property 405 def is_union(self): 406 """Determine if this scope is a union""" 407 return self.scope_type == ScopeType.UNION 408 409 @property 410 def is_cte(self): 411 """Determine if this scope is a common table expression""" 412 return self.scope_type == ScopeType.CTE 413 414 @property 415 def is_root(self): 416 """Determine if this is the root scope""" 417 return self.scope_type == ScopeType.ROOT 418 419 @property 420 def is_udtf(self): 421 """Determine if this scope is a UDTF (User Defined Table Function)""" 422 return self.scope_type == ScopeType.UDTF 423 424 @property 425 def is_correlated_subquery(self): 426 """Determine if this scope is a correlated subquery""" 427 return bool( 428 (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral))) 429 and self.external_columns 430 ) 431 432 def rename_source(self, old_name, new_name): 433 """Rename a source in this scope""" 434 columns = self.sources.pop(old_name or "", []) 435 self.sources[new_name] = columns 436 437 def add_source(self, name, source): 438 """Add a source to this scope""" 439 self.sources[name] = source 440 self.clear_cache() 441 442 def remove_source(self, name): 443 """Remove a source from this scope""" 444 self.sources.pop(name, None) 445 self.clear_cache() 446 447 def __repr__(self): 448 return f"Scope<{self.expression.sql()}>" 449 450 def traverse(self): 451 """ 452 Traverse the scope tree from this node. 453 454 Yields: 455 Scope: scope instances in depth-first-search post-order 456 """ 457 stack = [self] 458 result = [] 459 while stack: 460 scope = stack.pop() 461 result.append(scope) 462 stack.extend( 463 itertools.chain( 464 scope.cte_scopes, 465 scope.union_scopes, 466 scope.table_scopes, 467 scope.subquery_scopes, 468 ) 469 ) 470 471 yield from reversed(result) 472 473 def ref_count(self): 474 """ 475 Count the number of times each scope in this tree is referenced. 476 477 Returns: 478 dict[int, int]: Mapping of Scope instance ID to reference count 479 """ 480 scope_ref_count = defaultdict(lambda: 0) 481 482 for scope in self.traverse(): 483 for _, source in scope.selected_sources.values(): 484 scope_ref_count[id(source)] += 1 485 486 return scope_ref_count 487 488 489def traverse_scope(expression: exp.Expression) -> t.List[Scope]: 490 """ 491 Traverse an expression by its "scopes". 492 493 "Scope" represents the current context of a Select statement. 494 495 This is helpful for optimizing queries, where we need more information than 496 the expression tree itself. For example, we might care about the source 497 names within a subquery. Returns a list because a generator could result in 498 incomplete properties which is confusing. 499 500 Examples: 501 >>> import sqlglot 502 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 503 >>> scopes = traverse_scope(expression) 504 >>> scopes[0].expression.sql(), list(scopes[0].sources) 505 ('SELECT a FROM x', ['x']) 506 >>> scopes[1].expression.sql(), list(scopes[1].sources) 507 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 508 509 Args: 510 expression: Expression to traverse 511 512 Returns: 513 A list of the created scope instances 514 """ 515 if isinstance(expression, TRAVERSABLES): 516 return list(_traverse_scope(Scope(expression))) 517 return [] 518 519 520def build_scope(expression: exp.Expression) -> t.Optional[Scope]: 521 """ 522 Build a scope tree. 523 524 Args: 525 expression: Expression to build the scope tree for. 526 527 Returns: 528 The root scope 529 """ 530 return seq_get(traverse_scope(expression), -1) 531 532 533def _traverse_scope(scope): 534 expression = scope.expression 535 536 if isinstance(expression, exp.Select): 537 yield from _traverse_select(scope) 538 elif isinstance(expression, exp.Union): 539 yield from _traverse_ctes(scope) 540 yield from _traverse_union(scope) 541 return 542 elif isinstance(expression, exp.Subquery): 543 if scope.is_root: 544 yield from _traverse_select(scope) 545 else: 546 yield from _traverse_subqueries(scope) 547 elif isinstance(expression, exp.Table): 548 yield from _traverse_tables(scope) 549 elif isinstance(expression, exp.UDTF): 550 yield from _traverse_udtfs(scope) 551 elif isinstance(expression, exp.DDL): 552 if isinstance(expression.expression, exp.Query): 553 yield from _traverse_ctes(scope) 554 yield from _traverse_scope(Scope(expression.expression, cte_sources=scope.cte_sources)) 555 return 556 elif isinstance(expression, exp.DML): 557 yield from _traverse_ctes(scope) 558 for query in find_all_in_scope(expression, exp.Query): 559 # This check ensures we don't yield the CTE queries twice 560 if not isinstance(query.parent, exp.CTE): 561 yield from _traverse_scope(Scope(query, cte_sources=scope.cte_sources)) 562 return 563 else: 564 logger.warning("Cannot traverse scope %s with type '%s'", expression, type(expression)) 565 return 566 567 yield scope 568 569 570def _traverse_select(scope): 571 yield from _traverse_ctes(scope) 572 yield from _traverse_tables(scope) 573 yield from _traverse_subqueries(scope) 574 575 576def _traverse_union(scope): 577 prev_scope = None 578 union_scope_stack = [scope] 579 expression_stack = [scope.expression.right, scope.expression.left] 580 581 while expression_stack: 582 expression = expression_stack.pop() 583 union_scope = union_scope_stack[-1] 584 585 new_scope = union_scope.branch( 586 expression, 587 outer_columns=union_scope.outer_columns, 588 scope_type=ScopeType.UNION, 589 ) 590 591 if isinstance(expression, exp.Union): 592 yield from _traverse_ctes(new_scope) 593 594 union_scope_stack.append(new_scope) 595 expression_stack.extend([expression.right, expression.left]) 596 continue 597 598 for scope in _traverse_scope(new_scope): 599 yield scope 600 601 if prev_scope: 602 union_scope_stack.pop() 603 union_scope.union_scopes = [prev_scope, scope] 604 prev_scope = union_scope 605 606 yield union_scope 607 else: 608 prev_scope = scope 609 610 611def _traverse_ctes(scope): 612 sources = {} 613 614 for cte in scope.ctes: 615 cte_name = cte.alias 616 617 # if the scope is a recursive cte, it must be in the form of base_case UNION recursive. 618 # thus the recursive scope is the first section of the union. 619 with_ = scope.expression.args.get("with") 620 if with_ and with_.recursive: 621 union = cte.this 622 623 if isinstance(union, exp.Union): 624 sources[cte_name] = scope.branch(union.this, scope_type=ScopeType.CTE) 625 626 child_scope = None 627 628 for child_scope in _traverse_scope( 629 scope.branch( 630 cte.this, 631 cte_sources=sources, 632 outer_columns=cte.alias_column_names, 633 scope_type=ScopeType.CTE, 634 ) 635 ): 636 yield child_scope 637 638 # append the final child_scope yielded 639 if child_scope: 640 sources[cte_name] = child_scope 641 scope.cte_scopes.append(child_scope) 642 643 scope.sources.update(sources) 644 scope.cte_sources.update(sources) 645 646 647def _is_derived_table(expression: exp.Subquery) -> bool: 648 """ 649 We represent (tbl1 JOIN tbl2) as a Subquery, but it's not really a "derived table", 650 as it doesn't introduce a new scope. If an alias is present, it shadows all names 651 under the Subquery, so that's one exception to this rule. 652 """ 653 return isinstance(expression, exp.Subquery) and bool( 654 expression.alias or isinstance(expression.this, exp.UNWRAPPED_QUERIES) 655 ) 656 657 658def _traverse_tables(scope): 659 sources = {} 660 661 # Traverse FROMs, JOINs, and LATERALs in the order they are defined 662 expressions = [] 663 from_ = scope.expression.args.get("from") 664 if from_: 665 expressions.append(from_.this) 666 667 for join in scope.expression.args.get("joins") or []: 668 expressions.append(join.this) 669 670 if isinstance(scope.expression, exp.Table): 671 expressions.append(scope.expression) 672 673 expressions.extend(scope.expression.args.get("laterals") or []) 674 675 for expression in expressions: 676 if isinstance(expression, exp.Table): 677 table_name = expression.name 678 source_name = expression.alias_or_name 679 680 if table_name in scope.sources and not expression.db: 681 # This is a reference to a parent source (e.g. a CTE), not an actual table, unless 682 # it is pivoted, because then we get back a new table and hence a new source. 683 pivots = expression.args.get("pivots") 684 if pivots: 685 sources[pivots[0].alias] = expression 686 else: 687 sources[source_name] = scope.sources[table_name] 688 elif source_name in sources: 689 sources[find_new_name(sources, table_name)] = expression 690 else: 691 sources[source_name] = expression 692 693 # Make sure to not include the joins twice 694 if expression is not scope.expression: 695 expressions.extend(join.this for join in expression.args.get("joins") or []) 696 697 continue 698 699 if not isinstance(expression, exp.DerivedTable): 700 continue 701 702 if isinstance(expression, exp.UDTF): 703 lateral_sources = sources 704 scope_type = ScopeType.UDTF 705 scopes = scope.udtf_scopes 706 elif _is_derived_table(expression): 707 lateral_sources = None 708 scope_type = ScopeType.DERIVED_TABLE 709 scopes = scope.derived_table_scopes 710 expressions.extend(join.this for join in expression.args.get("joins") or []) 711 else: 712 # Makes sure we check for possible sources in nested table constructs 713 expressions.append(expression.this) 714 expressions.extend(join.this for join in expression.args.get("joins") or []) 715 continue 716 717 for child_scope in _traverse_scope( 718 scope.branch( 719 expression, 720 lateral_sources=lateral_sources, 721 outer_columns=expression.alias_column_names, 722 scope_type=scope_type, 723 ) 724 ): 725 yield child_scope 726 727 # Tables without aliases will be set as "" 728 # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything. 729 # Until then, this means that only a single, unaliased derived table is allowed (rather, 730 # the latest one wins. 731 sources[expression.alias] = child_scope 732 733 # append the final child_scope yielded 734 scopes.append(child_scope) 735 scope.table_scopes.append(child_scope) 736 737 scope.sources.update(sources) 738 739 740def _traverse_subqueries(scope): 741 for subquery in scope.subqueries: 742 top = None 743 for child_scope in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)): 744 yield child_scope 745 top = child_scope 746 scope.subquery_scopes.append(top) 747 748 749def _traverse_udtfs(scope): 750 if isinstance(scope.expression, exp.Unnest): 751 expressions = scope.expression.expressions 752 elif isinstance(scope.expression, exp.Lateral): 753 expressions = [scope.expression.this] 754 else: 755 expressions = [] 756 757 sources = {} 758 for expression in expressions: 759 if _is_derived_table(expression): 760 top = None 761 for child_scope in _traverse_scope( 762 scope.branch( 763 expression, 764 scope_type=ScopeType.SUBQUERY, 765 outer_columns=expression.alias_column_names, 766 ) 767 ): 768 yield child_scope 769 top = child_scope 770 sources[expression.alias] = child_scope 771 772 scope.subquery_scopes.append(top) 773 774 scope.sources.update(sources) 775 776 777def walk_in_scope(expression, bfs=True, prune=None): 778 """ 779 Returns a generator object which visits all nodes in the syntrax tree, stopping at 780 nodes that start child scopes. 781 782 Args: 783 expression (exp.Expression): 784 bfs (bool): if set to True the BFS traversal order will be applied, 785 otherwise the DFS traversal will be used instead. 786 prune ((node, parent, arg_key) -> bool): callable that returns True if 787 the generator should stop traversing this branch of the tree. 788 789 Yields: 790 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 791 """ 792 # We'll use this variable to pass state into the dfs generator. 793 # Whenever we set it to True, we exclude a subtree from traversal. 794 crossed_scope_boundary = False 795 796 for node in expression.walk( 797 bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n)) 798 ): 799 crossed_scope_boundary = False 800 801 yield node 802 803 if node is expression: 804 continue 805 if ( 806 isinstance(node, exp.CTE) 807 or ( 808 isinstance(node.parent, (exp.From, exp.Join, exp.Subquery)) 809 and (_is_derived_table(node) or isinstance(node, exp.UDTF)) 810 ) 811 or isinstance(node, exp.UNWRAPPED_QUERIES) 812 ): 813 crossed_scope_boundary = True 814 815 if isinstance(node, (exp.Subquery, exp.UDTF)): 816 # The following args are not actually in the inner scope, so we should visit them 817 for key in ("joins", "laterals", "pivots"): 818 for arg in node.args.get(key) or []: 819 yield from walk_in_scope(arg, bfs=bfs) 820 821 822def find_all_in_scope(expression, expression_types, bfs=True): 823 """ 824 Returns a generator object which visits all nodes in this scope and only yields those that 825 match at least one of the specified expression types. 826 827 This does NOT traverse into subscopes. 828 829 Args: 830 expression (exp.Expression): 831 expression_types (tuple[type]|type): the expression type(s) to match. 832 bfs (bool): True to use breadth-first search, False to use depth-first. 833 834 Yields: 835 exp.Expression: nodes 836 """ 837 for expression in walk_in_scope(expression, bfs=bfs): 838 if isinstance(expression, tuple(ensure_collection(expression_types))): 839 yield expression 840 841 842def find_in_scope(expression, expression_types, bfs=True): 843 """ 844 Returns the first node in this scope which matches at least one of the specified types. 845 846 This does NOT traverse into subscopes. 847 848 Args: 849 expression (exp.Expression): 850 expression_types (tuple[type]|type): the expression type(s) to match. 851 bfs (bool): True to use breadth-first search, False to use depth-first. 852 853 Returns: 854 exp.Expression: the node which matches the criteria or None if no node matching 855 the criteria was found. 856 """ 857 return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
19class ScopeType(Enum): 20 ROOT = auto() 21 SUBQUERY = auto() 22 DERIVED_TABLE = auto() 23 CTE = auto() 24 UNION = auto() 25 UDTF = auto()
An enumeration.
Inherited Members
- enum.Enum
- name
- value
28class Scope: 29 """ 30 Selection scope. 31 32 Attributes: 33 expression (exp.Select|exp.Union): Root expression of this scope 34 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 35 a Table expression or another Scope instance. For example: 36 SELECT * FROM x {"x": Table(this="x")} 37 SELECT * FROM x AS y {"y": Table(this="x")} 38 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 39 lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals 40 For example: 41 SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; 42 The LATERAL VIEW EXPLODE gets x as a source. 43 cte_sources (dict[str, Scope]): Sources from CTES 44 outer_columns (list[str]): If this is a derived table or CTE, and the outer query 45 defines a column list for the alias of this scope, this is that list of columns. 46 For example: 47 SELECT * FROM (SELECT ...) AS y(col1, col2) 48 The inner query would have `["col1", "col2"]` for its `outer_columns` 49 parent (Scope): Parent scope 50 scope_type (ScopeType): Type of this scope, relative to it's parent 51 subquery_scopes (list[Scope]): List of all child scopes for subqueries 52 cte_scopes (list[Scope]): List of all child scopes for CTEs 53 derived_table_scopes (list[Scope]): List of all child scopes for derived_tables 54 udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions 55 table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined 56 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 57 a list of the left and right child scopes. 58 """ 59 60 def __init__( 61 self, 62 expression, 63 sources=None, 64 outer_columns=None, 65 parent=None, 66 scope_type=ScopeType.ROOT, 67 lateral_sources=None, 68 cte_sources=None, 69 ): 70 self.expression = expression 71 self.sources = sources or {} 72 self.lateral_sources = lateral_sources or {} 73 self.cte_sources = cte_sources or {} 74 self.sources.update(self.lateral_sources) 75 self.sources.update(self.cte_sources) 76 self.outer_columns = outer_columns or [] 77 self.parent = parent 78 self.scope_type = scope_type 79 self.subquery_scopes = [] 80 self.derived_table_scopes = [] 81 self.table_scopes = [] 82 self.cte_scopes = [] 83 self.union_scopes = [] 84 self.udtf_scopes = [] 85 self.clear_cache() 86 87 def clear_cache(self): 88 self._collected = False 89 self._raw_columns = None 90 self._stars = None 91 self._derived_tables = None 92 self._udtfs = None 93 self._tables = None 94 self._ctes = None 95 self._subqueries = None 96 self._selected_sources = None 97 self._columns = None 98 self._external_columns = None 99 self._join_hints = None 100 self._pivots = None 101 self._references = None 102 103 def branch( 104 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 105 ): 106 """Branch from the current scope to a new, inner scope""" 107 return Scope( 108 expression=expression.unnest(), 109 sources=sources.copy() if sources else None, 110 parent=self, 111 scope_type=scope_type, 112 cte_sources={**self.cte_sources, **(cte_sources or {})}, 113 lateral_sources=lateral_sources.copy() if lateral_sources else None, 114 **kwargs, 115 ) 116 117 def _collect(self): 118 self._tables = [] 119 self._ctes = [] 120 self._subqueries = [] 121 self._derived_tables = [] 122 self._udtfs = [] 123 self._raw_columns = [] 124 self._stars = [] 125 self._join_hints = [] 126 127 for node in self.walk(bfs=False): 128 if node is self.expression: 129 continue 130 131 if isinstance(node, exp.Dot) and node.is_star: 132 self._stars.append(node) 133 elif isinstance(node, exp.Column): 134 if isinstance(node.this, exp.Star): 135 self._stars.append(node) 136 else: 137 self._raw_columns.append(node) 138 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 139 self._tables.append(node) 140 elif isinstance(node, exp.JoinHint): 141 self._join_hints.append(node) 142 elif isinstance(node, exp.UDTF): 143 self._udtfs.append(node) 144 elif isinstance(node, exp.CTE): 145 self._ctes.append(node) 146 elif _is_derived_table(node) and isinstance( 147 node.parent, (exp.From, exp.Join, exp.Subquery) 148 ): 149 self._derived_tables.append(node) 150 elif isinstance(node, exp.UNWRAPPED_QUERIES): 151 self._subqueries.append(node) 152 153 self._collected = True 154 155 def _ensure_collected(self): 156 if not self._collected: 157 self._collect() 158 159 def walk(self, bfs=True, prune=None): 160 return walk_in_scope(self.expression, bfs=bfs, prune=None) 161 162 def find(self, *expression_types, bfs=True): 163 return find_in_scope(self.expression, expression_types, bfs=bfs) 164 165 def find_all(self, *expression_types, bfs=True): 166 return find_all_in_scope(self.expression, expression_types, bfs=bfs) 167 168 def replace(self, old, new): 169 """ 170 Replace `old` with `new`. 171 172 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 173 174 Args: 175 old (exp.Expression): old node 176 new (exp.Expression): new node 177 """ 178 old.replace(new) 179 self.clear_cache() 180 181 @property 182 def tables(self): 183 """ 184 List of tables in this scope. 185 186 Returns: 187 list[exp.Table]: tables 188 """ 189 self._ensure_collected() 190 return self._tables 191 192 @property 193 def ctes(self): 194 """ 195 List of CTEs in this scope. 196 197 Returns: 198 list[exp.CTE]: ctes 199 """ 200 self._ensure_collected() 201 return self._ctes 202 203 @property 204 def derived_tables(self): 205 """ 206 List of derived tables in this scope. 207 208 For example: 209 SELECT * FROM (SELECT ...) <- that's a derived table 210 211 Returns: 212 list[exp.Subquery]: derived tables 213 """ 214 self._ensure_collected() 215 return self._derived_tables 216 217 @property 218 def udtfs(self): 219 """ 220 List of "User Defined Tabular Functions" in this scope. 221 222 Returns: 223 list[exp.UDTF]: UDTFs 224 """ 225 self._ensure_collected() 226 return self._udtfs 227 228 @property 229 def subqueries(self): 230 """ 231 List of subqueries in this scope. 232 233 For example: 234 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 235 236 Returns: 237 list[exp.Select | exp.Union]: subqueries 238 """ 239 self._ensure_collected() 240 return self._subqueries 241 242 @property 243 def stars(self) -> t.List[exp.Column | exp.Dot]: 244 """ 245 List of star expressions (columns or dots) in this scope. 246 """ 247 self._ensure_collected() 248 return self._stars 249 250 @property 251 def columns(self): 252 """ 253 List of columns in this scope. 254 255 Returns: 256 list[exp.Column]: Column instances in this scope, plus any 257 Columns that reference this scope from correlated subqueries. 258 """ 259 if self._columns is None: 260 self._ensure_collected() 261 columns = self._raw_columns 262 263 external_columns = [ 264 column 265 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 266 for column in scope.external_columns 267 ] 268 269 named_selects = set(self.expression.named_selects) 270 271 self._columns = [] 272 for column in columns + external_columns: 273 ancestor = column.find_ancestor( 274 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 275 ) 276 if ( 277 not ancestor 278 or column.table 279 or isinstance(ancestor, exp.Select) 280 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 281 or ( 282 isinstance(ancestor, exp.Order) 283 and ( 284 isinstance(ancestor.parent, exp.Window) 285 or column.name not in named_selects 286 ) 287 ) 288 ): 289 self._columns.append(column) 290 291 return self._columns 292 293 @property 294 def selected_sources(self): 295 """ 296 Mapping of nodes and sources that are actually selected from in this scope. 297 298 That is, all tables in a schema are selectable at any point. But a 299 table only becomes a selected source if it's included in a FROM or JOIN clause. 300 301 Returns: 302 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 303 """ 304 if self._selected_sources is None: 305 result = {} 306 307 for name, node in self.references: 308 if name in result: 309 raise OptimizeError(f"Alias already used: {name}") 310 if name in self.sources: 311 result[name] = (node, self.sources[name]) 312 313 self._selected_sources = result 314 return self._selected_sources 315 316 @property 317 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 318 if self._references is None: 319 self._references = [] 320 321 for table in self.tables: 322 self._references.append((table.alias_or_name, table)) 323 for expression in itertools.chain(self.derived_tables, self.udtfs): 324 self._references.append( 325 ( 326 expression.alias, 327 expression if expression.args.get("pivots") else expression.unnest(), 328 ) 329 ) 330 331 return self._references 332 333 @property 334 def external_columns(self): 335 """ 336 Columns that appear to reference sources in outer scopes. 337 338 Returns: 339 list[exp.Column]: Column instances that don't reference 340 sources in the current scope. 341 """ 342 if self._external_columns is None: 343 if isinstance(self.expression, exp.Union): 344 left, right = self.union_scopes 345 self._external_columns = left.external_columns + right.external_columns 346 else: 347 self._external_columns = [ 348 c for c in self.columns if c.table not in self.selected_sources 349 ] 350 351 return self._external_columns 352 353 @property 354 def unqualified_columns(self): 355 """ 356 Unqualified columns in the current scope. 357 358 Returns: 359 list[exp.Column]: Unqualified columns 360 """ 361 return [c for c in self.columns if not c.table] 362 363 @property 364 def join_hints(self): 365 """ 366 Hints that exist in the scope that reference tables 367 368 Returns: 369 list[exp.JoinHint]: Join hints that are referenced within the scope 370 """ 371 if self._join_hints is None: 372 return [] 373 return self._join_hints 374 375 @property 376 def pivots(self): 377 if not self._pivots: 378 self._pivots = [ 379 pivot for _, node in self.references for pivot in node.args.get("pivots") or [] 380 ] 381 382 return self._pivots 383 384 def source_columns(self, source_name): 385 """ 386 Get all columns in the current scope for a particular source. 387 388 Args: 389 source_name (str): Name of the source 390 Returns: 391 list[exp.Column]: Column instances that reference `source_name` 392 """ 393 return [column for column in self.columns if column.table == source_name] 394 395 @property 396 def is_subquery(self): 397 """Determine if this scope is a subquery""" 398 return self.scope_type == ScopeType.SUBQUERY 399 400 @property 401 def is_derived_table(self): 402 """Determine if this scope is a derived table""" 403 return self.scope_type == ScopeType.DERIVED_TABLE 404 405 @property 406 def is_union(self): 407 """Determine if this scope is a union""" 408 return self.scope_type == ScopeType.UNION 409 410 @property 411 def is_cte(self): 412 """Determine if this scope is a common table expression""" 413 return self.scope_type == ScopeType.CTE 414 415 @property 416 def is_root(self): 417 """Determine if this is the root scope""" 418 return self.scope_type == ScopeType.ROOT 419 420 @property 421 def is_udtf(self): 422 """Determine if this scope is a UDTF (User Defined Table Function)""" 423 return self.scope_type == ScopeType.UDTF 424 425 @property 426 def is_correlated_subquery(self): 427 """Determine if this scope is a correlated subquery""" 428 return bool( 429 (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral))) 430 and self.external_columns 431 ) 432 433 def rename_source(self, old_name, new_name): 434 """Rename a source in this scope""" 435 columns = self.sources.pop(old_name or "", []) 436 self.sources[new_name] = columns 437 438 def add_source(self, name, source): 439 """Add a source to this scope""" 440 self.sources[name] = source 441 self.clear_cache() 442 443 def remove_source(self, name): 444 """Remove a source from this scope""" 445 self.sources.pop(name, None) 446 self.clear_cache() 447 448 def __repr__(self): 449 return f"Scope<{self.expression.sql()}>" 450 451 def traverse(self): 452 """ 453 Traverse the scope tree from this node. 454 455 Yields: 456 Scope: scope instances in depth-first-search post-order 457 """ 458 stack = [self] 459 result = [] 460 while stack: 461 scope = stack.pop() 462 result.append(scope) 463 stack.extend( 464 itertools.chain( 465 scope.cte_scopes, 466 scope.union_scopes, 467 scope.table_scopes, 468 scope.subquery_scopes, 469 ) 470 ) 471 472 yield from reversed(result) 473 474 def ref_count(self): 475 """ 476 Count the number of times each scope in this tree is referenced. 477 478 Returns: 479 dict[int, int]: Mapping of Scope instance ID to reference count 480 """ 481 scope_ref_count = defaultdict(lambda: 0) 482 483 for scope in self.traverse(): 484 for _, source in scope.selected_sources.values(): 485 scope_ref_count[id(source)] += 1 486 487 return scope_ref_count
Selection scope.
Attributes:
- expression (exp.Select|exp.Union): Root expression of this scope
- sources (dict[str, exp.Table|Scope]): Mapping of source name to either a Table expression or another Scope instance. For example: SELECT * FROM x {"x": Table(this="x")} SELECT * FROM x AS y {"y": Table(this="x")} SELECT * FROM (SELECT ...) AS y {"y": Scope(...)}
- lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals For example: SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; The LATERAL VIEW EXPLODE gets x as a source.
- cte_sources (dict[str, Scope]): Sources from CTES
- outer_columns (list[str]): If this is a derived table or CTE, and the outer query
defines a column list for the alias of this scope, this is that list of columns.
For example:
SELECT * FROM (SELECT ...) AS y(col1, col2)
The inner query would have
["col1", "col2"]
for itsouter_columns
- parent (Scope): Parent scope
- scope_type (ScopeType): Type of this scope, relative to it's parent
- subquery_scopes (list[Scope]): List of all child scopes for subqueries
- cte_scopes (list[Scope]): List of all child scopes for CTEs
- derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
- udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
- table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
- union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be a list of the left and right child scopes.
60 def __init__( 61 self, 62 expression, 63 sources=None, 64 outer_columns=None, 65 parent=None, 66 scope_type=ScopeType.ROOT, 67 lateral_sources=None, 68 cte_sources=None, 69 ): 70 self.expression = expression 71 self.sources = sources or {} 72 self.lateral_sources = lateral_sources or {} 73 self.cte_sources = cte_sources or {} 74 self.sources.update(self.lateral_sources) 75 self.sources.update(self.cte_sources) 76 self.outer_columns = outer_columns or [] 77 self.parent = parent 78 self.scope_type = scope_type 79 self.subquery_scopes = [] 80 self.derived_table_scopes = [] 81 self.table_scopes = [] 82 self.cte_scopes = [] 83 self.union_scopes = [] 84 self.udtf_scopes = [] 85 self.clear_cache()
87 def clear_cache(self): 88 self._collected = False 89 self._raw_columns = None 90 self._stars = None 91 self._derived_tables = None 92 self._udtfs = None 93 self._tables = None 94 self._ctes = None 95 self._subqueries = None 96 self._selected_sources = None 97 self._columns = None 98 self._external_columns = None 99 self._join_hints = None 100 self._pivots = None 101 self._references = None
103 def branch( 104 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 105 ): 106 """Branch from the current scope to a new, inner scope""" 107 return Scope( 108 expression=expression.unnest(), 109 sources=sources.copy() if sources else None, 110 parent=self, 111 scope_type=scope_type, 112 cte_sources={**self.cte_sources, **(cte_sources or {})}, 113 lateral_sources=lateral_sources.copy() if lateral_sources else None, 114 **kwargs, 115 )
Branch from the current scope to a new, inner scope
168 def replace(self, old, new): 169 """ 170 Replace `old` with `new`. 171 172 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 173 174 Args: 175 old (exp.Expression): old node 176 new (exp.Expression): new node 177 """ 178 old.replace(new) 179 self.clear_cache()
Replace old
with new
.
This can be used instead of exp.Expression.replace
to ensure the Scope
is kept up-to-date.
Arguments:
- old (exp.Expression): old node
- new (exp.Expression): new node
181 @property 182 def tables(self): 183 """ 184 List of tables in this scope. 185 186 Returns: 187 list[exp.Table]: tables 188 """ 189 self._ensure_collected() 190 return self._tables
List of tables in this scope.
Returns:
list[exp.Table]: tables
192 @property 193 def ctes(self): 194 """ 195 List of CTEs in this scope. 196 197 Returns: 198 list[exp.CTE]: ctes 199 """ 200 self._ensure_collected() 201 return self._ctes
List of CTEs in this scope.
Returns:
list[exp.CTE]: ctes
203 @property 204 def derived_tables(self): 205 """ 206 List of derived tables in this scope. 207 208 For example: 209 SELECT * FROM (SELECT ...) <- that's a derived table 210 211 Returns: 212 list[exp.Subquery]: derived tables 213 """ 214 self._ensure_collected() 215 return self._derived_tables
List of derived tables in this scope.
For example:
SELECT * FROM (SELECT ...) <- that's a derived table
Returns:
list[exp.Subquery]: derived tables
217 @property 218 def udtfs(self): 219 """ 220 List of "User Defined Tabular Functions" in this scope. 221 222 Returns: 223 list[exp.UDTF]: UDTFs 224 """ 225 self._ensure_collected() 226 return self._udtfs
List of "User Defined Tabular Functions" in this scope.
Returns:
list[exp.UDTF]: UDTFs
228 @property 229 def subqueries(self): 230 """ 231 List of subqueries in this scope. 232 233 For example: 234 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 235 236 Returns: 237 list[exp.Select | exp.Union]: subqueries 238 """ 239 self._ensure_collected() 240 return self._subqueries
List of subqueries in this scope.
For example:
SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
Returns:
list[exp.Select | exp.Union]: subqueries
242 @property 243 def stars(self) -> t.List[exp.Column | exp.Dot]: 244 """ 245 List of star expressions (columns or dots) in this scope. 246 """ 247 self._ensure_collected() 248 return self._stars
List of star expressions (columns or dots) in this scope.
250 @property 251 def columns(self): 252 """ 253 List of columns in this scope. 254 255 Returns: 256 list[exp.Column]: Column instances in this scope, plus any 257 Columns that reference this scope from correlated subqueries. 258 """ 259 if self._columns is None: 260 self._ensure_collected() 261 columns = self._raw_columns 262 263 external_columns = [ 264 column 265 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 266 for column in scope.external_columns 267 ] 268 269 named_selects = set(self.expression.named_selects) 270 271 self._columns = [] 272 for column in columns + external_columns: 273 ancestor = column.find_ancestor( 274 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 275 ) 276 if ( 277 not ancestor 278 or column.table 279 or isinstance(ancestor, exp.Select) 280 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 281 or ( 282 isinstance(ancestor, exp.Order) 283 and ( 284 isinstance(ancestor.parent, exp.Window) 285 or column.name not in named_selects 286 ) 287 ) 288 ): 289 self._columns.append(column) 290 291 return self._columns
List of columns in this scope.
Returns:
list[exp.Column]: Column instances in this scope, plus any Columns that reference this scope from correlated subqueries.
293 @property 294 def selected_sources(self): 295 """ 296 Mapping of nodes and sources that are actually selected from in this scope. 297 298 That is, all tables in a schema are selectable at any point. But a 299 table only becomes a selected source if it's included in a FROM or JOIN clause. 300 301 Returns: 302 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 303 """ 304 if self._selected_sources is None: 305 result = {} 306 307 for name, node in self.references: 308 if name in result: 309 raise OptimizeError(f"Alias already used: {name}") 310 if name in self.sources: 311 result[name] = (node, self.sources[name]) 312 313 self._selected_sources = result 314 return self._selected_sources
Mapping of nodes and sources that are actually selected from in this scope.
That is, all tables in a schema are selectable at any point. But a table only becomes a selected source if it's included in a FROM or JOIN clause.
Returns:
dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
316 @property 317 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 318 if self._references is None: 319 self._references = [] 320 321 for table in self.tables: 322 self._references.append((table.alias_or_name, table)) 323 for expression in itertools.chain(self.derived_tables, self.udtfs): 324 self._references.append( 325 ( 326 expression.alias, 327 expression if expression.args.get("pivots") else expression.unnest(), 328 ) 329 ) 330 331 return self._references
333 @property 334 def external_columns(self): 335 """ 336 Columns that appear to reference sources in outer scopes. 337 338 Returns: 339 list[exp.Column]: Column instances that don't reference 340 sources in the current scope. 341 """ 342 if self._external_columns is None: 343 if isinstance(self.expression, exp.Union): 344 left, right = self.union_scopes 345 self._external_columns = left.external_columns + right.external_columns 346 else: 347 self._external_columns = [ 348 c for c in self.columns if c.table not in self.selected_sources 349 ] 350 351 return self._external_columns
Columns that appear to reference sources in outer scopes.
Returns:
list[exp.Column]: Column instances that don't reference sources in the current scope.
353 @property 354 def unqualified_columns(self): 355 """ 356 Unqualified columns in the current scope. 357 358 Returns: 359 list[exp.Column]: Unqualified columns 360 """ 361 return [c for c in self.columns if not c.table]
Unqualified columns in the current scope.
Returns:
list[exp.Column]: Unqualified columns
363 @property 364 def join_hints(self): 365 """ 366 Hints that exist in the scope that reference tables 367 368 Returns: 369 list[exp.JoinHint]: Join hints that are referenced within the scope 370 """ 371 if self._join_hints is None: 372 return [] 373 return self._join_hints
Hints that exist in the scope that reference tables
Returns:
list[exp.JoinHint]: Join hints that are referenced within the scope
384 def source_columns(self, source_name): 385 """ 386 Get all columns in the current scope for a particular source. 387 388 Args: 389 source_name (str): Name of the source 390 Returns: 391 list[exp.Column]: Column instances that reference `source_name` 392 """ 393 return [column for column in self.columns if column.table == source_name]
Get all columns in the current scope for a particular source.
Arguments:
- source_name (str): Name of the source
Returns:
list[exp.Column]: Column instances that reference
source_name
395 @property 396 def is_subquery(self): 397 """Determine if this scope is a subquery""" 398 return self.scope_type == ScopeType.SUBQUERY
Determine if this scope is a subquery
400 @property 401 def is_derived_table(self): 402 """Determine if this scope is a derived table""" 403 return self.scope_type == ScopeType.DERIVED_TABLE
Determine if this scope is a derived table
405 @property 406 def is_union(self): 407 """Determine if this scope is a union""" 408 return self.scope_type == ScopeType.UNION
Determine if this scope is a union
410 @property 411 def is_cte(self): 412 """Determine if this scope is a common table expression""" 413 return self.scope_type == ScopeType.CTE
Determine if this scope is a common table expression
415 @property 416 def is_root(self): 417 """Determine if this is the root scope""" 418 return self.scope_type == ScopeType.ROOT
Determine if this is the root scope
420 @property 421 def is_udtf(self): 422 """Determine if this scope is a UDTF (User Defined Table Function)""" 423 return self.scope_type == ScopeType.UDTF
Determine if this scope is a UDTF (User Defined Table Function)
433 def rename_source(self, old_name, new_name): 434 """Rename a source in this scope""" 435 columns = self.sources.pop(old_name or "", []) 436 self.sources[new_name] = columns
Rename a source in this scope
438 def add_source(self, name, source): 439 """Add a source to this scope""" 440 self.sources[name] = source 441 self.clear_cache()
Add a source to this scope
443 def remove_source(self, name): 444 """Remove a source from this scope""" 445 self.sources.pop(name, None) 446 self.clear_cache()
Remove a source from this scope
451 def traverse(self): 452 """ 453 Traverse the scope tree from this node. 454 455 Yields: 456 Scope: scope instances in depth-first-search post-order 457 """ 458 stack = [self] 459 result = [] 460 while stack: 461 scope = stack.pop() 462 result.append(scope) 463 stack.extend( 464 itertools.chain( 465 scope.cte_scopes, 466 scope.union_scopes, 467 scope.table_scopes, 468 scope.subquery_scopes, 469 ) 470 ) 471 472 yield from reversed(result)
Traverse the scope tree from this node.
Yields:
Scope: scope instances in depth-first-search post-order
474 def ref_count(self): 475 """ 476 Count the number of times each scope in this tree is referenced. 477 478 Returns: 479 dict[int, int]: Mapping of Scope instance ID to reference count 480 """ 481 scope_ref_count = defaultdict(lambda: 0) 482 483 for scope in self.traverse(): 484 for _, source in scope.selected_sources.values(): 485 scope_ref_count[id(source)] += 1 486 487 return scope_ref_count
Count the number of times each scope in this tree is referenced.
Returns:
dict[int, int]: Mapping of Scope instance ID to reference count
490def traverse_scope(expression: exp.Expression) -> t.List[Scope]: 491 """ 492 Traverse an expression by its "scopes". 493 494 "Scope" represents the current context of a Select statement. 495 496 This is helpful for optimizing queries, where we need more information than 497 the expression tree itself. For example, we might care about the source 498 names within a subquery. Returns a list because a generator could result in 499 incomplete properties which is confusing. 500 501 Examples: 502 >>> import sqlglot 503 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 504 >>> scopes = traverse_scope(expression) 505 >>> scopes[0].expression.sql(), list(scopes[0].sources) 506 ('SELECT a FROM x', ['x']) 507 >>> scopes[1].expression.sql(), list(scopes[1].sources) 508 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 509 510 Args: 511 expression: Expression to traverse 512 513 Returns: 514 A list of the created scope instances 515 """ 516 if isinstance(expression, TRAVERSABLES): 517 return list(_traverse_scope(Scope(expression))) 518 return []
Traverse an expression by its "scopes".
"Scope" represents the current context of a Select statement.
This is helpful for optimizing queries, where we need more information than the expression tree itself. For example, we might care about the source names within a subquery. Returns a list because a generator could result in incomplete properties which is confusing.
Examples:
>>> import sqlglot >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") >>> scopes = traverse_scope(expression) >>> scopes[0].expression.sql(), list(scopes[0].sources) ('SELECT a FROM x', ['x']) >>> scopes[1].expression.sql(), list(scopes[1].sources) ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
Arguments:
- expression: Expression to traverse
Returns:
A list of the created scope instances
521def build_scope(expression: exp.Expression) -> t.Optional[Scope]: 522 """ 523 Build a scope tree. 524 525 Args: 526 expression: Expression to build the scope tree for. 527 528 Returns: 529 The root scope 530 """ 531 return seq_get(traverse_scope(expression), -1)
Build a scope tree.
Arguments:
- expression: Expression to build the scope tree for.
Returns:
The root scope
778def walk_in_scope(expression, bfs=True, prune=None): 779 """ 780 Returns a generator object which visits all nodes in the syntrax tree, stopping at 781 nodes that start child scopes. 782 783 Args: 784 expression (exp.Expression): 785 bfs (bool): if set to True the BFS traversal order will be applied, 786 otherwise the DFS traversal will be used instead. 787 prune ((node, parent, arg_key) -> bool): callable that returns True if 788 the generator should stop traversing this branch of the tree. 789 790 Yields: 791 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 792 """ 793 # We'll use this variable to pass state into the dfs generator. 794 # Whenever we set it to True, we exclude a subtree from traversal. 795 crossed_scope_boundary = False 796 797 for node in expression.walk( 798 bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n)) 799 ): 800 crossed_scope_boundary = False 801 802 yield node 803 804 if node is expression: 805 continue 806 if ( 807 isinstance(node, exp.CTE) 808 or ( 809 isinstance(node.parent, (exp.From, exp.Join, exp.Subquery)) 810 and (_is_derived_table(node) or isinstance(node, exp.UDTF)) 811 ) 812 or isinstance(node, exp.UNWRAPPED_QUERIES) 813 ): 814 crossed_scope_boundary = True 815 816 if isinstance(node, (exp.Subquery, exp.UDTF)): 817 # The following args are not actually in the inner scope, so we should visit them 818 for key in ("joins", "laterals", "pivots"): 819 for arg in node.args.get(key) or []: 820 yield from walk_in_scope(arg, bfs=bfs)
Returns a generator object which visits all nodes in the syntrax tree, stopping at nodes that start child scopes.
Arguments:
- expression (exp.Expression):
- bfs (bool): if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead.
- prune ((node, parent, arg_key) -> bool): callable that returns True if the generator should stop traversing this branch of the tree.
Yields:
tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
823def find_all_in_scope(expression, expression_types, bfs=True): 824 """ 825 Returns a generator object which visits all nodes in this scope and only yields those that 826 match at least one of the specified expression types. 827 828 This does NOT traverse into subscopes. 829 830 Args: 831 expression (exp.Expression): 832 expression_types (tuple[type]|type): the expression type(s) to match. 833 bfs (bool): True to use breadth-first search, False to use depth-first. 834 835 Yields: 836 exp.Expression: nodes 837 """ 838 for expression in walk_in_scope(expression, bfs=bfs): 839 if isinstance(expression, tuple(ensure_collection(expression_types))): 840 yield expression
Returns a generator object which visits all nodes in this scope and only yields those that match at least one of the specified expression types.
This does NOT traverse into subscopes.
Arguments:
- expression (exp.Expression):
- expression_types (tuple[type]|type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Yields:
exp.Expression: nodes
843def find_in_scope(expression, expression_types, bfs=True): 844 """ 845 Returns the first node in this scope which matches at least one of the specified types. 846 847 This does NOT traverse into subscopes. 848 849 Args: 850 expression (exp.Expression): 851 expression_types (tuple[type]|type): the expression type(s) to match. 852 bfs (bool): True to use breadth-first search, False to use depth-first. 853 854 Returns: 855 exp.Expression: the node which matches the criteria or None if no node matching 856 the criteria was found. 857 """ 858 return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
Returns the first node in this scope which matches at least one of the specified types.
This does NOT traverse into subscopes.
Arguments:
- expression (exp.Expression):
- expression_types (tuple[type]|type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Returns:
exp.Expression: the node which matches the criteria or None if no node matching the criteria was found.