Merging upstream version 25.29.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
de8c8a17d0
commit
1e53504dfc
97 changed files with 64720 additions and 61752 deletions
|
@ -10,6 +10,7 @@ import typing as t
|
|||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from heapq import heappop, heappush
|
||||
from itertools import chain
|
||||
|
||||
from sqlglot import Dialect, expressions as exp
|
||||
from sqlglot.helper import seq_get
|
||||
|
@ -36,7 +37,8 @@ class Remove:
|
|||
class Move:
|
||||
"""Indicates that an existing node's position within the tree has changed"""
|
||||
|
||||
expression: exp.Expression
|
||||
source: exp.Expression
|
||||
target: exp.Expression
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
@ -93,11 +95,11 @@ def diff(
|
|||
matchings: the list of pre-matched node pairs which is used to help the algorithm's
|
||||
heuristics produce better results for subtrees that are known by a caller to be matching.
|
||||
Note: expression references in this list must refer to the same node objects that are
|
||||
referenced in source / target trees.
|
||||
referenced in the source / target trees.
|
||||
delta_only: excludes all `Keep` nodes from the diff.
|
||||
copy: whether to copy the input expressions.
|
||||
Note: if this is set to false, the caller must ensure that there are no shared references
|
||||
in the two ASTs, otherwise the diffing algorithm may produce unexpected behavior.
|
||||
in the two trees, otherwise the diffing algorithm may produce unexpected behavior.
|
||||
kwargs: additional arguments to pass to the ChangeDistiller instance.
|
||||
|
||||
Returns:
|
||||
|
@ -111,11 +113,19 @@ def diff(
|
|||
def compute_node_mappings(
|
||||
original: exp.Expression, copy: exp.Expression
|
||||
) -> t.Dict[int, exp.Expression]:
|
||||
return {
|
||||
id(old_node): new_node
|
||||
for old_node, new_node in zip(original.walk(), copy.walk())
|
||||
if id(old_node) in matching_ids
|
||||
}
|
||||
node_mapping = {}
|
||||
for old_node, new_node in zip(
|
||||
reversed(tuple(original.walk())), reversed(tuple(copy.walk()))
|
||||
):
|
||||
# We cache the hash of each new node here to speed up equality comparisons. If the input
|
||||
# trees aren't copied, these hashes will be evicted before returning the edit script.
|
||||
new_node._hash = hash(new_node)
|
||||
|
||||
old_node_id = id(old_node)
|
||||
if old_node_id in matching_ids:
|
||||
node_mapping[old_node_id] = new_node
|
||||
|
||||
return node_mapping
|
||||
|
||||
source_copy = source.copy() if copy else source
|
||||
target_copy = target.copy() if copy else target
|
||||
|
@ -126,13 +136,19 @@ def diff(
|
|||
}
|
||||
matchings_copy = [(node_mappings[id(s)], node_mappings[id(t)]) for s, t in matchings]
|
||||
|
||||
return ChangeDistiller(**kwargs).diff(
|
||||
edit_script = ChangeDistiller(**kwargs).diff(
|
||||
source_copy,
|
||||
target_copy,
|
||||
matchings=matchings_copy,
|
||||
delta_only=delta_only,
|
||||
)
|
||||
|
||||
if not copy:
|
||||
for node in chain(source.walk(), target.walk()):
|
||||
node._hash = None
|
||||
|
||||
return edit_script
|
||||
|
||||
|
||||
# The expression types for which Update edits are allowed.
|
||||
UPDATABLE_EXPRESSION_TYPES = (
|
||||
|
@ -186,29 +202,39 @@ class ChangeDistiller:
|
|||
self._bigram_histo_cache: t.Dict[int, t.DefaultDict[str, int]] = {}
|
||||
|
||||
matching_set = self._compute_matching_set() | set(pre_matched_nodes.items())
|
||||
return self._generate_edit_script(matching_set, delta_only)
|
||||
return self._generate_edit_script(dict(matching_set), delta_only)
|
||||
|
||||
def _generate_edit_script(
|
||||
self,
|
||||
matching_set: t.Set[t.Tuple[int, int]],
|
||||
delta_only: bool,
|
||||
) -> t.List[Edit]:
|
||||
def _generate_edit_script(self, matchings: t.Dict[int, int], delta_only: bool) -> t.List[Edit]:
|
||||
edit_script: t.List[Edit] = []
|
||||
for removed_node_id in self._unmatched_source_nodes:
|
||||
edit_script.append(Remove(self._source_index[removed_node_id]))
|
||||
for inserted_node_id in self._unmatched_target_nodes:
|
||||
edit_script.append(Insert(self._target_index[inserted_node_id]))
|
||||
for kept_source_node_id, kept_target_node_id in matching_set:
|
||||
for kept_source_node_id, kept_target_node_id in matchings.items():
|
||||
source_node = self._source_index[kept_source_node_id]
|
||||
target_node = self._target_index[kept_target_node_id]
|
||||
|
||||
if (
|
||||
not isinstance(source_node, UPDATABLE_EXPRESSION_TYPES)
|
||||
or source_node == target_node
|
||||
):
|
||||
edit_script.extend(
|
||||
self._generate_move_edits(source_node, target_node, matching_set)
|
||||
)
|
||||
identical_nodes = source_node == target_node
|
||||
|
||||
if not isinstance(source_node, UPDATABLE_EXPRESSION_TYPES) or identical_nodes:
|
||||
if identical_nodes:
|
||||
source_parent = source_node.parent
|
||||
target_parent = target_node.parent
|
||||
|
||||
if (
|
||||
(source_parent and not target_parent)
|
||||
or (not source_parent and target_parent)
|
||||
or (
|
||||
source_parent
|
||||
and target_parent
|
||||
and matchings.get(id(source_parent)) != id(target_parent)
|
||||
)
|
||||
):
|
||||
edit_script.append(Move(source=source_node, target=target_node))
|
||||
else:
|
||||
edit_script.extend(
|
||||
self._generate_move_edits(source_node, target_node, matchings)
|
||||
)
|
||||
|
||||
source_non_expression_leaves = dict(_get_non_expression_leaves(source_node))
|
||||
target_non_expression_leaves = dict(_get_non_expression_leaves(target_node))
|
||||
|
@ -223,17 +249,21 @@ class ChangeDistiller:
|
|||
return edit_script
|
||||
|
||||
def _generate_move_edits(
|
||||
self, source: exp.Expression, target: exp.Expression, matching_set: t.Set[t.Tuple[int, int]]
|
||||
self, source: exp.Expression, target: exp.Expression, matchings: t.Dict[int, int]
|
||||
) -> t.List[Move]:
|
||||
source_args = [id(e) for e in _expression_only_args(source)]
|
||||
target_args = [id(e) for e in _expression_only_args(target)]
|
||||
|
||||
args_lcs = set(_lcs(source_args, target_args, lambda l, r: (l, r) in matching_set))
|
||||
args_lcs = set(
|
||||
_lcs(source_args, target_args, lambda l, r: matchings.get(t.cast(int, l)) == r)
|
||||
)
|
||||
|
||||
move_edits = []
|
||||
for a in source_args:
|
||||
if a not in args_lcs and a not in self._unmatched_source_nodes:
|
||||
move_edits.append(Move(self._source_index[a]))
|
||||
move_edits.append(
|
||||
Move(source=self._source_index[a], target=self._target_index[matchings[a]])
|
||||
)
|
||||
|
||||
return move_edits
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue