1
0
Fork 0

Merging upstream version 25.26.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:56:02 +01:00
parent 9138e4b92a
commit 829a709061
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
117 changed files with 49296 additions and 47316 deletions

View file

@ -12,7 +12,7 @@ from dataclasses import dataclass
from heapq import heappop, heappush
from sqlglot import Dialect, expressions as exp
from sqlglot.helper import ensure_list
from sqlglot.helper import seq_get
if t.TYPE_CHECKING:
from sqlglot.dialects.dialect import DialectType
@ -185,7 +185,7 @@ class ChangeDistiller:
self._unmatched_target_nodes = set(self._target_index) - set(pre_matched_nodes.values())
self._bigram_histo_cache: t.Dict[int, t.DefaultDict[str, int]] = {}
matching_set = self._compute_matching_set() | {(s, t) for s, t in pre_matched_nodes.items()}
matching_set = self._compute_matching_set() | set(pre_matched_nodes.items())
return self._generate_edit_script(matching_set, delta_only)
def _generate_edit_script(
@ -201,6 +201,7 @@ class ChangeDistiller:
for kept_source_node_id, kept_target_node_id in matching_set:
source_node = self._source_index[kept_source_node_id]
target_node = self._target_index[kept_target_node_id]
if (
not isinstance(source_node, UPDATABLE_EXPRESSION_TYPES)
or source_node == target_node
@ -208,7 +209,13 @@ class ChangeDistiller:
edit_script.extend(
self._generate_move_edits(source_node, target_node, matching_set)
)
if not delta_only:
source_non_expression_leaves = dict(_get_non_expression_leaves(source_node))
target_non_expression_leaves = dict(_get_non_expression_leaves(target_node))
if source_non_expression_leaves != target_non_expression_leaves:
edit_script.append(Update(source_node, target_node))
elif not delta_only:
edit_script.append(Keep(source_node, target_node))
else:
edit_script.append(Update(source_node, target_node))
@ -246,8 +253,8 @@ class ChangeDistiller:
source_node = self._source_index[source_node_id]
target_node = self._target_index[target_node_id]
if _is_same_type(source_node, target_node):
source_leaf_ids = {id(l) for l in _get_leaves(source_node)}
target_leaf_ids = {id(l) for l in _get_leaves(target_node)}
source_leaf_ids = {id(l) for l in _get_expression_leaves(source_node)}
target_leaf_ids = {id(l) for l in _get_expression_leaves(target_node)}
max_leaves_num = max(len(source_leaf_ids), len(target_leaf_ids))
if max_leaves_num:
@ -277,10 +284,10 @@ class ChangeDistiller:
def _compute_leaf_matching_set(self) -> t.Set[t.Tuple[int, int]]:
candidate_matchings: t.List[t.Tuple[float, int, int, exp.Expression, exp.Expression]] = []
source_leaves = list(_get_leaves(self._source))
target_leaves = list(_get_leaves(self._target))
for source_leaf in source_leaves:
for target_leaf in target_leaves:
source_expression_leaves = list(_get_expression_leaves(self._source))
target_expression_leaves = list(_get_expression_leaves(self._target))
for source_leaf in source_expression_leaves:
for target_leaf in target_expression_leaves:
if _is_same_type(source_leaf, target_leaf):
similarity_score = self._dice_coefficient(source_leaf, target_leaf)
if similarity_score >= self.f:
@ -338,18 +345,28 @@ class ChangeDistiller:
return bigram_histo
def _get_leaves(expression: exp.Expression) -> t.Iterator[exp.Expression]:
def _get_expression_leaves(expression: exp.Expression) -> t.Iterator[exp.Expression]:
has_child_exprs = False
for node in expression.iter_expressions():
if not isinstance(node, IGNORED_LEAF_EXPRESSION_TYPES):
has_child_exprs = True
yield from _get_leaves(node)
yield from _get_expression_leaves(node)
if not has_child_exprs:
yield expression
def _get_non_expression_leaves(expression: exp.Expression) -> t.Iterator[t.Tuple[str, t.Any]]:
    """Yield the ``(name, value)`` entries of ``expression.args`` that do not hold expressions.

    An entry is skipped when its value is an ``exp.Expression``, or when it is a
    list for which ``seq_get(value, 0)`` is an ``exp.Expression``. Every other
    entry is yielded unchanged, in the iteration order of ``expression.args``.
    """
    for name, arg_value in expression.args.items():
        if isinstance(arg_value, exp.Expression):
            continue

        # A list is classified by its leading item only (presumably what
        # seq_get(..., 0) returns; confirm against sqlglot.helper).
        if isinstance(arg_value, list) and isinstance(seq_get(arg_value, 0), exp.Expression):
            continue

        yield name, arg_value
def _is_same_type(source: exp.Expression, target: exp.Expression) -> bool:
if type(source) is type(target):
if isinstance(source, exp.Join):
@ -372,16 +389,12 @@ def _parent_similarity_score(
return 1 + _parent_similarity_score(source.parent, target.parent)
def _expression_only_args(expression: exp.Expression) -> t.List[exp.Expression]:
    """Return the expression-typed argument values of ``expression`` as a list.

    Every value in ``expression.args`` is passed through ``ensure_list`` and the
    results are concatenated. Items that are not ``exp.Expression`` instances, or
    that are instances of ``IGNORED_LEAF_EXPRESSION_TYPES``, are dropped. A falsy
    ``expression`` produces an empty list.
    """
    flattened: t.List[t.Union[exp.Expression, t.List]] = []
    if expression:
        for arg_value in expression.args.values():
            flattened += ensure_list(arg_value)

    kept = []
    for candidate in flattened:
        if not isinstance(candidate, exp.Expression):
            continue
        if isinstance(candidate, IGNORED_LEAF_EXPRESSION_TYPES):
            continue
        kept.append(candidate)
    return kept
def _expression_only_args(expression: exp.Expression) -> t.Iterator[exp.Expression]:
    """Yield the items of ``expression.iter_expressions()`` that are not ignored leaf types.

    Items that are instances of ``IGNORED_LEAF_EXPRESSION_TYPES`` are skipped;
    all others are yielded in the order ``iter_expressions()`` produces them.
    """
    for child in expression.iter_expressions():
        if isinstance(child, IGNORED_LEAF_EXPRESSION_TYPES):
            continue
        yield child
def _lcs(