Skip to content

Commit 7be142d

Browse files
committed
feat!: optional rename tracking for directories.
Depending on the source of the rename-information, items that are children of renamed parents may be provided to enable rename tracking based on these containers, instead of always resorting to tracking leaf nodes (i.e. blobs).
1 parent 5c1f010 commit 7be142d

File tree

6 files changed

+396
-50
lines changed

6 files changed

+396
-50
lines changed

crate-status.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,10 +311,13 @@ Check out the [performance discussion][gix-diff-performance] as well.
311311
* **lines**
312312
* [x] Simple line-by-line diffs powered by the `imara-diff` crate.
313313
* **generic rename tracker to find renames and copies**
314-
* [x] find by exact match
315-
* [x] find by similarity check
314+
* [x] find blobs by exact match
315+
* [x] find blobs by similarity check
316316
* [ ] heuristics to find best candidate
317-
* [ ] find by basename to help detecting simple moves
317+
* [ ] find by basename to support similarity check
318+
* [x] directory tracking
319+
- [x] by identity
320+
- [ ] by similarity
318321
* **blob**
319322
* [x] a choice of to-worktree, to-git and to-worktree-if-needed conversions
320323
* [x] `textconv` filters

gix-diff/src/rewrites/tracker.rs

Lines changed: 123 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
//!
55
//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
66
//! - the set used for copy-detection is probably smaller by default.
7+
78
use std::ops::Range;
89

9-
use bstr::BStr;
10+
use bstr::{BStr, ByteSlice};
1011
use gix_object::tree::{EntryKind, EntryMode};
1112

13+
use crate::tree::visit::{Action, ChangeId, Relation};
1214
use crate::{
1315
blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind},
1416
rewrites::{CopySource, Outcome, Tracker},
@@ -28,11 +30,22 @@ pub enum ChangeKind {
2830

2931
/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
3032
pub trait Change: Clone {
31-
/// Return the hash of this change for identification.
33+
/// Return the hash of the object behind this change for identification.
3234
///
3335
/// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34-
/// conversions.
36+
/// conversions. What matters is that the IDs are comparable.
3537
fn id(&self) -> &gix_hash::oid;
38+
/// Return the relation that this change may have with other changes.
39+
///
40+
/// It allows to associate a directory with its children that are added or removed at the same moment.
41+
/// Note that this is ignored for modifications.
42+
///
43+
/// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
44+
/// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
45+
/// with the respective relationship.
46+
///
47+
/// Also note that the tracker only sees what's given to it, it will not lookup trees or match paths itself.
48+
fn relation(&self) -> Option<Relation>;
3649
/// Return the kind of this change.
3750
fn kind(&self) -> ChangeKind;
3851
/// Return more information about the kind of entry affected by this change.
@@ -55,11 +68,11 @@ impl<T: Change> Item<T> {
5568
fn location<'a>(&self, backing: &'a [u8]) -> &'a BStr {
5669
backing[self.path.clone()].as_ref()
5770
}
58-
fn entry_mode_compatible(&self, mode: EntryMode) -> bool {
71+
fn entry_mode_compatible(&self, other: EntryMode) -> bool {
5972
use EntryKind::*;
6073
matches!(
61-
(mode.kind(), self.change.entry_mode().kind()),
62-
(Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link)
74+
(other.kind(), self.change.entry_mode().kind()),
75+
(Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link) | (Tree, Tree)
6376
)
6477
}
6578

@@ -108,7 +121,7 @@ pub mod visit {
108121
}
109122

110123
/// A change along with a location.
111-
#[derive(Clone)]
124+
#[derive(Debug, Clone)]
112125
pub struct Destination<'a, T: Clone> {
113126
/// The change at the given `location`.
114127
pub change: T,
@@ -150,23 +163,25 @@ impl<T: Change> Tracker<T> {
150163
impl<T: Change> Tracker<T> {
151164
/// We may refuse the push if that information isn't needed for what we have to track.
152165
pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> {
153-
if !change.entry_mode().is_blob_or_symlink() {
166+
let change_kind = change.kind();
167+
if let (None, ChangeKind::Modification { .. }) = (self.rewrites.copies, change_kind) {
154168
return Some(change);
155-
}
156-
let keep = match (self.rewrites.copies, change.kind()) {
157-
(Some(_find_copies), _) => true,
158-
(None, ChangeKind::Modification { .. }) => false,
159-
(None, _) => true,
160169
};
161170

162-
if !keep {
171+
let relation = change
172+
.relation()
173+
.filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion));
174+
let entry_kind = change.entry_mode().kind();
175+
if let (None | Some(Relation::ChildOfParent(_)), EntryKind::Commit | EntryKind::Tree) = (relation, entry_kind) {
163176
return Some(change);
164-
}
177+
};
165178

166179
let start = self.path_backing.len();
167180
self.path_backing.extend_from_slice(location);
181+
let path = start..self.path_backing.len();
182+
168183
self.items.push(Item {
169-
path: start..self.path_backing.len(),
184+
path,
170185
change,
171186
emitted: false,
172187
});
@@ -178,6 +193,8 @@ impl<T: Change> Tracker<T> {
178193
/// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
179194
/// the destination of a copy or rename, or with `None` for source if no relation to other
180195
/// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened.
196+
/// Note that directories with [relation](Relation) will be emitted if there is a match, along with all their matching
197+
/// child-items which are similarly bundled as rename.
181198
///
182199
/// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
183200
/// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
@@ -195,7 +212,7 @@ impl<T: Change> Tracker<T> {
195212
/// will panic if `change` is not a modification, and it's valid to not call `push` at all.
196213
pub fn emit<PushSourceTreeFn, E>(
197214
&mut self,
198-
mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
215+
mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
199216
diff_cache: &mut crate::blob::Platform,
200217
objects: &impl gix_object::FindObjectOrHeader,
201218
mut push_source_tree: PushSourceTreeFn,
@@ -272,7 +289,7 @@ impl<T: Change> Tracker<T> {
272289
change: item.change,
273290
},
274291
None,
275-
) == crate::tree::visit::Action::Cancel
292+
) == Action::Cancel
276293
{
277294
break;
278295
}
@@ -285,17 +302,15 @@ impl<T: Change> Tracker<T> {
285302
fn match_pairs_of_kind(
286303
&mut self,
287304
kind: visit::SourceKind,
288-
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
305+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
289306
percentage: Option<f32>,
290307
out: &mut Outcome,
291308
diff_cache: &mut crate::blob::Platform,
292309
objects: &impl gix_object::FindObjectOrHeader,
293310
) -> Result<(), emit::Error> {
294311
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
295312
let needs_second_pass = !needs_exact_match(percentage);
296-
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)?
297-
== crate::tree::visit::Action::Cancel
298-
{
313+
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)? == Action::Cancel {
299314
return Ok(());
300315
}
301316
if needs_second_pass {
@@ -328,13 +343,13 @@ impl<T: Change> Tracker<T> {
328343

329344
fn match_pairs(
330345
&mut self,
331-
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action,
346+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
332347
percentage: Option<f32>,
333348
kind: visit::SourceKind,
334349
stats: &mut Outcome,
335350
diff_cache: &mut crate::blob::Platform,
336351
objects: &impl gix_object::FindObjectOrHeader,
337-
) -> Result<crate::tree::visit::Action, emit::Error> {
352+
) -> Result<Action, emit::Error> {
338353
let mut dest_ofs = 0;
339354
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
340355
(!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item))
@@ -368,28 +383,102 @@ impl<T: Change> Tracker<T> {
368383
src_idx,
369384
)
370385
});
371-
if src.is_none() {
386+
let Some((src, src_idx)) = src else {
372387
continue;
373-
}
388+
};
374389
let location = dest.location(&self.path_backing);
375390
let change = dest.change.clone();
376391
let dest = visit::Destination { change, location };
377-
let src_idx = src.as_ref().map(|t| t.1);
378-
let res = cb(dest, src.map(|t| t.0));
392+
let relations = if percentage.is_none() {
393+
src.change.relation().zip(dest.change.relation())
394+
} else {
395+
None
396+
};
397+
let res = cb(dest, Some(src));
379398

380399
self.items[dest_idx].emitted = true;
381-
if let Some(src_idx) = src_idx {
382-
self.items[src_idx].emitted = true;
400+
self.items[src_idx].emitted = true;
401+
402+
if res == Action::Cancel {
403+
return Ok(Action::Cancel);
404+
}
405+
406+
if let Some((Relation::Parent(src), Relation::Parent(dst))) = relations {
407+
let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?;
408+
if res == Action::Cancel {
409+
return Ok(Action::Cancel);
410+
}
383411
}
412+
}
413+
Ok(Action::Continue)
414+
}
384415

385-
if res == crate::tree::visit::Action::Cancel {
386-
return Ok(crate::tree::visit::Action::Cancel);
416+
/// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
417+
/// as `src` and `dst` were an exact match (so all children have to match exactly).
418+
fn emit_child_renames_matching_identity(
419+
&mut self,
420+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
421+
kind: visit::SourceKind,
422+
src_parent_id: ChangeId,
423+
dst_parent_id: ChangeId,
424+
) -> Result<Action, emit::Error> {
425+
debug_assert_ne!(
426+
src_parent_id, dst_parent_id,
427+
"src and destination directories must be distinct"
428+
);
429+
let (mut src_items, mut dst_items) = (Vec::with_capacity(1), Vec::with_capacity(1));
430+
for item in self.items.iter_mut().filter(|item| !item.emitted) {
431+
match item.change.relation() {
432+
Some(Relation::ChildOfParent(id)) if id == src_parent_id => {
433+
src_items.push((item.change.id().to_owned(), item));
434+
}
435+
Some(Relation::ChildOfParent(id)) if id == dst_parent_id => {
436+
dst_items.push((item.change.id().to_owned(), item));
437+
}
438+
_ => continue,
439+
};
440+
}
441+
442+
for ((src_id, src_item), (dst_id, dst_item)) in src_items.into_iter().zip(dst_items) {
443+
// Since the parent items are already identical by ID, we know that the children will also match, we just
444+
// double-check to still have a chance to be correct in case some of that goes wrong.
445+
if src_id == dst_id
446+
&& filename(src_item.location(&self.path_backing)) == filename(dst_item.location(&self.path_backing))
447+
{
448+
let entry_mode = src_item.change.entry_mode();
449+
let location = src_item.location(&self.path_backing);
450+
let src = visit::Source {
451+
entry_mode,
452+
id: src_id,
453+
kind,
454+
location,
455+
change: &src_item.change,
456+
diff: None,
457+
};
458+
let location = dst_item.location(&self.path_backing);
459+
let change = dst_item.change.clone();
460+
let dst = visit::Destination { change, location };
461+
let res = cb(dst, Some(src));
462+
463+
src_item.emitted = true;
464+
dst_item.emitted = true;
465+
466+
if res == Action::Cancel {
467+
return Ok(res);
468+
}
469+
} else {
470+
gix_trace::warn!("Children of parents with change-id {src_parent_id} and {dst_parent_id} were not equal, even though their parents claimed to be");
471+
break;
387472
}
388473
}
389-
Ok(crate::tree::visit::Action::Continue)
474+
Ok(Action::Continue)
390475
}
391476
}
392477

478+
fn filename(path: &BStr) -> &BStr {
479+
path.rfind_byte(b'/').map_or(path, |idx| path[idx + 1..].as_bstr())
480+
}
481+
393482
/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
394483
fn estimate_involved_items(
395484
items: impl IntoIterator<Item = (bool, ChangeKind)>,
@@ -473,13 +562,9 @@ fn find_match<'a, T: Change>(
473562
if let Some(src) = res {
474563
return Ok(Some(src));
475564
}
476-
} else {
565+
} else if item_mode.is_blob() {
477566
let mut has_new = false;
478567
let percentage = percentage.expect("it's set to something below 1.0 and we assured this");
479-
debug_assert!(
480-
item_mode.is_blob(),
481-
"symlinks are matched exactly, and trees aren't used here"
482-
);
483568

484569
for (can_idx, src) in items
485570
.iter()

gix-diff/src/tree/visit.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ mod change_impls {
110110
use gix_hash::oid;
111111
use gix_object::tree::EntryMode;
112112

113+
use crate::tree::visit::Relation;
113114
use crate::{rewrites::tracker::ChangeKind, tree::visit::Change};
114115

115116
impl crate::rewrites::tracker::Change for crate::tree::visit::Change {
@@ -119,6 +120,13 @@ mod change_impls {
119120
}
120121
}
121122

123+
fn relation(&self) -> Option<Relation> {
124+
match self {
125+
Change::Addition { relation, .. } | Change::Deletion { relation, .. } => *relation,
126+
Change::Modification { .. } => None,
127+
}
128+
}
129+
122130
fn kind(&self) -> ChangeKind {
123131
match self {
124132
Change::Addition { .. } => ChangeKind::Addition,

0 commit comments

Comments
 (0)