4
4
//!
5
5
//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
6
6
//! - the set used for copy-detection is probably smaller by default.
7
+
7
8
use std:: ops:: Range ;
8
9
9
- use bstr:: BStr ;
10
+ use bstr:: { BStr , ByteSlice } ;
10
11
use gix_object:: tree:: { EntryKind , EntryMode } ;
11
12
13
+ use crate :: tree:: visit:: { Action , ChangeId , Relation } ;
12
14
use crate :: {
13
15
blob:: { platform:: prepare_diff:: Operation , DiffLineStats , ResourceKind } ,
14
16
rewrites:: { CopySource , Outcome , Tracker } ,
@@ -28,11 +30,22 @@ pub enum ChangeKind {
28
30
29
31
/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
30
32
pub trait Change : Clone {
31
- /// Return the hash of this change for identification.
33
+ /// Return the hash of the object behind this change for identification.
32
34
///
33
35
/// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
34
- /// conversions.
36
+ /// conversions. What matters is that the IDs are comparable.
35
37
fn id ( & self ) -> & gix_hash:: oid ;
38
+ /// Return the relation that this change may have with other changes.
39
+ ///
40
+ /// It allows associating a directory with its children that are added or removed at the same time.
41
+ /// Note that this is ignored for modifications.
42
+ ///
43
+ /// If rename-tracking should always be on leaf-level, this should be set to `None` consistently.
44
+ /// Note that trees will never be looked up by their `id` as their children are assumed to be passed in
45
+ /// with the respective relationship.
46
+ ///
47
+ /// Also note that the tracker only sees what's given to it; it will not look up trees or match paths itself.
48
+ fn relation ( & self ) -> Option < Relation > ;
36
49
/// Return the kind of this change.
37
50
fn kind ( & self ) -> ChangeKind ;
38
51
/// Return more information about the kind of entry affected by this change.
@@ -55,11 +68,11 @@ impl<T: Change> Item<T> {
55
68
fn location < ' a > ( & self , backing : & ' a [ u8 ] ) -> & ' a BStr {
56
69
backing[ self . path . clone ( ) ] . as_ref ( )
57
70
}
58
- fn entry_mode_compatible ( & self , mode : EntryMode ) -> bool {
71
+ fn entry_mode_compatible ( & self , other : EntryMode ) -> bool {
59
72
use EntryKind :: * ;
60
73
matches ! (
61
- ( mode . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
62
- ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link )
74
+ ( other . kind( ) , self . change. entry_mode( ) . kind( ) ) ,
75
+ ( Blob | BlobExecutable , Blob | BlobExecutable ) | ( Link , Link ) | ( Tree , Tree )
63
76
)
64
77
}
65
78
@@ -108,7 +121,7 @@ pub mod visit {
108
121
}
109
122
110
123
/// A change along with a location.
111
- #[ derive( Clone ) ]
124
+ #[ derive( Debug , Clone ) ]
112
125
pub struct Destination < ' a , T : Clone > {
113
126
/// The change at the given `location`.
114
127
pub change : T ,
@@ -150,23 +163,25 @@ impl<T: Change> Tracker<T> {
150
163
impl < T : Change > Tracker < T > {
151
164
/// We may refuse the push if that information isn't needed for what we have to track.
152
165
pub fn try_push_change ( & mut self , change : T , location : & BStr ) -> Option < T > {
153
- if !change. entry_mode ( ) . is_blob_or_symlink ( ) {
166
+ let change_kind = change. kind ( ) ;
167
+ if let ( None , ChangeKind :: Modification { .. } ) = ( self . rewrites . copies , change_kind) {
154
168
return Some ( change) ;
155
- }
156
- let keep = match ( self . rewrites . copies , change. kind ( ) ) {
157
- ( Some ( _find_copies) , _) => true ,
158
- ( None , ChangeKind :: Modification { .. } ) => false ,
159
- ( None , _) => true ,
160
169
} ;
161
170
162
- if !keep {
171
+ let relation = change
172
+ . relation ( )
173
+ . filter ( |_| matches ! ( change_kind, ChangeKind :: Addition | ChangeKind :: Deletion ) ) ;
174
+ let entry_kind = change. entry_mode ( ) . kind ( ) ;
175
+ if let ( None | Some ( Relation :: ChildOfParent ( _) ) , EntryKind :: Commit | EntryKind :: Tree ) = ( relation, entry_kind) {
163
176
return Some ( change) ;
164
- }
177
+ } ;
165
178
166
179
let start = self . path_backing . len ( ) ;
167
180
self . path_backing . extend_from_slice ( location) ;
181
+ let path = start..self . path_backing . len ( ) ;
182
+
168
183
self . items . push ( Item {
169
- path : start.. self . path_backing . len ( ) ,
184
+ path,
170
185
change,
171
186
emitted : false ,
172
187
} ) ;
@@ -178,6 +193,8 @@ impl<T: Change> Tracker<T> {
178
193
/// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
179
194
/// the destination of a copy or rename, or with `None` for source if no relation to other
180
195
/// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened.
196
+ /// Note that directories with [relation](Relation) will be emitted if there is a match, along with all their matching
197
+ /// child-items which are similarly bundled as rename.
181
198
///
182
199
/// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
183
200
/// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
@@ -195,7 +212,7 @@ impl<T: Change> Tracker<T> {
195
212
/// will panic if `change` is not a modification, and it's valid to not call `push` at all.
196
213
pub fn emit < PushSourceTreeFn , E > (
197
214
& mut self ,
198
- mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
215
+ mut cb : impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
199
216
diff_cache : & mut crate :: blob:: Platform ,
200
217
objects : & impl gix_object:: FindObjectOrHeader ,
201
218
mut push_source_tree : PushSourceTreeFn ,
@@ -272,7 +289,7 @@ impl<T: Change> Tracker<T> {
272
289
change : item. change ,
273
290
} ,
274
291
None ,
275
- ) == crate :: tree :: visit :: Action :: Cancel
292
+ ) == Action :: Cancel
276
293
{
277
294
break ;
278
295
}
@@ -285,17 +302,15 @@ impl<T: Change> Tracker<T> {
285
302
fn match_pairs_of_kind (
286
303
& mut self ,
287
304
kind : visit:: SourceKind ,
288
- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
305
+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
289
306
percentage : Option < f32 > ,
290
307
out : & mut Outcome ,
291
308
diff_cache : & mut crate :: blob:: Platform ,
292
309
objects : & impl gix_object:: FindObjectOrHeader ,
293
310
) -> Result < ( ) , emit:: Error > {
294
311
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
295
312
let needs_second_pass = !needs_exact_match ( percentage) ;
296
- if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ?
297
- == crate :: tree:: visit:: Action :: Cancel
298
- {
313
+ if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects) ? == Action :: Cancel {
299
314
return Ok ( ( ) ) ;
300
315
}
301
316
if needs_second_pass {
@@ -328,13 +343,13 @@ impl<T: Change> Tracker<T> {
328
343
329
344
fn match_pairs (
330
345
& mut self ,
331
- cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> crate :: tree :: visit :: Action ,
346
+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
332
347
percentage : Option < f32 > ,
333
348
kind : visit:: SourceKind ,
334
349
stats : & mut Outcome ,
335
350
diff_cache : & mut crate :: blob:: Platform ,
336
351
objects : & impl gix_object:: FindObjectOrHeader ,
337
- ) -> Result < crate :: tree :: visit :: Action , emit:: Error > {
352
+ ) -> Result < Action , emit:: Error > {
338
353
let mut dest_ofs = 0 ;
339
354
while let Some ( ( mut dest_idx, dest) ) = self . items [ dest_ofs..] . iter ( ) . enumerate ( ) . find_map ( |( idx, item) | {
340
355
( !item. emitted && matches ! ( item. change. kind( ) , ChangeKind :: Addition ) ) . then_some ( ( idx, item) )
@@ -368,28 +383,102 @@ impl<T: Change> Tracker<T> {
368
383
src_idx,
369
384
)
370
385
} ) ;
371
- if src . is_none ( ) {
386
+ let Some ( ( src , src_idx ) ) = src else {
372
387
continue ;
373
- }
388
+ } ;
374
389
let location = dest. location ( & self . path_backing ) ;
375
390
let change = dest. change . clone ( ) ;
376
391
let dest = visit:: Destination { change, location } ;
377
- let src_idx = src. as_ref ( ) . map ( |t| t. 1 ) ;
378
- let res = cb ( dest, src. map ( |t| t. 0 ) ) ;
392
+ let relations = if percentage. is_none ( ) {
393
+ src. change . relation ( ) . zip ( dest. change . relation ( ) )
394
+ } else {
395
+ None
396
+ } ;
397
+ let res = cb ( dest, Some ( src) ) ;
379
398
380
399
self . items [ dest_idx] . emitted = true ;
381
- if let Some ( src_idx) = src_idx {
382
- self . items [ src_idx] . emitted = true ;
400
+ self . items [ src_idx] . emitted = true ;
401
+
402
+ if res == Action :: Cancel {
403
+ return Ok ( Action :: Cancel ) ;
404
+ }
405
+
406
+ if let Some ( ( Relation :: Parent ( src) , Relation :: Parent ( dst) ) ) = relations {
407
+ let res = self . emit_child_renames_matching_identity ( cb, kind, src, dst) ?;
408
+ if res == Action :: Cancel {
409
+ return Ok ( Action :: Cancel ) ;
410
+ }
383
411
}
412
+ }
413
+ Ok ( Action :: Continue )
414
+ }
384
415
385
- if res == crate :: tree:: visit:: Action :: Cancel {
386
- return Ok ( crate :: tree:: visit:: Action :: Cancel ) ;
416
+ /// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
417
+ /// as `src` and `dst` were an exact match (so all children have to match exactly).
418
+ fn emit_child_renames_matching_identity (
419
+ & mut self ,
420
+ cb : & mut impl FnMut ( visit:: Destination < ' _ , T > , Option < visit:: Source < ' _ , T > > ) -> Action ,
421
+ kind : visit:: SourceKind ,
422
+ src_parent_id : ChangeId ,
423
+ dst_parent_id : ChangeId ,
424
+ ) -> Result < Action , emit:: Error > {
425
+ debug_assert_ne ! (
426
+ src_parent_id, dst_parent_id,
427
+ "src and destination directories must be distinct"
428
+ ) ;
429
+ let ( mut src_items, mut dst_items) = ( Vec :: with_capacity ( 1 ) , Vec :: with_capacity ( 1 ) ) ;
430
+ for item in self . items . iter_mut ( ) . filter ( |item| !item. emitted ) {
431
+ match item. change . relation ( ) {
432
+ Some ( Relation :: ChildOfParent ( id) ) if id == src_parent_id => {
433
+ src_items. push ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
434
+ }
435
+ Some ( Relation :: ChildOfParent ( id) ) if id == dst_parent_id => {
436
+ dst_items. push ( ( item. change . id ( ) . to_owned ( ) , item) ) ;
437
+ }
438
+ _ => continue ,
439
+ } ;
440
+ }
441
+
442
+ for ( ( src_id, src_item) , ( dst_id, dst_item) ) in src_items. into_iter ( ) . zip ( dst_items) {
443
+ // Since the parent items are already identical by ID, we know that the children will also match, we just
444
+ // double-check to still have a chance to be correct in case some of that goes wrong.
445
+ if src_id == dst_id
446
+ && filename ( src_item. location ( & self . path_backing ) ) == filename ( dst_item. location ( & self . path_backing ) )
447
+ {
448
+ let entry_mode = src_item. change . entry_mode ( ) ;
449
+ let location = src_item. location ( & self . path_backing ) ;
450
+ let src = visit:: Source {
451
+ entry_mode,
452
+ id : src_id,
453
+ kind,
454
+ location,
455
+ change : & src_item. change ,
456
+ diff : None ,
457
+ } ;
458
+ let location = dst_item. location ( & self . path_backing ) ;
459
+ let change = dst_item. change . clone ( ) ;
460
+ let dst = visit:: Destination { change, location } ;
461
+ let res = cb ( dst, Some ( src) ) ;
462
+
463
+ src_item. emitted = true ;
464
+ dst_item. emitted = true ;
465
+
466
+ if res == Action :: Cancel {
467
+ return Ok ( res) ;
468
+ }
469
+ } else {
470
+ gix_trace:: warn!( "Children of parents with change-id {src_parent_id} and {dst_parent_id} were not equal, even though their parents claimed to be" ) ;
471
+ break ;
387
472
}
388
473
}
389
- Ok ( crate :: tree :: visit :: Action :: Continue )
474
+ Ok ( Action :: Continue )
390
475
}
391
476
}
392
477
478
+ fn filename ( path : & BStr ) -> & BStr {
479
+ path. rfind_byte ( b'/' ) . map_or ( path, |idx| path[ idx + 1 ..] . as_bstr ( ) )
480
+ }
481
+
393
482
/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
394
483
fn estimate_involved_items (
395
484
items : impl IntoIterator < Item = ( bool , ChangeKind ) > ,
@@ -473,13 +562,9 @@ fn find_match<'a, T: Change>(
473
562
if let Some ( src) = res {
474
563
return Ok ( Some ( src) ) ;
475
564
}
476
- } else {
565
+ } else if item_mode . is_blob ( ) {
477
566
let mut has_new = false ;
478
567
let percentage = percentage. expect ( "it's set to something below 1.0 and we assured this" ) ;
479
- debug_assert ! (
480
- item_mode. is_blob( ) ,
481
- "symlinks are matched exactly, and trees aren't used here"
482
- ) ;
483
568
484
569
for ( can_idx, src) in items
485
570
. iter ( )
0 commit comments