
Commit 83e1b73

Merge pull request #1983 from cruessler/make-process-changes-work-with-overlapping-ranges
Correctly process overlapping unblamed hunks
2 parents 189d1a0 + b2121bc commit 83e1b73

7 files changed: +89 -67 lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

gix-blame/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -31,3 +31,4 @@ gix-fs = { path = "../gix-fs" }
 gix-index = { path = "../gix-index" }
 gix-odb = { path = "../gix-odb" }
 gix-testtools = { path = "../tests/tools" }
+pretty_assertions = "1.4.0"

gix-blame/src/file/function.rs

Lines changed: 11 additions & 36 deletions
@@ -37,7 +37,8 @@ use crate::{BlameEntry, Error, Options, Outcome, Statistics};
 ///
 /// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
 /// represent the worktree state.
-/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the
+///
+/// We begin with one or more *Unblamed Hunks* and a single suspect, usually the `HEAD` commit as the commit containing the
 /// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
 /// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
 /// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
@@ -197,15 +198,16 @@ pub fn file(
             if let Some(range_in_suspect) = hunk.get_range(&suspect) {
                 let range_in_blamed_file = hunk.range_in_blamed_file.clone();

-                for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) {
-                    let source_token = source_lines_as_tokens[source_line_number as usize];
-                    let blame_token = blamed_lines_as_tokens[blamed_line_number as usize];
-
-                    let source_line = BString::new(source_interner[source_token].into());
-                    let blamed_line = BString::new(blamed_interner[blame_token].into());
+                let source_lines = range_in_suspect
+                    .clone()
+                    .map(|i| BString::new(source_interner[source_lines_as_tokens[i as usize]].into()))
+                    .collect::<Vec<_>>();
+                let blamed_lines = range_in_blamed_file
+                    .clone()
+                    .map(|i| BString::new(blamed_interner[blamed_lines_as_tokens[i as usize]].into()))
+                    .collect::<Vec<_>>();

-                    assert_eq!(source_line, blamed_line);
-                }
+                assert_eq!(source_lines, blamed_lines);
             }
         }
     }
@@ -302,33 +304,6 @@ pub fn file(
             unblamed_hunk.remove_blame(suspect);
             true
         });
-
-        // This block asserts that line ranges for each suspect never overlap. If they did overlap
-        // this would mean that the same line in a *Source File* would map to more than one line in
-        // the *Blamed File* and this is not possible.
-        #[cfg(debug_assertions)]
-        {
-            let ranges = hunks_to_blame.iter().fold(
-                std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
-                |mut acc, hunk| {
-                    for (suspect, range) in hunk.suspects.clone() {
-                        acc.entry(suspect).or_default().push(range);
-                    }
-
-                    acc
-                },
-            );
-
-            for (_, mut ranges) in ranges {
-                ranges.sort_by(|a, b| a.start.cmp(&b.start));
-
-                for window in ranges.windows(2) {
-                    if let [a, b] = window {
-                        assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
-                    }
-                }
-            }
-        }
     }

     debug_assert_eq!(
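Note on the sanity check above: it now collects both line ranges into vectors and compares them with a single `assert_eq!` instead of asserting line by line inside a zip loop. The following is a minimal, self-contained sketch of that pattern only; plain `&str` slices and the helper `lines_in_range` stand in for the crate's interner and `BString` types, and the file contents and ranges are made up for illustration.

```rust
use std::ops::Range;

// Collect the lines selected by a range so that a failing comparison shows both
// complete line lists at once (which is where `pretty_assertions`, added above
// in gix-blame/Cargo.toml, shines).
fn lines_in_range<'a>(lines: &[&'a str], range: Range<u32>) -> Vec<&'a str> {
    range.map(|i| lines[i as usize]).collect()
}

fn main() {
    let blamed_file = ["fn main() {", "    println!(\"hi\");", "}"];
    let source_file = ["// intro", "fn main() {", "    println!(\"hi\");", "}"];

    // Hypothetical hunk: blamed lines 0..3 are claimed to come from source lines 1..4.
    let blamed_lines = lines_in_range(&blamed_file, 0..3);
    let source_lines = lines_in_range(&source_file, 1..4);

    assert_eq!(source_lines, blamed_lines);
}
```

On a mismatch, comparing whole vectors lets `pretty_assertions::assert_eq!` print a diff of the two line lists rather than stopping at the first differing pair.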

gix-blame/src/file/mod.rs

Lines changed: 32 additions & 26 deletions
@@ -8,11 +8,12 @@ use crate::types::{BlameEntry, Change, Either, LineRange, Offset, UnblamedHunk};

 pub(super) mod function;

-/// Compare a section from the *Blamed File* (`hunk`) with a change from a diff and see if there
-/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out`
-/// and/or split the `hunk` into multiple.
+/// Compare a section from a potential *Source File* (`hunk`) with a change from a diff and see if
+/// there is an intersection with `change`. Based on that intersection, we may generate a
+/// [`BlameEntry`] for `out` and/or split the `hunk` into multiple.
 ///
-/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
+/// This is the core of the blame implementation as it matches regions in *Blamed File* to
+/// corresponding regions in one or more than one *Source File*.
 fn process_change(
     new_hunks_to_blame: &mut Vec<UnblamedHunk>,
     offset: &mut Offset,
@@ -320,36 +321,41 @@ fn process_change(

 /// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
 /// Once a match is found, it's pushed onto `out`.
+///
+/// `process_changes` assumes that ranges coming from the same *Source File* can and do
+/// occasionally overlap. If it were a desirable property of the blame algorithm as a whole to
+/// never have two different lines from a *Blamed File* mapped to the same line in a *Source File*,
+/// this property would need to be enforced at a higher level than `process_changes`.
+/// Then the nested loops could potentially be flattened into one.
 fn process_changes(
     hunks_to_blame: Vec<UnblamedHunk>,
     changes: Vec<Change>,
     suspect: ObjectId,
     parent: ObjectId,
 ) -> Vec<UnblamedHunk> {
-    let mut hunks_iter = hunks_to_blame.into_iter();
-    let mut changes_iter = changes.into_iter();
+    let mut new_hunks_to_blame = Vec::new();

-    let mut hunk = hunks_iter.next();
-    let mut change = changes_iter.next();
+    for mut hunk in hunks_to_blame.into_iter().map(Some) {
+        let mut offset_in_destination = Offset::Added(0);

-    let mut new_hunks_to_blame = Vec::new();
-    let mut offset_in_destination = Offset::Added(0);
-
-    loop {
-        (hunk, change) = process_change(
-            &mut new_hunks_to_blame,
-            &mut offset_in_destination,
-            suspect,
-            parent,
-            hunk,
-            change,
-        );
-
-        hunk = hunk.or_else(|| hunks_iter.next());
-        change = change.or_else(|| changes_iter.next());
-
-        if hunk.is_none() && change.is_none() {
-            break;
+        let mut changes_iter = changes.iter().cloned();
+        let mut change = changes_iter.next();
+
+        loop {
+            (hunk, change) = process_change(
+                &mut new_hunks_to_blame,
+                &mut offset_in_destination,
+                suspect,
+                parent,
+                hunk,
+                change,
+            );
+
+            change = change.or_else(|| changes_iter.next());
+
+            if hunk.is_none() {
+                break;
+            }
         }
     }
     new_hunks_to_blame
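Read without the diff markers, the reworked `process_changes` nests two loops: the outer one walks the unblamed hunks, and each hunk restarts its own pass over a cloned list of changes with a fresh `offset_in_destination`. The sketch below is a self-contained model of that loop shape only; the `Change` wrapper and `toy_step` are stand-ins for illustration, not gix-blame's types or its real `process_change`. It shows why the per-hunk pass matters: with the previously shared iterators, a change consumed while processing one hunk was no longer available to a later, overlapping hunk.

```rust
use std::ops::Range;

// Stand-in change type; the real `Change` gains a `Clone` derive in this commit
// precisely so that `changes.iter().cloned()` can be restarted for every hunk.
#[derive(Clone, Debug, PartialEq)]
struct Change(Range<u32>);

// Toy step: pair the hunk with the first change that intersects it, otherwise
// ask for the next change; when the changes run out, record the hunk as unmatched.
// (The real `process_change` splits hunks and tracks offsets instead.)
fn toy_step(
    out: &mut Vec<(Range<u32>, Option<Change>)>,
    hunk: Option<Range<u32>>,
    change: Option<Change>,
) -> (Option<Range<u32>>, Option<Change>) {
    match (hunk, change) {
        (Some(h), Some(c)) if h.start < c.0.end && c.0.start < h.end => {
            out.push((h, Some(c)));
            (None, None)
        }
        (Some(h), Some(_)) => (Some(h), None),
        (Some(h), None) => {
            out.push((h, None));
            (None, None)
        }
        (None, change) => (None, change),
    }
}

fn main() {
    let hunks = vec![13..16, 10..17]; // overlapping, as in the new test below
    let changes = vec![Change(0..5), Change(10..14)];

    let mut out = Vec::new();
    for mut hunk in hunks.into_iter().map(Some) {
        // Every hunk gets its own pass over *all* changes.
        let mut changes_iter = changes.iter().cloned();
        let mut change = changes_iter.next();
        loop {
            (hunk, change) = toy_step(&mut out, hunk, change);
            change = change.or_else(|| changes_iter.next());
            if hunk.is_none() {
                break;
            }
        }
    }

    // Both overlapping hunks get matched against `Change(10..14)`; a single
    // shared changes iterator would have been exhausted after the first hunk.
    assert!(out.iter().all(|(_, c)| c == &Some(Change(10..14))));
}
```

This is only the loop skeleton; the real inner step splits each hunk between suspect and parent rather than pairing whole ranges, as the new test below demonstrates.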

gix-blame/src/file/tests.rs

Lines changed: 36 additions & 0 deletions
@@ -959,6 +959,8 @@ mod process_change {
 }

 mod process_changes {
+    use pretty_assertions::assert_eq;
+
     use crate::file::{
         process_changes,
         tests::{new_unblamed_hunk, one_sha, zero_sha},
@@ -1337,4 +1339,38 @@ mod process_changes {
             ]
         );
     }
+
+    #[test]
+    fn subsequent_hunks_overlapping_end_of_addition() {
+        let suspect = zero_sha();
+        let parent = one_sha();
+        let hunks_to_blame = vec![
+            new_unblamed_hunk(13..16, suspect, Offset::Added(0)),
+            new_unblamed_hunk(10..17, suspect, Offset::Added(0)),
+        ];
+        let changes = vec![Change::AddedOrReplaced(10..14, 0)];
+        let new_hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent);
+
+        assert_eq!(
+            new_hunks_to_blame,
+            [
+                UnblamedHunk {
+                    range_in_blamed_file: 13..14,
+                    suspects: [(suspect, 13..14)].into()
+                },
+                UnblamedHunk {
+                    range_in_blamed_file: 14..16,
+                    suspects: [(parent, 10..12)].into(),
+                },
+                UnblamedHunk {
+                    range_in_blamed_file: 10..14,
+                    suspects: [(suspect, 10..14)].into(),
+                },
+                UnblamedHunk {
+                    range_in_blamed_file: 14..17,
+                    suspects: [(parent, 10..13)].into(),
+                },
+            ]
+        );
+    }
 }
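To see where the expected values in `subsequent_hunks_overlapping_end_of_addition` come from, it helps to work through the range arithmetic for a single `AddedOrReplaced` change. The sketch below is a deliberately simplified, hypothetical helper (`split_against_addition` is not part of gix-blame) that only handles a hunk starting at or after the addition, which is all these two cases need: lines inside the added range stay blamed on the suspect, while lines after it are shifted back by the net number of lines the change added and handed on to the parent.

```rust
use std::ops::Range;

// Hypothetical, simplified helper (not the crate's `process_change`): split one
// unblamed range against a single `AddedOrReplaced(added, number_of_lines_deleted)`
// change, assuming the hunk does not start before the addition.
fn split_against_addition(
    hunk: Range<u32>,
    added: Range<u32>,
    number_of_lines_deleted: u32,
) -> (Option<Range<u32>>, Option<Range<u32>>) {
    // Net growth of the file caused by this change.
    let offset = (added.end - added.start) - number_of_lines_deleted;

    // Overlap with the added lines: these were introduced by the suspect.
    let kept_by_suspect = {
        let start = hunk.start.max(added.start);
        let end = hunk.end.min(added.end);
        (start < end).then_some(start..end)
    };

    // Lines after the addition already existed in the parent, shifted back by `offset`.
    let passed_to_parent = {
        let start = hunk.start.max(added.end);
        (start < hunk.end).then_some(start - offset..hunk.end - offset)
    };

    (kept_by_suspect, passed_to_parent)
}

fn main() {
    // Change::AddedOrReplaced(10..14, 0) from the test above: 4 lines added, 0 replaced.
    // Hunk 13..16: 13..14 stays with the suspect, 14..16 maps to 10..12 in the parent.
    assert_eq!(split_against_addition(13..16, 10..14, 0), (Some(13..14), Some(10..12)));
    // Hunk 10..17: 10..14 stays with the suspect, 14..17 maps to 10..13 in the parent.
    assert_eq!(split_against_addition(10..17, 10..14, 0), (Some(10..14), Some(10..13)));
}
```

The expected `range_in_blamed_file` values in the test follow the same pattern: the parts handed to the parent keep their positions in the *Blamed File* (14..16 and 14..17) while their suspect ranges are the shifted parent ranges (10..12 and 10..13).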

gix-blame/src/lib.rs

Lines changed: 4 additions & 4 deletions
@@ -2,15 +2,15 @@
 //!
 //! ### Terminology
 //!
-//! * **Source File**
+//! * **Blamed File**
 //!   - The file as it exists in `HEAD`.
 //!   - the initial state with all lines that we need to associate with a *Source File*.
-//! * **Blamed File**
-//!   - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
+//! * **Source File**
+//!   - A file at a version (i.e., commit) that introduces hunks into the final 'image' of the *Blamed File*.
 //! * **Suspects**
 //!   - The versions of the files that can contain hunks that we could use in the final 'image'
 //!   - multiple at the same time as the commit-graph may split up.
-//!   - turns into *Source File* once we have found an association into the *Blamed File*.
+//!   - They turn into a *Source File* once we have found an association into the *Blamed File*.
 #![deny(rust_2018_idioms, missing_docs)]
 #![forbid(unsafe_code)]
gix-blame/src/types.rs

Lines changed: 4 additions & 1 deletion
@@ -353,7 +353,10 @@ pub(crate) enum Either<T, U> {
 }

 /// A single change between two blobs, or an unchanged region.
-#[derive(Debug, PartialEq)]
+///
+/// Line numbers refer to the file that is referred to as `after` or `NewOrDestination`, depending
+/// on the context.
+#[derive(Clone, Debug, PartialEq)]
 pub enum Change {
     /// A range of tokens that wasn't changed.
     Unchanged(Range<u32>),
