Skip to content

Commit d803ff5

Browse files
committed
feat: provide a way to record and apply index changes.
These changes will then be applicable to an index that is created from the written tree editor.
1 parent c6f1409 commit d803ff5

File tree

10 files changed

+725
-50
lines changed

10 files changed

+725
-50
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ Check out the [performance discussion][gix-diff-performance] as well.
318318
* [x] find blobs by similarity check
319319
* [ ] heuristics to find best candidate
320320
* [ ] find by basename to support similarity check
321+
- Not having it can lead to issues when files with the same or similar content are part of a move
322+
as files can be lost that way.
321323
* [x] directory tracking
322324
- [x] by identity
323325
- [ ] by similarity
@@ -349,8 +351,7 @@ Check out the [performance discussion][gix-diff-performance] as well.
349351
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
350352
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
351353
* [x] **tree**-diff-heuristics match Git for its test-cases
352-
- [ ] a way to generate an index with stages
353-
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
354+
- [x] a way to generate an index with stages, mostly conforming with Git.
354355
- [ ] submodule merges (*right now they count as conflicts if they differ*)
355356
* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge
356357
* [x] API documentation

gix-merge/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ gix-quote = { version = "^0.4.13", path = "../gix-quote" }
3232
gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] }
3333
gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" }
3434
gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] }
35+
gix-index = { version = "^0.36.0", path = "../gix-index" }
3536

3637
thiserror = "2.0.0"
3738
imara-diff = { version = "0.1.7" }

gix-merge/src/tree/function.rs

Lines changed: 179 additions & 33 deletions
Large diffs are not rendered by default.

gix-merge/src/tree/mod.rs

Lines changed: 187 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ impl Outcome<'_> {
8181
pub fn has_unresolved_conflicts(&self, how: TreatAsUnresolved) -> bool {
8282
self.conflicts.iter().any(|c| c.is_unresolved(how))
8383
}
84+
85+
/// Returns `true` if `index` changed as we applied conflicting stages to it, using `how` to determine if a
86+
/// conflict should be considered unresolved.
87+
/// It's important that `index` is at the state of [`Self::tree`].
88+
///
89+
/// Note that in practice, whenever there is a single [conflict](Conflict), this function will return `true`.
90+
/// Also, the unconflicted stage of such entries will be removed merely by setting a flag, so the
91+
/// in-memory entry is still present.
92+
pub fn index_changed_after_applying_conflicts(&self, index: &mut gix_index::State, how: TreatAsUnresolved) -> bool {
93+
apply_index_entries(&self.conflicts, how, index)
94+
}
8495
}
8596

8697
/// A description of a conflict (i.e. merge issue without an auto-resolution) as seen during a [tree-merge](crate::tree()).
@@ -99,11 +110,45 @@ pub struct Conflict {
99110
pub ours: Change,
100111
/// The change representing *their* side.
101112
pub theirs: Change,
113+
/// An array to store an entry for each stage of the conflict.
114+
///
115+
/// * `entries[0]` => Base
116+
/// * `entries[1]` => Ours
117+
/// * `entries[2]` => Theirs
118+
///
119+
/// Note that ours and theirs might be swapped, so one should access it through [`Self::entries()`] to compensate for that.
120+
pub entries: [Option<ConflictIndexEntry>; 3],
102121
/// Determine how to interpret the `ours` and `theirs` fields. This is used to implement [`Self::changes_in_resolution()`]
103122
/// and [`Self::into_parts_by_resolution()`].
104123
map: ConflictMapping,
105124
}
106125

126+
/// A conflicting entry for insertion into the index.
127+
/// It will always be either on stage 1 (ancestor/base), 2 (ours) or 3 (theirs).
128+
#[derive(Debug, Clone, Copy)]
129+
pub struct ConflictIndexEntry {
130+
/// The kind of object at this stage.
131+
/// Note that it's possible that this is a directory, for instance if a directory was replaced with a file.
132+
pub mode: gix_object::tree::EntryMode,
133+
/// The id defining the state of the object.
134+
pub id: gix_hash::ObjectId,
135+
/// Hidden, maybe one day we can do without?
136+
path_hint: Option<ConflictIndexEntryPathHint>,
137+
}
138+
139+
/// A hint for [`apply_index_entries()`] to know which paths to use for an entry.
140+
/// This is only used when necessary.
141+
#[derive(Debug, Clone, Copy)]
142+
enum ConflictIndexEntryPathHint {
143+
/// Use the previous path, i.e. rename source.
144+
Source,
145+
/// Use the current path as it is in the tree.
146+
Current,
147+
/// Use the path of the final destination, or *their* name.
148+
/// It's definitely finicky, as we don't store the actual path and instead refer to it.
149+
RenamedOrTheirs,
150+
}
151+
107152
/// A utility to help define which side is what in the [`Conflict`] type.
108153
#[derive(Debug, Clone, Copy)]
109154
enum ConflictMapping {
@@ -147,7 +192,11 @@ impl Conflict {
147192
TreatAsUnresolved::Renames | TreatAsUnresolved::RenamesAndAutoResolvedContent => match &self.resolution {
148193
Ok(success) => match success {
149194
Resolution::SourceLocationAffectedByRename { .. } => false,
150-
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { .. } => true,
195+
Resolution::OursModifiedTheirsRenamedAndChangedThenRename {
196+
merged_blob,
197+
final_location,
198+
..
199+
} => final_location.is_some() || merged_blob.as_ref().map_or(false, content_merge_matches),
151200
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { merged_blob } => {
152201
content_merge_matches(merged_blob)
153202
}
@@ -178,6 +227,14 @@ impl Conflict {
178227
}
179228
}
180229

230+
/// Return the index entries for insertion into the index, to match with what's returned by [`Self::changes_in_resolution()`].
231+
pub fn entries(&self) -> [Option<ConflictIndexEntry>; 3] {
232+
match self.map {
233+
ConflictMapping::Original => self.entries,
234+
ConflictMapping::Swapped => [self.entries[0], self.entries[2], self.entries[1]],
235+
}
236+
}
237+
181238
/// Return information about the content merge if it was performed.
182239
pub fn content_merge(&self) -> Option<ContentMerge> {
183240
match &self.resolution {
@@ -308,3 +365,132 @@ pub struct Options {
308365

309366
pub(super) mod function;
310367
mod utils;
368+
pub mod apply_index_entries {
369+
370+
pub(super) mod function {
371+
use crate::tree::{Conflict, ConflictIndexEntryPathHint, Resolution, ResolutionFailure, TreatAsUnresolved};
372+
use bstr::{BStr, ByteSlice};
373+
use std::collections::{hash_map, HashMap};
374+
375+
/// Returns `true` if `index` changed as we applied conflicting stages to it, using `how` to determine if a
376+
/// conflict should be considered unresolved.
377+
/// Once a stage of a path conflicts, the unconflicting stage is removed even though it might be the one
378+
/// that is currently checked out.
379+
/// This removal, however, is only done by flagging it with [gix_index::entry::Flags::REMOVE], which means
380+
/// these entries won't be written back to disk but will still be present in the index.
381+
/// It's important that `index` matches the tree that was produced as part of the merge that also
382+
/// brought about `conflicts`, or else this function will fail if it cannot find the path matching
383+
/// the conflicting entries.
384+
///
385+
/// Note that in practice, whenever there is a single [conflict](Conflict), this function will return `true`.
386+
/// Errors can only occur if `index` isn't the one created from the merged tree that produced the `conflicts`.
387+
pub fn apply_index_entries(
388+
conflicts: &[Conflict],
389+
how: TreatAsUnresolved,
390+
index: &mut gix_index::State,
391+
) -> bool {
392+
let len = index.entries().len();
393+
let mut idx_by_path_stage = HashMap::<(gix_index::entry::Stage, &BStr), usize>::default();
394+
for conflict in conflicts.iter().filter(|c| c.is_unresolved(how)) {
395+
let (renamed_path, current_path): (Option<&BStr>, &BStr) = match &conflict.resolution {
396+
Ok(success) => match success {
397+
Resolution::SourceLocationAffectedByRename { final_location } => {
398+
(Some(final_location.as_bstr()), final_location.as_bstr())
399+
}
400+
Resolution::OursModifiedTheirsRenamedAndChangedThenRename { final_location, .. } => (
401+
final_location.as_ref().map(|p| p.as_bstr()),
402+
conflict.changes_in_resolution().1.location(),
403+
),
404+
Resolution::OursModifiedTheirsModifiedThenBlobContentMerge { .. } => {
405+
(None, conflict.ours.location())
406+
}
407+
},
408+
Err(failure) => match failure {
409+
ResolutionFailure::OursRenamedTheirsRenamedDifferently { .. } => {
410+
(Some(conflict.theirs.location()), conflict.ours.location())
411+
}
412+
ResolutionFailure::OursModifiedTheirsRenamedTypeMismatch
413+
| ResolutionFailure::OursDeletedTheirsRenamed
414+
| ResolutionFailure::OursModifiedTheirsDeleted
415+
| ResolutionFailure::Unknown => (None, conflict.ours.location()),
416+
ResolutionFailure::OursModifiedTheirsDirectoryThenOursRenamed {
417+
renamed_unique_path_to_modified_blob,
418+
} => (
419+
Some(renamed_unique_path_to_modified_blob.as_bstr()),
420+
conflict.ours.location(),
421+
),
422+
ResolutionFailure::OursAddedTheirsAddedTypeMismatch { their_unique_location } => {
423+
(Some(their_unique_location.as_bstr()), conflict.ours.location())
424+
}
425+
},
426+
};
427+
let source_path = conflict.ours.source_location();
428+
429+
let entries_with_stage = conflict.entries().into_iter().enumerate().filter_map(|(idx, entry)| {
430+
entry.filter(|e| e.mode.is_no_tree()).map(|e| {
431+
(
432+
match idx {
433+
0 => gix_index::entry::Stage::Base,
434+
1 => gix_index::entry::Stage::Ours,
435+
2 => gix_index::entry::Stage::Theirs,
436+
_ => unreachable!("fixed size array with three items"),
437+
},
438+
match e.path_hint {
439+
None => renamed_path.unwrap_or(current_path),
440+
Some(ConflictIndexEntryPathHint::Source) => source_path,
441+
Some(ConflictIndexEntryPathHint::Current) => current_path,
442+
Some(ConflictIndexEntryPathHint::RenamedOrTheirs) => {
443+
renamed_path.unwrap_or_else(|| conflict.changes_in_resolution().1.location())
444+
}
445+
},
446+
e,
447+
)
448+
})
449+
});
450+
451+
if !entries_with_stage.clone().any(|(_, path, _)| {
452+
index
453+
.entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len)
454+
.is_some()
455+
}) {
456+
continue;
457+
}
458+
459+
for (stage, path, entry) in entries_with_stage {
460+
if let Some(pos) =
461+
index.entry_index_by_path_and_stage_bounded(path, gix_index::entry::Stage::Unconflicted, len)
462+
{
463+
index.entries_mut()[pos].flags.insert(gix_index::entry::Flags::REMOVE);
464+
};
465+
match idx_by_path_stage.entry((stage, path)) {
466+
hash_map::Entry::Occupied(map_entry) => {
467+
// This can happen due to the way the algorithm works.
468+
// The same happens in Git, but it stores the index-related data as part of its deduplicating tree.
469+
// We store each conflict we encounter, which also may duplicate their index entries, sometimes, but
470+
// with different values. The most recent value wins.
471+
// Instead of trying to deduplicate the index entries when the merge runs, we put the cost
472+
// to the tree-assembly - there is no way around it.
473+
let index_entry = &mut index.entries_mut()[*map_entry.get()];
474+
index_entry.mode = entry.mode.into();
475+
index_entry.id = entry.id;
476+
}
477+
hash_map::Entry::Vacant(map_entry) => {
478+
map_entry.insert(index.entries().len());
479+
index.dangerously_push_entry(
480+
Default::default(),
481+
entry.id,
482+
stage.into(),
483+
entry.mode.into(),
484+
path,
485+
);
486+
}
487+
};
488+
}
489+
}
490+
491+
index.sort_entries();
492+
index.entries().len() != len
493+
}
494+
}
495+
}
496+
pub use apply_index_entries::function::apply_index_entries;

gix-merge/src/tree/utils.rs

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
//! contribute to finding a fix faster.
88
use crate::blob::builtin_driver::binary::Pick;
99
use crate::blob::ResourceKind;
10-
use crate::tree::{Conflict, ConflictMapping, Error, Options, Resolution, ResolutionFailure};
10+
use crate::tree::{
11+
Conflict, ConflictIndexEntry, ConflictIndexEntryPathHint, ConflictMapping, Error, Options, Resolution,
12+
ResolutionFailure,
13+
};
1114
use bstr::ByteSlice;
1215
use bstr::{BStr, BString, ByteVec};
1316
use gix_diff::tree_with_rewrites::{Change, ChangeRef};
@@ -98,6 +101,14 @@ pub fn perform_blob_merge<E>(
98101
where
99102
E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
100103
{
104+
if our_id == their_id {
105+
// This can happen if the merge modes are different.
106+
debug_assert_ne!(
107+
our_mode, their_mode,
108+
"BUG: we must think anything has to be merged if the modes and the ids are the same"
109+
);
110+
return Ok((their_id, crate::blob::Resolution::Complete));
111+
}
101112
if matches!(our_mode.kind(), EntryKind::Link) && matches!(their_mode.kind(), EntryKind::Link) {
102113
let (pick, resolution) = crate::blob::builtin_driver::binary(options.symlink_conflicts);
103114
let (our_id, their_id) = match outer_side {
@@ -544,29 +555,57 @@ impl Conflict {
544555
pub(super) fn without_resolution(
545556
resolution: ResolutionFailure,
546557
changes: (&Change, &Change, ConflictMapping, ConflictMapping),
558+
entries: [Option<ConflictIndexEntry>; 3],
547559
) -> Self {
548-
Conflict::maybe_resolved(Err(resolution), changes)
560+
Conflict::maybe_resolved(Err(resolution), changes, entries)
549561
}
550562

551563
pub(super) fn with_resolution(
552564
resolution: Resolution,
553565
changes: (&Change, &Change, ConflictMapping, ConflictMapping),
566+
entries: [Option<ConflictIndexEntry>; 3],
554567
) -> Self {
555-
Conflict::maybe_resolved(Ok(resolution), changes)
568+
Conflict::maybe_resolved(Ok(resolution), changes, entries)
556569
}
557570

558-
pub(super) fn maybe_resolved(
571+
fn maybe_resolved(
559572
resolution: Result<Resolution, ResolutionFailure>,
560573
(ours, theirs, map, outer_map): (&Change, &Change, ConflictMapping, ConflictMapping),
574+
entries: [Option<ConflictIndexEntry>; 3],
561575
) -> Self {
562576
Conflict {
563577
resolution,
564578
ours: ours.clone(),
565579
theirs: theirs.clone(),
580+
entries,
566581
map: match outer_map {
567582
ConflictMapping::Original => map,
568583
ConflictMapping::Swapped => map.swapped(),
569584
},
570585
}
571586
}
587+
588+
pub(super) fn unknown(changes: (&Change, &Change, ConflictMapping, ConflictMapping)) -> Self {
589+
let (source_mode, source_id) = changes.0.source_entry_mode_and_id();
590+
let (our_mode, our_id) = changes.0.entry_mode_and_id();
591+
let (their_mode, their_id) = changes.1.entry_mode_and_id();
592+
let entries = [
593+
Some(ConflictIndexEntry {
594+
mode: source_mode,
595+
id: source_id.into(),
596+
path_hint: Some(ConflictIndexEntryPathHint::Source),
597+
}),
598+
Some(ConflictIndexEntry {
599+
mode: our_mode,
600+
id: our_id.into(),
601+
path_hint: Some(ConflictIndexEntryPathHint::Current),
602+
}),
603+
Some(ConflictIndexEntry {
604+
mode: their_mode,
605+
id: their_id.into(),
606+
path_hint: Some(ConflictIndexEntryPathHint::RenamedOrTheirs),
607+
}),
608+
];
609+
Conflict::maybe_resolved(Err(ResolutionFailure::Unknown), changes, entries)
610+
}
572611
}
Binary file not shown.

0 commit comments

Comments
 (0)