Skip to content

Commit 756edb6

Browse files
committed
Merge branch 'en/misc-fixes'
Several small miscellaneous fixes, mostly internal in nature. A couple of corrections to the docs about the ref-map file and some fixes for special cases in stash rewriting are also included. Signed-off-by: Elijah Newren <[email protected]>
2 parents a9093a6 + ebe933f commit 756edb6

7 files changed

+963
-909
lines changed

Documentation/git-filter-repo.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -398,10 +398,10 @@ Reference map
398398
~~~~~~~~~~~~~
399399

400400
The `$GIT_DIR/filter-repo/ref-map` file contains a mapping of which local
401-
references were changed.
401+
references were (or were not) changed.
402402

403403
* A header is the first line with the text "old", "new" and "ref"
404-
* Reference mappings are in no particular order
404+
* Reference mappings are sorted by ref
405405
* An all-zeros hash, or null SHA, represents a non-existent object.
406406
When in the "new" column, this means the ref was removed entirely.
407407

git-filter-repo

+47-40
Original file line numberDiff line numberDiff line change
@@ -2947,6 +2947,9 @@ class RepoFilter(object):
29472947
# now-missing commit hash, since there was nothing to map it to.
29482948
self._commits_referenced_but_removed = set()
29492949

2950+
# Other vars related to metadata tracking
2951+
self._already_ran = False
2952+
29502953
# Progress handling (number of commits parsed, etc.)
29512954
self._progress_writer = ProgressWriter()
29522955
self._num_commits = 0
@@ -2967,31 +2970,30 @@ class RepoFilter(object):
29672970

29682971
# Compile some regexes and cache those
29692972
self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)')
2970-
self._full_hash_re = re.compile(br'(\b[0-9a-f]{40}\b)')
29712973

29722974
def _handle_arg_callbacks(self):
2973-
def make_callback(argname, str):
2975+
def make_callback(argname, bdy):
29742976
callback_globals = {g: globals()[g] for g in public_globals}
29752977
callback_locals = {}
29762978
exec('def callback({}, _do_not_use_this_var = None):\n'.format(argname)+
2977-
' '+'\n '.join(str.splitlines()), callback_globals, callback_locals)
2979+
' '+'\n '.join(bdy.splitlines()), callback_globals, callback_locals)
29782980
return callback_locals['callback']
2979-
def handle(type):
2980-
callback_field = '_{}_callback'.format(type)
2981-
code_string = getattr(self._args, type+'_callback')
2981+
def handle(which):
2982+
callback_field = '_{}_callback'.format(which)
2983+
code_string = getattr(self._args, which+'_callback')
29822984
if code_string:
29832985
if os.path.exists(code_string):
29842986
with open(code_string, 'r', encoding='utf-8') as f:
29852987
code_string = f.read()
29862988
if getattr(self, callback_field):
29872989
raise SystemExit(_("Error: Cannot pass a %s_callback to RepoFilter "
29882990
"AND pass --%s-callback"
2989-
% (type, type)))
2991+
% (which, which)))
29902992
if 'return ' not in code_string and \
2991-
type not in ('blob', 'commit', 'tag', 'reset'):
2993+
which not in ('blob', 'commit', 'tag', 'reset'):
29922994
raise SystemExit(_("Error: --%s-callback should have a return statement")
2993-
% type)
2994-
setattr(self, callback_field, make_callback(type, code_string))
2995+
% which)
2996+
setattr(self, callback_field, make_callback(which, code_string))
29952997
handle('filename')
29962998
handle('message')
29972999
handle('name')
@@ -3023,8 +3025,8 @@ class RepoFilter(object):
30233025
# Determine if this is second or later run of filter-repo
30243026
tmp_dir = self.results_tmp_dir(create_if_missing=False)
30253027
ran_path = os.path.join(tmp_dir, b'already_ran')
3026-
already_ran = os.path.isfile(ran_path)
3027-
if already_ran:
3028+
self._already_ran = os.path.isfile(ran_path)
3029+
if self._already_ran:
30283030
current_time = time.time()
30293031
file_mod_time = os.path.getmtime(ran_path)
30303032
file_age = current_time - file_mod_time
@@ -3036,17 +3038,17 @@ class RepoFilter(object):
30363038

30373039
if response.lower() != 'y':
30383040
os.remove(ran_path)
3039-
already_ran = False
3041+
self._already_ran = False
30403042

30413043
# Default for --replace-refs
30423044
if not self._args.replace_refs:
30433045
self._args.replace_refs = 'delete-no-add'
30443046
if self._args.replace_refs == 'old-default':
3045-
self._args.replace_refs = ('update-or-add' if already_ran
3047+
self._args.replace_refs = ('update-or-add' if self._already_ran
30463048
else 'update-and-add')
30473049

30483050
# Do sanity checks from the correct directory
3049-
if not self._args.force and not already_ran:
3051+
if not self._args.force and not self._already_ran:
30503052
cwd = os.getcwd()
30513053
os.chdir(target_working_dir)
30523054
RepoFilter.sanity_check(self._orig_refs, is_bare, self._config_settings)
@@ -3278,13 +3280,6 @@ class RepoFilter(object):
32783280
assert new_hash is not None
32793281
return new_hash[0:orig_len]
32803282

3281-
def _translate_full_commit_hash(self, matchobj):
3282-
old_hash = matchobj.group(1)
3283-
new_hash = self._get_rename(old_hash)
3284-
if new_hash is None:
3285-
return old_hash
3286-
return new_hash
3287-
32883283
def _maybe_trim_extra_parents(self, orig_parents, parents):
32893284
'''Due to pruning of empty commits, some parents could be non-existent
32903285
(None) or otherwise redundant. Remove the non-existent parents, and
@@ -3754,6 +3749,10 @@ class RepoFilter(object):
37543749
orig_file_changes = set(commit.file_changes)
37553750
self._filter_files(commit)
37563751

3752+
# Call the user-defined callback, if any
3753+
if self._commit_callback:
3754+
self._commit_callback(commit, self.callback_metadata(aux_info))
3755+
37573756
# Find out which files were modified by the callbacks. Such paths could
37583757
# lead to subsequent commits being empty (e.g. if removing a line containing
37593758
# a password from every version of a file that had the password, and some
@@ -3765,10 +3764,6 @@ class RepoFilter(object):
37653764
differences = orig_file_changes.symmetric_difference(final_file_changes)
37663765
self._files_tweaked.update(x.filename for x in differences)
37673766

3768-
# Call the user-defined callback, if any
3769-
if self._commit_callback:
3770-
self._commit_callback(commit, self.callback_metadata(aux_info))
3771-
37723767
# Now print the resulting commit, or if prunable skip it
37733768
if not commit.dumped:
37743769
if not self._prunable(commit, new_1st_parent,
@@ -3781,8 +3776,13 @@ class RepoFilter(object):
37813776
if self._args.state_branch:
37823777
alias = Alias(commit.old_id or commit.id, rewrite_to or deleted_hash)
37833778
self._insert_into_stream(alias)
3784-
reset = Reset(commit.branch, rewrite_to or deleted_hash)
3785-
self._insert_into_stream(reset)
3779+
if commit.branch.startswith(b'refs/') or commit.branch == b'HEAD':
3780+
# The special check above is because when direct revisions are passed
3781+
# along to fast-export (such as with stashes), there is a chance the
3782+
# revision is rewritten to nothing. In such cases, we don't want to
3783+
# point an invalid ref that just names a revision to some other point.
3784+
reset = Reset(commit.branch, rewrite_to or deleted_hash)
3785+
self._insert_into_stream(reset)
37863786
self._commit_renames[commit.original_id] = None
37873787

37883788
# Show progress
@@ -3933,21 +3933,26 @@ class RepoFilter(object):
39333933
git_dir = GitUtils.determine_git_dir(repo_working_dir)
39343934
stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
39353935
if os.path.exists(stash):
3936+
self._stash = []
39363937
with open(stash, 'br') as f:
3937-
self._stash = f.read()
3938-
out = subproc.check_output('git rev-list -g refs/stash'.split(),
3939-
cwd=repo_working_dir)
3940-
self._args.refs.extend(decode(out.strip()).split())
3938+
for line in f:
3939+
(oldhash, newhash, rest) = line.split(None, 2)
3940+
self._stash.append((newhash, rest))
3941+
self._args.refs.extend([x[0] for x in self._stash])
39413942

39423943
def _write_stash(self):
3944+
last = deleted_hash
39433945
if self._stash:
39443946
target_working_dir = self._args.target or b'.'
39453947
git_dir = GitUtils.determine_git_dir(target_working_dir)
39463948
stash = os.path.join(git_dir, b'logs', b'refs', b'stash')
39473949
with open(stash, 'bw') as f:
3948-
self._stash = self._full_hash_re.sub(self._translate_full_commit_hash,
3949-
self._stash)
3950-
f.write(self._stash)
3950+
for (hash, rest) in self._stash:
3951+
new_hash = self._get_rename(hash)
3952+
if new_hash is None:
3953+
continue
3954+
f.write(b' '.join([last, new_hash, rest]) + b'\n')
3955+
last = new_hash
39513956
print(_("Rewrote the stash."))
39523957

39533958
def _setup_input(self, use_done_feature):
@@ -4083,6 +4088,10 @@ class RepoFilter(object):
40834088
# Remove unused refs
40844089
exported_refs, imported_refs = self.get_exported_and_imported_refs()
40854090
refs_to_nuke = exported_refs - imported_refs
4091+
# Because revisions can be passed to fast-export which handles them as
4092+
# though they were refs, we might have bad "refs" to nuke; strip them out.
4093+
refs_to_nuke = [x for x in refs_to_nuke
4094+
if x.startswith(b'refs/') or x == b'HEAD']
40864095
if self._args.partial:
40874096
refs_to_nuke = set()
40884097
if refs_to_nuke and self._args.debug:
@@ -4134,13 +4143,11 @@ class RepoFilter(object):
41344143
return new_hash
41354144

41364145
def _compute_metadata(self, metadata_dir, orig_refs):
4137-
already_ran = os.path.isfile(os.path.join(metadata_dir, b'already_ran'))
4138-
41394146
#
41404147
# First, handle commit_renames
41414148
#
41424149
old_commit_renames = dict()
4143-
if not already_ran:
4150+
if not self._already_ran:
41444151
commit_renames = {old: new
41454152
for old, new in self._commit_renames.items()
41464153
}
@@ -4169,7 +4176,7 @@ class RepoFilter(object):
41694176
exported_refs, imported_refs = self.get_exported_and_imported_refs()
41704177

41714178
old_commit_unrenames = dict()
4172-
if not already_ran:
4179+
if not self._already_ran:
41734180
old_ref_map = dict((refname, (old_hash, deleted_hash))
41744181
for refname, old_hash in orig_refs.items()
41754182
if refname in exported_refs)
@@ -4236,7 +4243,7 @@ class RepoFilter(object):
42364243
#
42374244

42384245
old_first_changes = dict()
4239-
if already_ran:
4246+
if self._already_ran:
42404247
# Read first_changes into old_first_changes
42414248
with open(os.path.join(metadata_dir, b'first-changed-commits'), 'br') as f:
42424249
for line in f:

0 commit comments

Comments
 (0)