diff --git a/Documentation/blame-options.adoc b/Documentation/blame-options.adoc index aa77406d4ef335..951bbd1636481f 100644 --- a/Documentation/blame-options.adoc +++ b/Documentation/blame-options.adoc @@ -83,6 +83,7 @@ include::line-range-format.adoc[] or `--incremental`. -M[<num>]:: +--find-renames[=<num>]:: Detect moved or copied lines within a file. When a commit moves or copies a block of lines (e.g. the original file has A and then B, and the commit changes it to B and then @@ -96,7 +97,19 @@ include::line-range-format.adoc[] <num> is optional but it is the lower bound on the number of alphanumeric characters that Git must detect as moving/copying within a file for it to associate those lines with the parent -commit. The default value is 20. +commit. If <num> is specified, it also affects the automatic +detection of whole-file renames. The value can be from 0 to 100 +and represents a similarity index. A value of 0 disables rename +detection entirely, 100 requires exact matches, and values in +between control how similar the file content needs to be to be +considered a rename. The default value is 50. ++ +The `-M` option can also be used to influence rename detection +behavior when following the origin of lines across repository +history. By default, rename detection is enabled at a 50% +similarity threshold, which can lead to performance issues in +large repositories. This option (or the `blame.renames` config) +can be used to disable or adjust the rename detection. -C[<num>]:: In addition to `-M`, detect lines moved or copied from other diff --git a/Documentation/config/blame.adoc b/Documentation/config/blame.adoc index 4d047c17908cd6..0bb6c1285a6a39 100644 --- a/Documentation/config/blame.adoc +++ b/Documentation/config/blame.adoc @@ -35,3 +35,14 @@ blame.markUnblamableLines:: blame.markIgnoredLines:: Mark lines that were changed by an ignored revision that we attributed to another commit with a '?' in the output of linkgit:git-blame[1]. 
+ +blame.renames:: + Controls rename detection when following the history of lines in + linkgit:git-blame[1]. It can be set to `true` (default), `false`, + `copy`, or an integer value specifying the minimum similarity index + (from 0 to 100). When set to `false`, no rename detection is performed. + When set to `true`, rename detection uses the default similarity index + of 50%. When set to `copy`, both rename and copy detection are performed. + An integer value specifies the minimum similarity index, with 0 meaning + "no rename detection" and 100 meaning "only exact renames". The `-M` + option overrides this setting. diff --git a/Documentation/git-blame.adoc b/Documentation/git-blame.adoc index f75ed4479021cb..864b1b8dd2cf6b 100644 --- a/Documentation/git-blame.adoc +++ b/Documentation/git-blame.adoc @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental] - [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>] + [-L <range>] [-S <revs-file>] [-M[<num>]] [--find-renames[=<num>]] [-C] [-C] [-C] [--since=<date>] [--ignore-rev <rev>] [--ignore-revs-file <file>] [--color-lines] [--color-by-age] [--progress] [--abbrev=<n>] [ --contents <file> ] [<rev> | --reverse <rev>..<rev>] [--] <file> @@ -24,10 +24,11 @@ When specified one or more times, `-L` restricts annotation to the requested lines. The origin of lines is automatically followed across whole-file -renames (currently there is no option to turn the rename-following -off). To follow lines moved from one file to another, or to follow -lines that were copied and pasted from another file, etc., see the -`-C` and `-M` options. +renames. By default, git blame follows both exact renames (100% match) +and inexact renames (partially matching content). Use the `-M` option +to control this behavior. To follow lines moved from one file to another, +or to follow lines that were copied and pasted from another file, etc., +see the `-C` and `-M` options. 
The report does not tell you anything about lines which have been deleted or replaced; you need to use a tool such as 'git diff' or the "pickaxe" diff --git a/blame.c b/blame.c index a15ddf933352b0..ad84cd884bc6c3 100644 --- a/blame.c +++ b/blame.c @@ -1321,7 +1321,8 @@ static void add_bloom_key(struct blame_bloom_data *bd, static struct blame_origin *find_origin(struct repository *r, struct commit *parent, struct blame_origin *origin, - struct blame_bloom_data *bd) + struct blame_bloom_data *bd, + struct blame_scoreboard *unused_sb) { struct blame_origin *porigin; struct diff_options diff_opts; @@ -1418,15 +1419,24 @@ static struct blame_origin *find_origin(struct repository *r, static struct blame_origin *find_rename(struct repository *r, struct commit *parent, struct blame_origin *origin, - struct blame_bloom_data *bd) + struct blame_bloom_data *bd, + struct blame_scoreboard *scoreboard) { struct blame_origin *porigin = NULL; struct diff_options diff_opts; int i; + int detection_mode = scoreboard->rename_detection_mode; repo_diff_setup(r, &diff_opts); diff_opts.flags.recursive = 1; - diff_opts.detect_rename = DIFF_DETECT_RENAME; + /* + * Use rename_detection_mode if specified, otherwise default to DIFF_DETECT_RENAME + * For mode values > 0 and < 100, use it as similarity threshold + */ + fprintf(stderr, "DEBUG find_rename detection_mode=%d\n", detection_mode); + diff_opts.detect_rename = (detection_mode == 0) ? 0 : + (detection_mode > 0) ? 
+ detection_mode : DIFF_DETECT_RENAME; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; diff_opts.single_follow = origin->path; diff_setup_done(&diff_opts); @@ -2407,7 +2417,8 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl typedef struct blame_origin *(*blame_find_alg)(struct repository *, struct commit *, struct blame_origin *, - struct blame_bloom_data *); + struct blame_bloom_data *, + struct blame_scoreboard *); static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt) { @@ -2445,7 +2456,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, continue; if (repo_parse_commit(the_repository, p)) continue; - porigin = find(sb->repo, p, origin, sb->bloom_data); + porigin = find(sb->repo, p, origin, sb->bloom_data, sb); if (!porigin) continue; if (oideq(&porigin->blob_oid, &origin->blob_oid)) { @@ -2586,6 +2597,8 @@ void assign_blame(struct blame_scoreboard *sb, int opt) struct rev_info *revs = sb->revs; struct commit *commit = prio_queue_get(&sb->commits); + fprintf(stderr, "DEBUG assign_blame rename_detection_mode=%d\n", sb->rename_detection_mode); + while (commit) { struct blame_entry *ent; struct blame_origin *suspect = get_blame_suspects(commit); @@ -2759,6 +2772,7 @@ void init_scoreboard(struct blame_scoreboard *sb) memset(sb, 0, sizeof(struct blame_scoreboard)); sb->move_score = BLAME_DEFAULT_MOVE_SCORE; sb->copy_score = BLAME_DEFAULT_COPY_SCORE; + sb->rename_detection_mode = -1; /* -1: default, 0: disabled, >0: enabled with score */ } void setup_scoreboard(struct blame_scoreboard *sb, @@ -2768,6 +2782,7 @@ void setup_scoreboard(struct blame_scoreboard *sb, struct blame_origin *o; struct commit *final_commit = NULL; enum object_type type; + int saved_mode = sb->rename_detection_mode; init_blame_suspects(&blame_suspects); @@ -2776,6 +2791,9 @@ void setup_scoreboard(struct blame_scoreboard *sb, if (!sb->repo) BUG("repo is NULL"); + + /* Restore the 
rename_detection_mode since init_scoreboard would reset it */ + sb->rename_detection_mode = saved_mode; if (!sb->reverse) { sb->final = find_single_final(sb->revs, &final_commit_name); diff --git a/blame.h b/blame.h index 3b34be0e5c6932..1070fad7a9c9ab 100644 --- a/blame.h +++ b/blame.h @@ -149,6 +149,7 @@ struct blame_scoreboard { int xdl_opts; int no_whole_file_rename; int debug; + int rename_detection_mode; /* callbacks */ void(*on_sanity_fail)(struct blame_scoreboard *, int); diff --git a/builtin/blame.c b/builtin/blame.c index c470654c7ec2c3..21cc8b3ee3984b 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -84,6 +84,33 @@ static struct string_list mailmap = STRING_LIST_INIT_NODUP; static unsigned blame_move_score; static unsigned blame_copy_score; +static int git_blame_config_rename(const char *var, const char *value, + const struct config_context *ctx, void *cb) +{ + struct blame_scoreboard *sb = cb; + if (!strcmp(var, "blame.renames")) { + if (!value) + return config_error_nonbool(var); + if (!strcmp(value, "true") || !strcmp(value, "1")) { + sb->rename_detection_mode = DIFF_DETECT_RENAME; + } else if (!strcmp(value, "false") || !strcmp(value, "0")) { + sb->rename_detection_mode = 0; + } else if (!strcmp(value, "copy")) { + sb->rename_detection_mode = DIFF_DETECT_COPY; + } else { + int score = git_config_int(var, value, NULL); + if (score < 0 || score > 100) + return error(_("invalid value for %s"), var); + if (score == 100) + sb->rename_detection_mode = 100; /* exact rename only */ + else + sb->rename_detection_mode = score; + } + return 0; + } + return git_default_config(var, value, ctx, cb); +} + /* Remember to update object flag allocation in object.h */ #define METAINFO_SHOWN (1u<<12) #define MORE_THAN_ONE_PATH (1u<<13) @@ -702,6 +729,7 @@ static char *add_prefix(const char *prefix, const char *path) static int git_blame_config(const char *var, const char *value, const struct config_context *ctx, void *cb) { + int *output_option = cb; if 
(!strcmp(var, "blame.showroot")) { show_root = git_config_bool(var, value); return 0; @@ -711,7 +739,6 @@ static int git_blame_config(const char *var, const char *value, return 0; } if (!strcmp(var, "blame.showemail")) { - int *output_option = cb; if (git_config_bool(var, value)) *output_option |= OUTPUT_SHOW_EMAIL; else @@ -779,6 +806,48 @@ static int git_blame_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +static int find_rename_callback(const struct option *option, const char *arg, int unset) +{ + struct blame_scoreboard *sb = option->value; + + if (unset) + return 0; + + /* --find-renames without a score */ + sb->rename_detection_mode = DIFF_DETECT_RENAME; + + if (arg) { + int value; + const char *percent; + + /* Handle -M or --find-renames= */ + value = strtol(arg, (char **) &percent, 10); + if (percent == arg) + return error(_("invalid similarity threshold '%s'"), arg); + if (value < 0 || 100 < value) + return error(_("similarity threshold must be between 0 and 100")); + /* A threshold of 0 is equivalent to no rename detection */ + if (value == 0) + sb->rename_detection_mode = 0; + else if (value == 100) + sb->rename_detection_mode = 100; /* exact rename only */ + else + sb->rename_detection_mode = value; + } + return 0; +} + +static int disable_rename_detection(const struct option *option, const char *arg, int unset) +{ + struct blame_scoreboard *sb = option->value; + if (unset) + return 0; /* --no-no-find-renames is a no-op */ + BUG_ON_OPT_ARG(arg); + fprintf(stderr, "SETTING rename_detection_mode to 0\n"); + sb->rename_detection_mode = 0; + return 0; +} + static int blame_copy_callback(const struct option *option, const char *arg, int unset) { int *opt = option->value; @@ -803,19 +872,6 @@ static int blame_copy_callback(const struct option *option, const char *arg, int return 0; } -static int blame_move_callback(const struct option *option, const char *arg, int unset) -{ - int *opt = option->value; - - 
BUG_ON_OPT_NEG(unset); - - *opt |= PICKAXE_BLAME_MOVE; - - if (arg) - blame_move_score = parse_score(arg); - return 0; -} - static int is_a_rev(const char *name) { struct object_id oid; @@ -915,7 +971,8 @@ int cmd_blame(int argc, OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from instead of calling git-rev-list")), OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use 's contents as the final image")), OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback), - OPT_CALLBACK_F('M', NULL, &opt, N_("score"), N_("find line movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback), + OPT_CALLBACK_F('M', "find-renames", &sb, N_("score"), N_("find renames, optionally set similarity index"), PARSE_OPT_OPTARG, find_rename_callback), + { OPTION_CALLBACK, 0, "no-find-renames", &sb, NULL, N_("disable rename detection"), 0, disable_rename_detection }, OPT_STRING_LIST('L', NULL, &range_list, N_("range"), N_("process only line range , or function :")), OPT__ABBREV(&abbrev), @@ -933,6 +990,7 @@ int cmd_blame(int argc, setup_default_color_by_age(); git_config(git_blame_config, &output_option); + git_config(git_blame_config_rename, &sb); repo_init_revisions(the_repository, &revs, NULL); revs.date_mode = blame_date_mode; revs.diffopt.flags.allow_textconv = 1; @@ -1119,10 +1177,12 @@ int cmd_blame(int argc, sb.reverse = reverse; sb.repo = the_repository; sb.path = path; + fprintf(stderr, "DEBUG before setup_scoreboard rename_detection_mode=%d\n", sb.rename_detection_mode); build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list); string_list_clear(&ignore_revs_file_list, 0); string_list_clear(&ignore_rev_list, 0); setup_scoreboard(&sb, &o); + fprintf(stderr, "DEBUG after setup_scoreboard rename_detection_mode=%d\n", sb.rename_detection_mode); /* * Changed-path Bloom filters are disabled when looking diff --git a/t/t8015-blame-rename-detection.sh 
b/t/t8015-blame-rename-detection.sh new file mode 100755 index 00000000000000..c73319d505bb20 --- /dev/null +++ b/t/t8015-blame-rename-detection.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +test_description='git blame rename detection control' + +. ./test-lib.sh + +test_expect_success 'setup test file rename with content changes' ' + test_write_lines abc def ghi >1.txt && + git add 1.txt && + test_tick && + git commit -m "Initial commit" && + + git mv 1.txt 2.txt && + test_write_lines abc 123 ghi >2.txt && + git add 2.txt && + test_tick && + git commit -m "Rename+edit together" +' + +# This test confirms that by default, git blame follows partial-file renames +test_expect_success 'git blame follows inexact renames by default' ' + COMMIT1=$(git rev-parse --short HEAD^) && + COMMIT2=$(git rev-parse --short HEAD) && + + git blame 2.txt >output && + grep "$COMMIT1" output | grep -q abc && + grep "$COMMIT2" output | grep -q 123 && + grep "$COMMIT1" output | grep -q ghi +' + +# This test confirms that --no-find-renames or -M0 turns off rename detection +test_expect_success 'git blame can disable rename detection' ' + git blame --no-find-renames 2.txt >output && + ! grep -q 1.txt output +' + +# This test checks that blame.renames config works +test_expect_success 'blame.renames=false disables rename detection' ' + git -c blame.renames=false blame 2.txt >output && + ! grep -q 1.txt output +' + +test_done \ No newline at end of file diff --git a/t/test_blame b/t/test_blame new file mode 160000 index 00000000000000..2159045961d5fb --- /dev/null +++ b/t/test_blame @@ -0,0 +1 @@ +Subproject commit 2159045961d5fba34c7e6fea8ee91d5d8be98e55