From 4bc0ba082907464fd940d9025b641d52ce6e56e3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 May 2025 18:11:51 +0000 Subject: [PATCH 01/22] pack-objects: extract should_attempt_deltas() This will be helpful in a future change, which will reuse this logic. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 56 ++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 58a9b1612626e0..7805429f5d1cb0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3196,6 +3196,36 @@ static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, cons return 0; } +static int should_attempt_deltas(struct object_entry *entry) +{ + if (DELTA(entry)) + /* This happens if we decided to reuse existing + * delta from a pack. "reuse_delta &&" is implied. + */ + return 0; + + if (!entry->type_valid || + oe_size_less_than(&to_pack, entry, 50)) + return 0; + + if (entry->no_try_delta) + return 0; + + if (!entry->preferred_base) { + if (oe_type(entry) < 0) + die(_("unable to get type of object %s"), + oid_to_hex(&entry->idx.oid)); + } else if (oe_type(entry) < 0) { + /* + * This object is not found, but we + * don't have to include it anyway. + */ + return 0; + } + + return 1; +} + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -3226,33 +3256,11 @@ static void prepare_pack(int window, int depth) for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = to_pack.objects + i; - if (DELTA(entry)) - /* This happens if we decided to reuse existing - * delta from a pack. "reuse_delta &&" is implied. - */ - continue; - - if (!entry->type_valid || - oe_size_less_than(&to_pack, entry, 50)) + if (!should_attempt_deltas(entry)) continue; - if (entry->no_try_delta) - continue; - - if (!entry->preferred_base) { + if (!entry->preferred_base) nr_deltas++; - if (oe_type(entry) < 0) - die(_("unable to get type of object %s"), - oid_to_hex(&entry->idx.oid)); - } else { - if (oe_type(entry) < 0) { - /* - * This object is not found, but we - * don't have to include it anyway. - */ - continue; - } - } delta_list[n++] = entry; } From 70664d2865ccd21da4a182fed6d11da5a615cd2b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 May 2025 18:11:52 +0000 Subject: [PATCH 02/22] pack-objects: add --path-walk option In order to more easily compute delta bases among objects that appear at the exact same path, add a --path-walk option to 'git pack-objects'. This option will use the path-walk API instead of the object walk given by the revision machinery. Since objects will be provided in batches representing a common path, those objects can be tested for delta bases immediately instead of waiting for a sort of the full object list by name-hash. This has multiple benefits, including avoiding collisions by name-hash. The objects marked as UNINTERESTING are included in these batches, so we are guaranteeing some locality to find good delta bases. After the individual passes are done on a per-path basis, the default name-hash is used to find other opportunistic delta bases that did not match exactly by the full path name. The current implementation performs delta calculations while walking objects, which is not ideal for a few reasons. First, this will cause the "Enumerating objects" phase to be much longer than usual. Second, it does not take advantage of threading during the path-scoped delta calculations. Even with this lack of threading, the path-walk option is sometimes faster than the usual approach. Future changes will refactor this code to allow for threading, but that complexity is deferred until later to keep this patch as simple as possible. This new walk is incompatible with some features and is ignored by others: * Object filters are not currently integrated with the path-walk API, such as sparse-checkout or tree depth. A blobless packfile could be integrated easily, but that is deferred for later. * Server-focused features such as delta islands, shallow packs, and using a bitmap index are incompatible with the path-walk API. * The path walk API is only compatible with the --revs option, not taking object lists or pack lists over stdin. These alternative ways to specify the objects currently ignores the --path-walk option without even a warning. Future changes will create performance tests that demonstrate the power of this approach. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.adoc | 13 +- Documentation/technical/api-path-walk.adoc | 1 + builtin/pack-objects.c | 148 +++++++++++++++++++-- t/t5300-pack-object.sh | 15 +++ 4 files changed, 168 insertions(+), 9 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 7f69ae4855f6ce..3b803d3a783030 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -16,7 +16,7 @@ SYNOPSIS [--cruft] [--cruft-expiration=