From a241878ac7b67b1894c14d3c3e33ed13f11ae253 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Aug 2021 22:48:52 -0400 Subject: [PATCH 1/3] object-store.h: teach for_each_packed_object to ignore kept packs The next patch will reimplement a function that wants to iterate over packed objects while ignoring packs which are marked as kept (either in-core or on-disk). Teach for_each_packed_object() to ignore all objects from those packs by adding a new flag for each of the "kept" states that a pack can be in. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- object-store.h | 6 ++++++ packfile.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/object-store.h b/object-store.h index d24915ced1..b4dc6668aa 100644 --- a/object-store.h +++ b/object-store.h @@ -455,6 +455,12 @@ enum for_each_object_flags { * Visit objects within a pack in packfile order rather than .idx order */ FOR_EACH_OBJECT_PACK_ORDER = (1<<2), + + /* Only iterate over packs that are not marked as kept in-core. */ + FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), + + /* Only iterate over packs that do not have .keep files. */ + FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; /* diff --git a/packfile.c b/packfile.c index 9ef6d98292..4d0d625238 100644 --- a/packfile.c +++ b/packfile.c @@ -2205,6 +2205,12 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; + if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + p->pack_keep_in_core) + continue; + if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + p->pack_keep) + continue; if (open_pack_index(p)) { pack_errors = 1; continue; From a9fd2f207d0318cd7a4771f13c9fb4759d280f68 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Aug 2021 22:48:54 -0400 Subject: [PATCH 2/3] builtin/pack-objects.c: simplify add_objects_in_unpacked_packs() This function is used to implement `pack-objects`'s `--keep-unreachable` option, but can be simplified in a couple of ways: - add_objects_in_unpacked_packs() iterates over all packs (and then all packed objects) itself, but could use for_each_packed_object() instead since the missing flags necessary were added in the previous commit - objects are added to an in_pack array which store (off_t, object) tuples, and then sorted in offset order when we could iterate objects in offset order. There is a slight behavior change here: before we would have added objects in sorted offset order among _all_ packs. Handing objects to create_object_entry() in pack order for each pack (instead of feeding objects from all packs simultaneously their offset relative to different packs) is much more reasonable, if different than how the code currently works. - objects in a single pack are iterated in index order and searched for in order to discover their offsets, which is much less efficient than using the on-disk reverse index Simplify the function by addressing each of the above and moving the core of the loop into a callback function that we then pass to for_each_packed_object() instead of open-coding the latter function ourselves. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 84 ++++++++---------------------------------- 1 file changed, 16 insertions(+), 68 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index df49f656b9..87ddbd5553 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3505,79 +3505,27 @@ static void show_edge(struct commit *commit) add_preferred_base(&commit->object.oid); } -struct in_pack_object { - off_t offset; - struct object *object; -}; - -struct in_pack { - unsigned int alloc; - unsigned int nr; - struct in_pack_object *array; -}; - -static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack) +static int add_object_in_unpacked_pack(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_data) { - in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p); - in_pack->array[in_pack->nr].object = object; - in_pack->nr++; -} - -/* - * Compare the objects in the offset order, in order to emulate the - * "git rev-list --objects" output that produced the pack originally. - */ -static int ofscmp(const void *a_, const void *b_) -{ - struct in_pack_object *a = (struct in_pack_object *)a_; - struct in_pack_object *b = (struct in_pack_object *)b_; - - if (a->offset < b->offset) - return -1; - else if (a->offset > b->offset) - return 1; - else - return oidcmp(&a->object->oid, &b->object->oid); + struct object *obj = lookup_unknown_object(the_repository, oid); + if (obj->flags & OBJECT_ADDED) + return 0; + add_object_entry(oid, obj->type, "", 0); + obj->flags |= OBJECT_ADDED; + return 0; } static void add_objects_in_unpacked_packs(void) { - struct packed_git *p; - struct in_pack in_pack; - uint32_t i; - - memset(&in_pack, 0, sizeof(in_pack)); - - for (p = get_all_packs(the_repository); p; p = p->next) { - struct object_id oid; - struct object *o; - - if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) - continue; - if (open_pack_index(p)) - die(_("cannot open pack index")); - - ALLOC_GROW(in_pack.array, - in_pack.nr + p->num_objects, - in_pack.alloc); - - for (i = 0; i < p->num_objects; i++) { - nth_packed_object_id(&oid, p, i); - o = lookup_unknown_object(the_repository, &oid); - if (!(o->flags & OBJECT_ADDED)) - mark_in_pack_object(o, p, &in_pack); - o->flags |= OBJECT_ADDED; - } - } - - if (in_pack.nr) { - QSORT(in_pack.array, in_pack.nr, ofscmp); - for (i = 0; i < in_pack.nr; i++) { - struct object *o = in_pack.array[i].object; - add_object_entry(&o->oid, o->type, "", 0); - } - } - free(in_pack.array); + if (for_each_packed_object(add_object_in_unpacked_pack, NULL, + FOR_EACH_OBJECT_PACK_ORDER | + FOR_EACH_OBJECT_LOCAL_ONLY | + FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + die(_("cannot open pack index")); } static int add_loose_object(const struct object_id *oid, const char *path, From b0173340c6b5fb330f5ea22504389fc6c5367f14 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Aug 2021 22:48:57 -0400 Subject: [PATCH 3/3] builtin/pack-objects.c: remove duplicate hash lookup In the original code from 08cdfb1337 (pack-objects --keep-unreachable, 2007-09-16), we add each object to the packing list with type `obj->type`, where `obj` comes from `lookup_unknown_object()`. Unless we had already looked up and parsed the object, this will be `OBJ_NONE`. That's fine, since oe_set_type() sets the type_valid bit to '0', and we determine the real type later on. So the only thing we need from the object lookup is access to the `flags` field so that we can mark that we've added the object with `OBJECT_ADDED` to avoid adding it again (we can just pass `OBJ_NONE` directly instead of grabbing it from the object). But add_object_entry() already rejects duplicates! This has been the behavior since 7a979d99ba (Thin pack - create packfile with missing delta base., 2006-02-19), but 08cdfb1337 didn't take advantage of it. Moreover, to do the OBJECT_ADDED check, we have to do a hash lookup in `obj_hash`. So we can drop the lookup_unknown_object() call completely, *and* the OBJECT_ADDED flag, too, since the spot we're touching here is the only location that checks it. In the end, we perform the same number of hash lookups, but with the added bonus that we don't waste memory allocating an OBJ_NONE object (if we were traversing, we'd need it eventually, but the whole point of this code path is not to traverse). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 11 +---------- object.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 87ddbd5553..ec8503563a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3405,13 +3405,9 @@ static void read_object_list_from_stdin(void) } } -/* Remember to update object flag allocation in object.h */ -#define OBJECT_ADDED (1u<<20) - static void show_commit(struct commit *commit, void *data) { add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0); - commit->object.flags |= OBJECT_ADDED; if (write_bitmap_index) index_commit_for_bitmap(commit); @@ -3424,7 +3420,6 @@ static void show_object(struct object *obj, const char *name, void *data) { add_preferred_base_object(name); add_object_entry(&obj->oid, obj->type, name, 0); - obj->flags |= OBJECT_ADDED; if (use_delta_islands) { const char *p; @@ -3510,11 +3505,7 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, uint32_t pos, void *_data) { - struct object *obj = lookup_unknown_object(the_repository, oid); - if (obj->flags & OBJECT_ADDED) - return 0; - add_object_entry(oid, obj->type, "", 0); - obj->flags |= OBJECT_ADDED; + add_object_entry(oid, OBJ_NONE, "", 0); return 0; } diff --git a/object.h b/object.h index 3b38c9cc98..549f2d256b 100644 --- a/object.h +++ b/object.h @@ -75,7 +75,6 @@ struct object_array { * builtin/fsck.c: 0--3 * builtin/gc.c: 0 * builtin/index-pack.c: 2021 - * builtin/pack-objects.c: 20 * builtin/reflog.c: 10--12 * builtin/show-branch.c: 0-------------------------------------------26 * builtin/unpack-objects.c: 2021