From 5766524956714d51b131d826a2894c85949e5770 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 Jul 2022 15:26:04 +0000 Subject: [PATCH 1/4] pack-bitmap-write: use const for hashes The next change will use a const array when calling this method. There is no need for the non-const version, so let's do this cleanup quickly. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- pack-bitmap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index c43375bd34..4fcfaed428 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -683,7 +683,7 @@ static void write_hash_cache(struct hashfile *f, } } -void bitmap_writer_set_checksum(unsigned char *sha1) +void bitmap_writer_set_checksum(const unsigned char *sha1) { hashcpy(writer.pack_checksum, sha1); } diff --git a/pack-bitmap.h b/pack-bitmap.h index 3d3ddd7734..f3a57ca065 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -75,7 +75,7 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *); void bitmap_writer_show_progress(int show); -void bitmap_writer_set_checksum(unsigned char *sha1); +void bitmap_writer_set_checksum(const unsigned char *sha1); void bitmap_writer_build_type_index(struct packing_data *to_pack, struct pack_idx_entry **index, uint32_t index_nr); From 90b2bb710da47f12967a6f6a32640c197ca82685 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 Jul 2022 15:26:05 +0000 Subject: [PATCH 2/4] midx: extract bitmap write setup The write_midx_bitmap() method is a long method that does a lot of steps. It requires the write_midx_context struct for use in prepare_midx_packing_data() and find_commits_for_midx_bitmap(), but after that only needs the pack_order array. This is a messy, but completely non-functional refactoring. The code is only being moved around to reduce visibility of the write_midx_context during the longest part of computing reachability bitmaps. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 56 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/midx.c b/midx.c index 5f0dd386b0..e2dd808b35 100644 --- a/midx.c +++ b/midx.c @@ -1053,40 +1053,35 @@ static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr return cb.commits; } -static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, - struct write_midx_context *ctx, +static int write_midx_bitmap(const char *midx_name, + const unsigned char *midx_hash, + struct packing_data *pdata, + struct commit **commits, + uint32_t commits_nr, + uint32_t *pack_order, const char *refs_snapshot, unsigned flags) { - struct packing_data pdata; - struct pack_idx_entry **index; - struct commit **commits = NULL; - uint32_t i, commits_nr; + int ret, i; uint16_t options = 0; - char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); - int ret; - - if (!ctx->entries_nr) - BUG("cannot write a bitmap without any objects"); + struct pack_idx_entry **index; + char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, + hash_to_hex(midx_hash)); if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) options |= BITMAP_OPT_HASH_CACHE; - prepare_midx_packing_data(&pdata, ctx); - - commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, ctx); - /* * Build the MIDX-order index based on pdata.objects (which is already * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of * this order). */ - ALLOC_ARRAY(index, pdata.nr_objects); - for (i = 0; i < pdata.nr_objects; i++) - index[i] = &pdata.objects[i].idx; + ALLOC_ARRAY(index, pdata->nr_objects); + for (i = 0; i < pdata->nr_objects; i++) + index[i] = &pdata->objects[i].idx; bitmap_writer_show_progress(flags & MIDX_PROGRESS); - bitmap_writer_build_type_index(&pdata, index, pdata.nr_objects); + bitmap_writer_build_type_index(pdata, index, pdata->nr_objects); /* * bitmap_writer_finish expects objects in lex order, but pack_order @@ -1101,16 +1096,16 @@ static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, * happens between bitmap_writer_build_type_index() and * bitmap_writer_finish(). */ - for (i = 0; i < pdata.nr_objects; i++) - index[ctx->pack_order[i]] = &pdata.objects[i].idx; + for (i = 0; i < pdata->nr_objects; i++) + index[pack_order[i]] = &pdata->objects[i].idx; bitmap_writer_select_commits(commits, commits_nr, -1); - ret = bitmap_writer_build(&pdata); + ret = bitmap_writer_build(pdata); if (ret < 0) goto cleanup; bitmap_writer_set_checksum(midx_hash); - bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, options); + bitmap_writer_finish(index, pdata->nr_objects, bitmap_name, options); cleanup: free(index); @@ -1443,8 +1438,21 @@ static int write_midx_internal(const char *object_dir, if (flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(midx_name.buf, midx_hash, &ctx); + if (flags & MIDX_WRITE_BITMAP) { - if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx, + struct packing_data pdata; + struct commit **commits; + uint32_t commits_nr; + + if (!ctx.entries_nr) + BUG("cannot write a bitmap without any objects"); + + prepare_midx_packing_data(&pdata, &ctx); + + commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx); + + if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata, + commits, commits_nr, ctx.pack_order, refs_snapshot, flags) < 0) { error(_("could not write multi-pack bitmap")); result = 1; From 068fa54c0034be0b953d798628b06815f9cfaff0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 Jul 2022 15:26:06 +0000 Subject: [PATCH 3/4] midx: reduce memory pressure while writing bitmaps We noticed that some 'git multi-pack-index write --bitmap' processes were running with very high memory. It turns out that a lot of this memory is required to store a list of every object in the written multi-pack-index, with a second copy that has additional information used for the bitmap writing logic. Using 'valgrind --tool=massif' before this change, the following chart shows how memory load increased and was maintained throughout the process: GB 4.102^ :: | @ @::@@::@@::::::::@::::::@@:#:::::::::::::@@:: : | :::::@@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :::: :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :::: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | : :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | : :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @ :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @ :: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @::: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @::: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @::: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : | @::: :: : :: @@:@: @ ::@ ::: ::::@: ::: @@:#:::::: :: : :@ :: : 0 +---------------------------------------------------------------> It turns out that the 'struct write_midx_context' data is persisting through the life of the process, including the 'entries' array. This array is used last inside find_commits_for_midx_bitmap() within write_midx_bitmap(). If we free (and nullify) the array at that point, we can free a decent chunk of memory before the bitmap logic adds more to the memory footprint. Here is the massif memory load chart after this change: GB 3.111^ # | # :::::::::::@::::::::::::::@ | # ::::::::::::::::::::::::: : :: : @:: ::::: :: ::@ | @# :::::::::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | :::@#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | :: @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | :: @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ | :: @#::: ::: :::::: :::: :: : :::::::: : :: : @:: ::::: :: ::@ 0 +---------------------------------------------------------------> The previous change introduced a refactoring of write_midx_bitmap() to make it more clear how much of the 'struct write_midx_context' instance is needed at different parts of the process. In addition, the following defensive programming measures were put in place: 1. Using FREE_AND_NULL() we will at least get a segfault from reading a NULL pointer instead of a use-after-free. 2. 'entries_nr' is also set to zero to make any loop that would iterate over the entries be trivial. 3. Add significant comments in write_midx_internal() to add warnings for future authors who might accidentally add references to this cleared memory. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/midx.c b/midx.c index e2dd808b35..772ab7d294 100644 --- a/midx.c +++ b/midx.c @@ -1451,6 +1451,15 @@ static int write_midx_internal(const char *object_dir, commits = find_commits_for_midx_bitmap(&commits_nr, refs_snapshot, &ctx); + /* + * The previous steps translated the information from + * 'entries' into information suitable for constructing + * bitmaps. We no longer need that array, so clear it to + * reduce memory pressure. + */ + FREE_AND_NULL(ctx.entries); + ctx.entries_nr = 0; + if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata, commits, commits_nr, ctx.pack_order, refs_snapshot, flags) < 0) { @@ -1459,6 +1468,10 @@ static int write_midx_internal(const char *object_dir, goto cleanup; } } + /* + * NOTE: Do not use ctx.entries beyond this point, since it might + * have been freed in the previous if block. + */ if (ctx.m) close_object_store(the_repository->objects); From 51d1b69a534b591aa4afdc6f64c0e7aa8d886961 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 26 Jul 2022 18:05:03 -0400 Subject: [PATCH 4/4] write_midx_bitmap(): drop unused refs_snapshot parameter The refactoring in 90b2bb710d (midx: extract bitmap write setup, 2022-07-19) hoisted our call to find_commits_for_midx_bitmap() into the caller, which means we no longer need to see the refs_snapshot at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 772ab7d294..4e956cacb7 100644 --- a/midx.c +++ b/midx.c @@ -1059,7 +1059,6 @@ static int write_midx_bitmap(const char *midx_name, struct commit **commits, uint32_t commits_nr, uint32_t *pack_order, - const char *refs_snapshot, unsigned flags) { int ret, i; @@ -1462,7 +1461,7 @@ static int write_midx_internal(const char *object_dir, if (write_midx_bitmap(midx_name.buf, midx_hash, &pdata, commits, commits_nr, ctx.pack_order, - refs_snapshot, flags) < 0) { + flags) < 0) { error(_("could not write multi-pack bitmap")); result = 1; goto cleanup;