Merge branch 'tb/midx-avoid-cruft-packs' into ps/object-store-midx

* tb/midx-avoid-cruft-packs:
  repack: exclude cruft pack(s) from the MIDX where possible
  pack-objects: introduce '--stdin-packs=follow'
  pack-objects: swap 'show_{object,commit}_pack_hint'
  pack-objects: fix typo in 'show_object_pack_hint()'
  pack-objects: perform name-hash traversal for unpacked objects
  pack-objects: declare 'rev_info' for '--stdin-packs' earlier
  pack-objects: factor out handling '--stdin-packs'
  pack-objects: limit scope in 'add_object_entry_from_pack()'
  pack-objects: use standard option incompatibility functions
This commit is contained in:
Junio C Hamano
2025-07-15 12:06:57 -07:00
6 changed files with 571 additions and 89 deletions

View File

@@ -39,6 +39,7 @@ static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static int midx_must_contain_cruft = 1;
static const char *const git_repack_usage[] = {
N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
@@ -108,6 +109,10 @@ static int repack_config(const char *var, const char *value,
free(cruft_po_args->threads);
return git_config_string(&cruft_po_args->threads, var, value);
}
if (!strcmp(var, "repack.midxmustcontaincruft")) {
midx_must_contain_cruft = git_config_bool(var, value);
return 0;
}
return git_default_config(var, value, ctx, cb);
}
@@ -690,6 +695,77 @@ static void free_pack_geometry(struct pack_geometry *geometry)
free(geometry->pack);
}
static int midx_has_unknown_packs(char **midx_pack_names,
size_t midx_pack_names_nr,
struct string_list *include,
struct pack_geometry *geometry,
struct existing_packs *existing)
{
size_t i;
string_list_sort(include);
for (i = 0; i < midx_pack_names_nr; i++) {
const char *pack_name = midx_pack_names[i];
/*
* Determine whether or not each MIDX'd pack from the existing
* MIDX (if any) is represented in the new MIDX. For each pack
* in the MIDX, it must either be:
*
* - In the "include" list of packs to be included in the new
* MIDX. Note this function is called before the include
* list is populated with any cruft pack(s).
*
* - Below the geometric split line (if using pack geometry),
* indicating that the pack won't be included in the new
* MIDX, but its contents were rolled up as part of the
* geometric repack.
*
* - In the existing non-kept packs list (if not using pack
* geometry), and marked as non-deleted.
*/
if (string_list_has_string(include, pack_name)) {
continue;
} else if (geometry) {
struct strbuf buf = STRBUF_INIT;
uint32_t j;
for (j = 0; j < geometry->split; j++) {
strbuf_reset(&buf);
strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
strbuf_strip_suffix(&buf, ".pack");
strbuf_addstr(&buf, ".idx");
if (!strcmp(pack_name, buf.buf)) {
strbuf_release(&buf);
break;
}
}
strbuf_release(&buf);
if (j < geometry->split)
continue;
} else {
struct string_list_item *item;
item = string_list_lookup(&existing->non_kept_packs,
pack_name);
if (item && !pack_is_marked_for_deletion(item))
continue;
}
/*
* If we got to this point, the MIDX includes some pack that we
* don't know about.
*/
return 1;
}
return 0;
}
struct midx_snapshot_ref_data {
struct tempfile *f;
struct oidset seen;
@@ -758,6 +834,8 @@ static void midx_snapshot_refs(struct tempfile *f)
static void midx_included_packs(struct string_list *include,
struct existing_packs *existing,
char **midx_pack_names,
size_t midx_pack_names_nr,
struct string_list *names,
struct pack_geometry *geometry)
{
@@ -811,26 +889,56 @@ static void midx_included_packs(struct string_list *include,
}
}
for_each_string_list_item(item, &existing->cruft_packs) {
if (midx_must_contain_cruft ||
midx_has_unknown_packs(midx_pack_names, midx_pack_names_nr,
include, geometry, existing)) {
/*
* When doing a --geometric repack, there is no need to check
* for deleted packs, since we're by definition not doing an
* ALL_INTO_ONE repack (hence no packs will be deleted).
* Otherwise we must check for and exclude any packs which are
* enqueued for deletion.
* If there are one or more unknown pack(s) present (see
* midx_has_unknown_packs() for what makes a pack
* "unknown") in the MIDX before the repack, keep them
* as they may be required to form a reachability
* closure if the MIDX is bitmapped.
*
* So we could omit the conditional below in the --geometric
* case, but doing so is unnecessary since no packs are marked
* as pending deletion (since we only call
* `mark_packs_for_deletion()` when doing an all-into-one
* repack).
* For example, a cruft pack can be required to form a
* reachability closure if the MIDX is bitmapped and one
* or more of the bitmap's selected commits reaches a
* once-cruft object that was later made reachable.
*/
if (pack_is_marked_for_deletion(item))
continue;
for_each_string_list_item(item, &existing->cruft_packs) {
/*
* When doing a --geometric repack, there is no
* need to check for deleted packs, since we're
* by definition not doing an ALL_INTO_ONE
* repack (hence no packs will be deleted).
* Otherwise we must check for and exclude any
* packs which are enqueued for deletion.
*
* So we could omit the conditional below in the
* --geometric case, but doing so is unnecessary
* since no packs are marked as pending
* deletion (since we only call
* `mark_packs_for_deletion()` when doing an
* all-into-one repack).
*/
if (pack_is_marked_for_deletion(item))
continue;
strbuf_reset(&buf);
strbuf_addf(&buf, "%s.idx", item->string);
string_list_insert(include, buf.buf);
strbuf_reset(&buf);
strbuf_addf(&buf, "%s.idx", item->string);
string_list_insert(include, buf.buf);
}
} else {
/*
* Modern versions of Git (with the appropriate
* configuration setting) will write new copies of
* once-cruft objects when doing a --geometric repack.
*
* If the MIDX has no cruft pack, new packs written
* during a --geometric repack will not rely on the
* cruft pack to form a reachability closure, so we can
* avoid including them in the MIDX in that case.
*/
;
}
strbuf_release(&buf);
@@ -1145,6 +1253,8 @@ int cmd_repack(int argc,
struct tempfile *refs_snapshot = NULL;
int i, ext, ret;
int show_progress;
char **midx_pack_names = NULL;
size_t midx_pack_names_nr = 0;
/* variables to be filled by option parsing */
int delete_redundant = 0;
@@ -1362,7 +1472,10 @@ int cmd_repack(int argc,
!(pack_everything & PACK_CRUFT))
strvec_push(&cmd.args, "--pack-loose-unreachable");
} else if (geometry.split_factor) {
strvec_push(&cmd.args, "--stdin-packs");
if (midx_must_contain_cruft)
strvec_push(&cmd.args, "--stdin-packs");
else
strvec_push(&cmd.args, "--stdin-packs=follow");
strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
@@ -1402,8 +1515,25 @@ int cmd_repack(int argc,
if (ret)
goto cleanup;
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));
if (!names.nr) {
if (!po_args.quiet)
printf_ln(_("Nothing new to pack."));
/*
* If we didn't write any new packs, the non-cruft packs
* may refer to once-unreachable objects in the cruft
* pack(s).
*
* If there isn't already a MIDX, the one we write
* must include the cruft pack(s), in case the
* non-cruft pack(s) refer to once-cruft objects.
*
* If there is already a MIDX, we can punt here, since
* midx_has_unknown_packs() will make the decision for
* us.
*/
if (!get_local_multi_pack_index(the_repository))
midx_must_contain_cruft = 1;
}
if (pack_everything & PACK_CRUFT) {
const char *pack_prefix = find_pack_prefix(packdir, packtmp);
@@ -1484,6 +1614,19 @@ int cmd_repack(int argc,
string_list_sort(&names);
if (get_local_multi_pack_index(the_repository)) {
struct multi_pack_index *m =
get_local_multi_pack_index(the_repository);
ALLOC_ARRAY(midx_pack_names,
m->num_packs + m->num_packs_in_base);
for (; m; m = m->base_midx)
for (uint32_t i = 0; i < m->num_packs; i++)
midx_pack_names[midx_pack_names_nr++] =
xstrdup(m->pack_names[i]);
}
close_object_store(the_repository->objects);
/*
@@ -1525,7 +1668,8 @@ int cmd_repack(int argc,
if (write_midx) {
struct string_list include = STRING_LIST_INIT_DUP;
midx_included_packs(&include, &existing, &names, &geometry);
midx_included_packs(&include, &existing, midx_pack_names,
midx_pack_names_nr, &names, &geometry);
ret = write_midx_included_packs(&include, &geometry, &names,
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
@@ -1576,6 +1720,9 @@ cleanup:
string_list_clear(&names, 1);
existing_packs_release(&existing);
free_pack_geometry(&geometry);
for (size_t i = 0; i < midx_pack_names_nr; i++)
free(midx_pack_names[i]);
free(midx_pack_names);
pack_objects_args_release(&po_args);
pack_objects_args_release(&cruft_po_args);