Merge branch 'tb/midx-avoid-cruft-packs' into ps/object-store-midx
* tb/midx-avoid-cruft-packs:
repack: exclude cruft pack(s) from the MIDX where possible
pack-objects: introduce '--stdin-packs=follow'
pack-objects: swap 'show_{object,commit}_pack_hint'
pack-objects: fix typo in 'show_object_pack_hint()'
pack-objects: perform name-hash traversal for unpacked objects
pack-objects: declare 'rev_info' for '--stdin-packs' earlier
pack-objects: factor out handling '--stdin-packs'
pack-objects: limit scope in 'add_object_entry_from_pack()'
pack-objects: use standard option incompatibility functions
This commit is contained in:
@@ -284,6 +284,12 @@ static struct oidmap configured_exclusions;
|
||||
static struct oidset excluded_by_config;
|
||||
static int name_hash_version = -1;
|
||||
|
||||
enum stdin_packs_mode {
|
||||
STDIN_PACKS_MODE_NONE,
|
||||
STDIN_PACKS_MODE_STANDARD,
|
||||
STDIN_PACKS_MODE_FOLLOW,
|
||||
};
|
||||
|
||||
/**
|
||||
* Check whether the name_hash_version chosen by user input is appropriate,
|
||||
* and also validate whether it is compatible with other features.
|
||||
@@ -3727,7 +3733,6 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
||||
return 0;
|
||||
|
||||
if (p) {
|
||||
struct rev_info *revs = _data;
|
||||
struct object_info oi = OBJECT_INFO_INIT;
|
||||
|
||||
oi.typep = &type;
|
||||
@@ -3735,6 +3740,7 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
||||
die(_("could not get type of object %s in pack %s"),
|
||||
oid_to_hex(oid), p->pack_name);
|
||||
} else if (type == OBJ_COMMIT) {
|
||||
struct rev_info *revs = _data;
|
||||
/*
|
||||
* commits in included packs are used as starting points for the
|
||||
* subsequent revision walk
|
||||
@@ -3750,32 +3756,48 @@ static int add_object_entry_from_pack(const struct object_id *oid,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void show_commit_pack_hint(struct commit *commit UNUSED,
|
||||
void *data UNUSED)
|
||||
static void show_object_pack_hint(struct object *object, const char *name,
|
||||
void *data)
|
||||
{
|
||||
/* nothing to do; commits don't have a namehash */
|
||||
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
|
||||
if (mode == STDIN_PACKS_MODE_FOLLOW) {
|
||||
if (object->type == OBJ_BLOB &&
|
||||
!has_object(the_repository, &object->oid, 0))
|
||||
return;
|
||||
add_object_entry(&object->oid, object->type, name, 0);
|
||||
} else {
|
||||
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
|
||||
if (!oe)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Our 'to_pack' list was constructed by iterating all
|
||||
* objects packed in included packs, and so doesn't have
|
||||
* a non-zero hash field that you would typically pick
|
||||
* up during a reachability traversal.
|
||||
*
|
||||
* Make a best-effort attempt to fill in the ->hash and
|
||||
* ->no_try_delta fields here in order to perhaps
|
||||
* improve the delta selection process.
|
||||
*/
|
||||
oe->hash = pack_name_hash_fn(name);
|
||||
oe->no_try_delta = name && no_try_delta(name);
|
||||
|
||||
stdin_packs_hints_nr++;
|
||||
}
|
||||
}
|
||||
|
||||
static void show_object_pack_hint(struct object *object, const char *name,
|
||||
void *data UNUSED)
|
||||
static void show_commit_pack_hint(struct commit *commit, void *data)
|
||||
{
|
||||
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
|
||||
if (!oe)
|
||||
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
|
||||
|
||||
if (mode == STDIN_PACKS_MODE_FOLLOW) {
|
||||
show_object_pack_hint((struct object *)commit, "", data);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Our 'to_pack' list was constructed by iterating all objects packed in
|
||||
* included packs, and so doesn't have a non-zero hash field that you
|
||||
* would typically pick up during a reachability traversal.
|
||||
*
|
||||
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
|
||||
* here using a now in order to perhaps improve the delta selection
|
||||
* process.
|
||||
*/
|
||||
oe->hash = pack_name_hash_fn(name);
|
||||
oe->no_try_delta = name && no_try_delta(name);
|
||||
/* nothing to do; commits don't have a namehash */
|
||||
|
||||
stdin_packs_hints_nr++;
|
||||
}
|
||||
|
||||
static int pack_mtime_cmp(const void *_a, const void *_b)
|
||||
@@ -3795,7 +3817,7 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void read_packs_list_from_stdin(void)
|
||||
static void read_packs_list_from_stdin(struct rev_info *revs)
|
||||
{
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
struct string_list include_packs = STRING_LIST_INIT_DUP;
|
||||
@@ -3803,24 +3825,6 @@ static void read_packs_list_from_stdin(void)
|
||||
struct string_list_item *item = NULL;
|
||||
|
||||
struct packed_git *p;
|
||||
struct rev_info revs;
|
||||
|
||||
repo_init_revisions(the_repository, &revs, NULL);
|
||||
/*
|
||||
* Use a revision walk to fill in the namehash of objects in the include
|
||||
* packs. To save time, we'll avoid traversing through objects that are
|
||||
* in excluded packs.
|
||||
*
|
||||
* That may cause us to avoid populating all of the namehash fields of
|
||||
* all included objects, but our goal is best-effort, since this is only
|
||||
* an optimization during delta selection.
|
||||
*/
|
||||
revs.no_kept_objects = 1;
|
||||
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
|
||||
revs.blob_objects = 1;
|
||||
revs.tree_objects = 1;
|
||||
revs.tag_objects = 1;
|
||||
revs.ignore_missing_links = 1;
|
||||
|
||||
while (strbuf_getline(&buf, stdin) != EOF) {
|
||||
if (!buf.len)
|
||||
@@ -3890,25 +3894,55 @@ static void read_packs_list_from_stdin(void)
|
||||
struct packed_git *p = item->util;
|
||||
for_each_object_in_pack(p,
|
||||
add_object_entry_from_pack,
|
||||
&revs,
|
||||
revs,
|
||||
FOR_EACH_OBJECT_PACK_ORDER);
|
||||
}
|
||||
|
||||
strbuf_release(&buf);
|
||||
string_list_clear(&include_packs, 0);
|
||||
string_list_clear(&exclude_packs, 0);
|
||||
}
|
||||
|
||||
static void add_unreachable_loose_objects(struct rev_info *revs);
|
||||
|
||||
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
|
||||
{
|
||||
struct rev_info revs;
|
||||
|
||||
repo_init_revisions(the_repository, &revs, NULL);
|
||||
/*
|
||||
* Use a revision walk to fill in the namehash of objects in the include
|
||||
* packs. To save time, we'll avoid traversing through objects that are
|
||||
* in excluded packs.
|
||||
*
|
||||
* That may cause us to avoid populating all of the namehash fields of
|
||||
* all included objects, but our goal is best-effort, since this is only
|
||||
* an optimization during delta selection.
|
||||
*/
|
||||
revs.no_kept_objects = 1;
|
||||
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
|
||||
revs.blob_objects = 1;
|
||||
revs.tree_objects = 1;
|
||||
revs.tag_objects = 1;
|
||||
revs.ignore_missing_links = 1;
|
||||
|
||||
/* avoids adding objects in excluded packs */
|
||||
ignore_packed_keep_in_core = 1;
|
||||
read_packs_list_from_stdin(&revs);
|
||||
if (rev_list_unpacked)
|
||||
add_unreachable_loose_objects(&revs);
|
||||
|
||||
if (prepare_revision_walk(&revs))
|
||||
die(_("revision walk setup failed"));
|
||||
traverse_commit_list(&revs,
|
||||
show_commit_pack_hint,
|
||||
show_object_pack_hint,
|
||||
NULL);
|
||||
&mode);
|
||||
|
||||
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
|
||||
stdin_packs_found_nr);
|
||||
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
|
||||
stdin_packs_hints_nr);
|
||||
|
||||
strbuf_release(&buf);
|
||||
string_list_clear(&include_packs, 0);
|
||||
string_list_clear(&exclude_packs, 0);
|
||||
}
|
||||
|
||||
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
|
||||
@@ -4006,7 +4040,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
|
||||
}
|
||||
}
|
||||
|
||||
static void add_unreachable_loose_objects(void);
|
||||
static void add_objects_in_unpacked_packs(void);
|
||||
|
||||
static void enumerate_cruft_objects(void)
|
||||
@@ -4016,7 +4049,7 @@ static void enumerate_cruft_objects(void)
|
||||
_("Enumerating cruft objects"), 0);
|
||||
|
||||
add_objects_in_unpacked_packs();
|
||||
add_unreachable_loose_objects();
|
||||
add_unreachable_loose_objects(NULL);
|
||||
|
||||
stop_progress(&progress_state);
|
||||
}
|
||||
@@ -4294,8 +4327,9 @@ static void add_objects_in_unpacked_packs(void)
|
||||
}
|
||||
|
||||
static int add_loose_object(const struct object_id *oid, const char *path,
|
||||
void *data UNUSED)
|
||||
void *data)
|
||||
{
|
||||
struct rev_info *revs = data;
|
||||
enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);
|
||||
|
||||
if (type < 0) {
|
||||
@@ -4316,6 +4350,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
|
||||
} else {
|
||||
add_object_entry(oid, type, "", 0);
|
||||
}
|
||||
|
||||
if (revs && type == OBJ_COMMIT)
|
||||
add_pending_oid(revs, NULL, oid, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4324,11 +4362,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
|
||||
* add_object_entry will weed out duplicates, so we just add every
|
||||
* loose object we find.
|
||||
*/
|
||||
static void add_unreachable_loose_objects(void)
|
||||
static void add_unreachable_loose_objects(struct rev_info *revs)
|
||||
{
|
||||
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
|
||||
add_loose_object,
|
||||
NULL, NULL, NULL);
|
||||
add_loose_object, NULL, NULL, revs);
|
||||
}
|
||||
|
||||
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
|
||||
@@ -4675,7 +4712,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
|
||||
if (keep_unreachable)
|
||||
add_objects_in_unpacked_packs();
|
||||
if (pack_loose_unreachable)
|
||||
add_unreachable_loose_objects();
|
||||
add_unreachable_loose_objects(NULL);
|
||||
if (unpack_unreachable)
|
||||
loosen_unused_packed_objects();
|
||||
|
||||
@@ -4782,6 +4819,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
|
||||
return is_not_in_promisor_pack_obj((struct object *) commit, data);
|
||||
}
|
||||
|
||||
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
|
||||
int unset)
|
||||
{
|
||||
enum stdin_packs_mode *mode = opt->value;
|
||||
|
||||
if (unset)
|
||||
*mode = STDIN_PACKS_MODE_NONE;
|
||||
else if (!arg || !*arg)
|
||||
*mode = STDIN_PACKS_MODE_STANDARD;
|
||||
else if (!strcmp(arg, "follow"))
|
||||
*mode = STDIN_PACKS_MODE_FOLLOW;
|
||||
else
|
||||
die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmd_pack_objects(int argc,
|
||||
const char **argv,
|
||||
const char *prefix,
|
||||
@@ -4792,7 +4846,7 @@ int cmd_pack_objects(int argc,
|
||||
struct strvec rp = STRVEC_INIT;
|
||||
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
|
||||
int rev_list_index = 0;
|
||||
int stdin_packs = 0;
|
||||
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
|
||||
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
|
||||
struct list_objects_filter_options filter_options =
|
||||
LIST_OBJECTS_FILTER_INIT;
|
||||
@@ -4847,6 +4901,9 @@ int cmd_pack_objects(int argc,
|
||||
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
|
||||
N_("include objects referred to by the index"),
|
||||
1, PARSE_OPT_NONEG),
|
||||
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
|
||||
N_("read packs from stdin"),
|
||||
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
|
||||
OPT_BOOL(0, "stdin-packs", &stdin_packs,
|
||||
N_("read packs from stdin")),
|
||||
OPT_BOOL(0, "stdout", &pack_to_stdout,
|
||||
@@ -5012,9 +5069,10 @@ int cmd_pack_objects(int argc,
|
||||
strvec_push(&rp, "--unpacked");
|
||||
}
|
||||
|
||||
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
|
||||
die(_("options '%s' and '%s' cannot be used together"),
|
||||
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
|
||||
die_for_incompatible_opt2(exclude_promisor_objects,
|
||||
"--exclude-promisor-objects",
|
||||
exclude_promisor_objects_best_effort,
|
||||
"--exclude-promisor-objects-best-effort");
|
||||
if (exclude_promisor_objects) {
|
||||
use_internal_rev_list = 1;
|
||||
fetch_if_missing = 0;
|
||||
@@ -5052,13 +5110,14 @@ int cmd_pack_objects(int argc,
|
||||
if (!pack_to_stdout && thin)
|
||||
die(_("--thin cannot be used to build an indexable pack"));
|
||||
|
||||
if (keep_unreachable && unpack_unreachable)
|
||||
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
|
||||
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
|
||||
unpack_unreachable, "--unpack-unreachable");
|
||||
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
|
||||
unpack_unreachable_expiration = 0;
|
||||
|
||||
if (stdin_packs && filter_options.choice)
|
||||
die(_("cannot use --filter with --stdin-packs"));
|
||||
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
|
||||
filter_options.choice, "--filter");
|
||||
|
||||
|
||||
if (stdin_packs && use_internal_rev_list)
|
||||
die(_("cannot use internal rev list with --stdin-packs"));
|
||||
@@ -5066,8 +5125,8 @@ int cmd_pack_objects(int argc,
|
||||
if (cruft) {
|
||||
if (use_internal_rev_list)
|
||||
die(_("cannot use internal rev list with --cruft"));
|
||||
if (stdin_packs)
|
||||
die(_("cannot use --stdin-packs with --cruft"));
|
||||
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
|
||||
cruft, "--cruft");
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5135,11 +5194,7 @@ int cmd_pack_objects(int argc,
|
||||
progress_state = start_progress(the_repository,
|
||||
_("Enumerating objects"), 0);
|
||||
if (stdin_packs) {
|
||||
/* avoids adding objects in excluded packs */
|
||||
ignore_packed_keep_in_core = 1;
|
||||
read_packs_list_from_stdin();
|
||||
if (rev_list_unpacked)
|
||||
add_unreachable_loose_objects();
|
||||
read_stdin_packs(stdin_packs, rev_list_unpacked);
|
||||
} else if (cruft) {
|
||||
read_cruft_objects();
|
||||
} else if (!use_internal_rev_list) {
|
||||
|
||||
Reference in New Issue
Block a user