Merge branch 'ds/name-hash-tweaks'

"git pack-objects" and its wrapper "git repack" learned an option
to use an alternative path-hash function to improve delta-base
selection to produce a packfile with deeper history than window
size.

* ds/name-hash-tweaks:
  pack-objects: prevent name hash version change
  test-tool: add helper for name-hash values
  p5313: add size comparison test
  pack-objects: add GIT_TEST_NAME_HASH_VERSION
  repack: add --name-hash-version option
  pack-objects: add --name-hash-version option
  pack-objects: create new name-hash function version
This commit is contained in:
Junio C Hamano
2025-02-12 10:08:51 -08:00
22 changed files with 389 additions and 16 deletions

View File

@@ -269,6 +269,43 @@ struct configured_exclusion {
static struct oidmap configured_exclusions;
static struct oidset excluded_by_config;
/*
 * Name-hash function version used when hashing object paths. Starts at -1
 * ("not chosen yet"); the --name-hash-version option may set it, and
 * cmd_pack_objects() falls back to GIT_TEST_NAME_HASH_VERSION (default 1)
 * while it is still negative.
 */
static int name_hash_version = -1;
/**
* Check whether the name_hash_version chosen by user input is appropriate,
* and also validate whether it is compatible with other features.
*/
static void validate_name_hash_version(void)
{
if (name_hash_version < 1 || name_hash_version > 2)
die(_("invalid --name-hash-version option: %d"), name_hash_version);
if (write_bitmap_index && name_hash_version != 1) {
warning(_("currently, --write-bitmap-index requires --name-hash-version=1"));
name_hash_version = 1;
}
}
/*
 * Hash 'name' with the name-hash function selected by name_hash_version.
 *
 * The version in effect on the first call is remembered; a later call
 * that observes a different version is reported as a BUG, as is a
 * version that is neither 1 nor 2.
 */
static inline uint32_t pack_name_hash_fn(const char *name)
{
	static int first_version = -1;

	if (first_version >= 0 && first_version != name_hash_version)
		BUG("name hash version changed from %d to %d mid-process",
		    first_version, name_hash_version);
	if (first_version < 0)
		first_version = name_hash_version;

	if (name_hash_version == 1)
		return pack_name_hash(name);
	if (name_hash_version == 2)
		return pack_name_hash_v2((const unsigned char *)name);

	BUG("invalid name-hash version: %d", name_hash_version);
}
/*
* stats
@@ -1689,7 +1726,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
return 0;
}
create_object_entry(oid, type, pack_name_hash(name),
create_object_entry(oid, type, pack_name_hash_fn(name),
exclude, name && no_try_delta(name),
found_pack, found_offset);
return 1;
@@ -1903,7 +1940,7 @@ static void add_preferred_base_object(const char *name)
{
struct pbase_tree *it;
size_t cmplen;
unsigned hash = pack_name_hash(name);
unsigned hash = pack_name_hash_fn(name);
if (!num_preferred_base || check_pbase_path(hash))
return;
@@ -3415,7 +3452,7 @@ static void show_object_pack_hint(struct object *object, const char *name,
* here using the name in order to perhaps improve the delta selection
* process.
*/
oe->hash = pack_name_hash(name);
oe->hash = pack_name_hash_fn(name);
oe->no_try_delta = name && no_try_delta(name);
stdin_packs_hints_nr++;
@@ -3565,7 +3602,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
entry = packlist_find(&to_pack, oid);
if (entry) {
if (name) {
entry->hash = pack_name_hash(name);
entry->hash = pack_name_hash_fn(name);
entry->no_try_delta = no_try_delta(name);
}
} else {
@@ -3588,7 +3625,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
return;
}
entry = create_object_entry(oid, type, pack_name_hash(name),
entry = create_object_entry(oid, type, pack_name_hash_fn(name),
0, name && no_try_delta(name),
pack, offset);
}
@@ -4068,6 +4105,15 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
return -1;
/*
* For now, force the name-hash version to be 1 since that
* is the version implied by the bitmap format. Later, the
* format can include this version explicitly in its format,
* allowing readers to know the version that was used during
* the bitmap write.
*/
name_hash_version = 1;
if (pack_options_allow_reuse())
reuse_partial_packfile_from_bitmap(bitmap_git,
&reuse_packfiles,
@@ -4443,6 +4489,8 @@ int cmd_pack_objects(int argc,
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
N_("protocol"),
N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
OPT_INTEGER(0, "name-hash-version", &name_hash_version,
N_("use the specified name-hash function to group similar objects")),
OPT_END(),
};
@@ -4598,6 +4646,11 @@ int cmd_pack_objects(int argc,
if (pack_to_stdout || !rev_list_all)
write_bitmap_index = 0;
if (name_hash_version < 0)
name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1);
validate_name_hash_version();
if (use_delta_islands)
strvec_push(&rp, "--topo-order");