bloom: replace struct bloom_key * with struct bloom_keyvec
Previously, we stored bloom keys in a flat array and marked a commit as NOT TREESAME if any key reported "definitely not changed". To support multiple pathspec items, we now require that for each pathspec item, there exists a bloom key reporting "definitely not changed". This "for every" condition makes a flat array insufficient, so we introduce a new structure to group keys by a single pathspec item. `struct bloom_keyvec` is introduced to replace `struct bloom_key *` and `bloom_keys_nr`. And because we want to support multiple pathspec items, we added a `bloom_keyvecs` and a `bloom_keyvecs_nr` field to `struct rev_info` to represent an array of bloom_keyvecs. This commit still optimizes only one pathspec item, thus bloom_keyvecs_nr can only be 0 or 1. New bloom_keyvec_* functions are added to create and destroy a keyvec. bloom_filter_contains_vec() is added to check whether all keys in a keyvec are contained in a bloom filter. Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
committed by
Junio C Hamano
parent
b187353ed2
commit
90d5518a7d
61
bloom.c
61
bloom.c
@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
|
||||
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
|
||||
}
|
||||
|
||||
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
struct bloom_keyvec *vec;
|
||||
const char *p;
|
||||
size_t sz;
|
||||
size_t nr = 1;
|
||||
|
||||
p = path;
|
||||
while (*p) {
|
||||
/*
|
||||
* At this point, the path is normalized to use Unix-style
|
||||
* path separators. This is required due to how the
|
||||
* changed-path Bloom filters store the paths.
|
||||
*/
|
||||
if (*p == '/')
|
||||
nr++;
|
||||
p++;
|
||||
}
|
||||
|
||||
sz = sizeof(struct bloom_keyvec);
|
||||
sz += nr * sizeof(struct bloom_key);
|
||||
vec = (struct bloom_keyvec *)xcalloc(1, sz);
|
||||
if (!vec)
|
||||
return NULL;
|
||||
vec->count = nr;
|
||||
|
||||
bloom_key_fill(&vec->key[0], path, len, settings);
|
||||
nr = 1;
|
||||
p = path + len - 1;
|
||||
while (p > path) {
|
||||
if (*p == '/') {
|
||||
bloom_key_fill(&vec->key[nr++], path, p - path, settings);
|
||||
}
|
||||
p--;
|
||||
}
|
||||
assert(nr == vec->count);
|
||||
return vec;
|
||||
}
|
||||
|
||||
void bloom_keyvec_free(struct bloom_keyvec *vec)
|
||||
{
|
||||
if (!vec)
|
||||
return;
|
||||
for (size_t nr = 0; nr < vec->count; nr++)
|
||||
bloom_key_clear(&vec->key[nr]);
|
||||
free(vec);
|
||||
}
|
||||
|
||||
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
|
||||
const struct hashmap_entry *eptr,
|
||||
const struct hashmap_entry *entry_or_key,
|
||||
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bloom_filter_contains_vec(const struct bloom_filter *filter,
|
||||
const struct bloom_keyvec *vec,
|
||||
const struct bloom_filter_settings *settings)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
|
||||
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
|
||||
int version)
|
||||
{
|
||||
|
||||
38
bloom.h
38
bloom.h
@@ -74,6 +74,16 @@ struct bloom_key {
|
||||
uint32_t *hashes;
|
||||
};
|
||||
|
||||
/*
 * A bloom_keyvec is a vector of bloom_keys, which
 * can be used to store multiple keys for a single
 * pathspec item.
 *
 * bloom_keyvec_new() allocates the header and the keys as one block,
 * storing the key for the full path in key[0] followed by one key per
 * leading path prefix. bloom_keyvec_free() clears the keys and frees
 * the block.
 */
|
||||
struct bloom_keyvec {
|
||||
size_t count; /* number of entries in key[] */
|
||||
struct bloom_key key[FLEX_ARRAY]; /* the keys, stored inline after count */
|
||||
};
|
||||
|
||||
int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||
struct bloom_filter *filter,
|
||||
uint32_t graph_pos);
|
||||
@@ -82,6 +92,23 @@ void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
|
||||
const struct bloom_filter_settings *settings);
|
||||
void bloom_key_clear(struct bloom_key *key);
|
||||
|
||||
/*
|
||||
* bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the
|
||||
* given path.
|
||||
*
|
||||
* This function splits the input path by '/' and generates a bloom key for each
|
||||
* prefix, from most specific to least specific. For example, given the input
|
||||
* "a/b/c", it will generate bloom keys for:
|
||||
* - "a/b/c"
|
||||
* - "a/b"
|
||||
* - "a"
|
||||
*
|
||||
* The resulting keys are stored in a newly allocated bloom_keyvec.
|
||||
*/
|
||||
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
|
||||
const struct bloom_filter_settings *settings);
|
||||
void bloom_keyvec_free(struct bloom_keyvec *vec);
|
||||
|
||||
void add_key_to_filter(const struct bloom_key *key,
|
||||
struct bloom_filter *filter,
|
||||
const struct bloom_filter_settings *settings);
|
||||
@@ -126,6 +153,17 @@ int bloom_filter_contains(const struct bloom_filter *filter,
|
||||
const struct bloom_key *key,
|
||||
const struct bloom_filter_settings *settings);
|
||||
|
||||
/*
 * bloom_filter_contains_vec - Check if all keys in a key vector are in the
 * Bloom filter.
 *
 * The keys are queried in order with bloom_filter_contains(); the walk
 * stops at the first key whose result is not positive, and that result
 * is returned unchanged.
 *
 * Returns 1 if **all** keys in the vector are present in the filter
 * (including when the vector holds no keys),
 * 0 if **any** key is not present.
 * Any other non-positive value from bloom_filter_contains() is passed
 * through as-is.
 */
int bloom_filter_contains_vec(const struct bloom_filter *filter,
			      const struct bloom_keyvec *vec,
			      const struct bloom_filter_settings *settings);
|
||||
|
||||
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
|
||||
int version);
|
||||
|
||||
|
||||
76
revision.c
76
revision.c
@@ -685,13 +685,14 @@ static int forbid_bloom_filters(struct pathspec *spec)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void release_revisions_bloom_keyvecs(struct rev_info *revs);
|
||||
|
||||
static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
||||
{
|
||||
struct pathspec_item *pi;
|
||||
char *path_alloc = NULL;
|
||||
const char *path, *p;
|
||||
const char *path;
|
||||
size_t len;
|
||||
int path_component_nr = 1;
|
||||
|
||||
if (!revs->commits)
|
||||
return;
|
||||
@@ -708,6 +709,8 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
||||
if (!revs->pruning.pathspec.nr)
|
||||
return;
|
||||
|
||||
revs->bloom_keyvecs_nr = 1;
|
||||
CALLOC_ARRAY(revs->bloom_keyvecs, 1);
|
||||
pi = &revs->pruning.pathspec.items[0];
|
||||
|
||||
/* remove single trailing slash from path, if needed */
|
||||
@@ -718,53 +721,30 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
|
||||
path = pi->match;
|
||||
|
||||
len = strlen(path);
|
||||
if (!len) {
|
||||
revs->bloom_filter_settings = NULL;
|
||||
free(path_alloc);
|
||||
return;
|
||||
}
|
||||
if (!len)
|
||||
goto fail;
|
||||
|
||||
p = path;
|
||||
while (*p) {
|
||||
/*
|
||||
* At this point, the path is normalized to use Unix-style
|
||||
* path separators. This is required due to how the
|
||||
* changed-path Bloom filters store the paths.
|
||||
*/
|
||||
if (*p == '/')
|
||||
path_component_nr++;
|
||||
p++;
|
||||
}
|
||||
|
||||
revs->bloom_keys_nr = path_component_nr;
|
||||
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
|
||||
|
||||
bloom_key_fill(&revs->bloom_keys[0], path, len,
|
||||
revs->bloom_filter_settings);
|
||||
path_component_nr = 1;
|
||||
|
||||
p = path + len - 1;
|
||||
while (p > path) {
|
||||
if (*p == '/')
|
||||
bloom_key_fill(&revs->bloom_keys[path_component_nr++],
|
||||
path, p - path,
|
||||
revs->bloom_filter_settings);
|
||||
p--;
|
||||
}
|
||||
revs->bloom_keyvecs[0] =
|
||||
bloom_keyvec_new(path, len, revs->bloom_filter_settings);
|
||||
|
||||
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
|
||||
atexit(trace2_bloom_filter_statistics_atexit);
|
||||
bloom_filter_atexit_registered = 1;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
fail:
|
||||
revs->bloom_filter_settings = NULL;
|
||||
free(path_alloc);
|
||||
release_revisions_bloom_keyvecs(revs);
|
||||
}
|
||||
|
||||
static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
|
||||
struct commit *commit)
|
||||
{
|
||||
struct bloom_filter *filter;
|
||||
int result = 1, j;
|
||||
int result = 0;
|
||||
|
||||
if (!revs->repo->objects->commit_graph)
|
||||
return -1;
|
||||
@@ -779,10 +759,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
|
||||
result = bloom_filter_contains(filter,
|
||||
&revs->bloom_keys[j],
|
||||
revs->bloom_filter_settings);
|
||||
for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
|
||||
result = bloom_filter_contains_vec(filter,
|
||||
revs->bloom_keyvecs[nr],
|
||||
revs->bloom_filter_settings);
|
||||
}
|
||||
|
||||
if (result)
|
||||
@@ -823,7 +803,7 @@ static int rev_compare_tree(struct rev_info *revs,
|
||||
return REV_TREE_SAME;
|
||||
}
|
||||
|
||||
if (revs->bloom_keys_nr && !nth_parent) {
|
||||
if (revs->bloom_keyvecs_nr && !nth_parent) {
|
||||
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
|
||||
|
||||
if (bloom_ret == 0)
|
||||
@@ -850,7 +830,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
|
||||
if (!t1)
|
||||
return 0;
|
||||
|
||||
if (!nth_parent && revs->bloom_keys_nr) {
|
||||
if (!nth_parent && revs->bloom_keyvecs_nr) {
|
||||
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
|
||||
if (!bloom_ret)
|
||||
return 1;
|
||||
@@ -3200,6 +3180,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
|
||||
|
||||
static void release_revisions_topo_walk_info(struct topo_walk_info *info);
|
||||
|
||||
static void release_revisions_bloom_keyvecs(struct rev_info *revs)
|
||||
{
|
||||
for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
|
||||
bloom_keyvec_free(revs->bloom_keyvecs[nr]);
|
||||
FREE_AND_NULL(revs->bloom_keyvecs);
|
||||
revs->bloom_keyvecs_nr = 0;
|
||||
}
|
||||
|
||||
static void free_void_commit_list(void *list)
|
||||
{
|
||||
free_commit_list(list);
|
||||
@@ -3228,11 +3216,7 @@ void release_revisions(struct rev_info *revs)
|
||||
clear_decoration(&revs->treesame, free);
|
||||
line_log_free(revs);
|
||||
oidset_clear(&revs->missing_commits);
|
||||
|
||||
for (int i = 0; i < revs->bloom_keys_nr; i++)
|
||||
bloom_key_clear(&revs->bloom_keys[i]);
|
||||
FREE_AND_NULL(revs->bloom_keys);
|
||||
revs->bloom_keys_nr = 0;
|
||||
release_revisions_bloom_keyvecs(revs);
|
||||
}
|
||||
|
||||
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
|
||||
|
||||
@@ -62,7 +62,7 @@ struct repository;
|
||||
struct rev_info;
|
||||
struct string_list;
|
||||
struct saved_parents;
|
||||
struct bloom_key;
|
||||
struct bloom_keyvec;
|
||||
struct bloom_filter_settings;
|
||||
struct option;
|
||||
struct parse_opt_ctx_t;
|
||||
@@ -360,8 +360,8 @@ struct rev_info {
|
||||
|
||||
/* Commit graph bloom filter fields */
|
||||
/* The bloom filter key(s) for the pathspec */
|
||||
struct bloom_key *bloom_keys;
|
||||
int bloom_keys_nr;
|
||||
struct bloom_keyvec **bloom_keyvecs;
|
||||
int bloom_keyvecs_nr;
|
||||
|
||||
/*
|
||||
* The bloom filter settings used to generate the key.
|
||||
|
||||
Reference in New Issue
Block a user