When a multi-pack bitmap is used to implement verbatim pack reuse (that is, when verbatim chunks from an on-disk packfile are copied directly[^1]), it does so by using its "preferred pack" as the source for pack-reuse. This allows repositories to pack the majority of their objects into a single (often large) pack, and then use it as the single source for verbatim pack reuse. This increases the number of objects that are reused verbatim (and consequently, decreases the amount of time it takes to generate many packs). But this performance comes at a cost, which is that the preferred packfile must pace its growth with that of the entire repository in order to maintain the utility of verbatim pack reuse. As repositories grow beyond what we can reasonably store in a single packfile, the utility of verbatim pack reuse diminishes. Or, at the very least, it becomes increasingly expensive to maintain as the pack grows larger and larger. It would be beneficial to be able to perform this same optimization over multiple packs, provided some modest constraints (most importantly, that the set of packs eligible for verbatim reuse are disjoint with respect to the subset of their objects being sent). If we assume that the packs which we treat as candidates for verbatim reuse are disjoint with respect to any of their objects we may output, we need to make only modest modifications to the verbatim pack-reuse code itself. Most notably, we need to remove the assumption that the bits in the reachability bitmap corresponding to objects from the single reuse pack begin at the first bit position. Future patches will unwind these assumptions and reimplement their existing functionality as special cases of the more general assumptions (e.g. that reuse bits can start anywhere within the bitset, but happen to start at 0 for all existing cases). This patch does not yet relax any of those assumptions. 
Instead, it implements a foundational data-structure, the "Bitmapped Packs" (`BTMP`) chunk of the multi-pack index. The `BTMP` chunk's contents are described in detail here. Importantly, the `BTMP` chunk contains information to map regions of a multi-pack index's reachability bitmap to the packs whose objects they represent. For now, this chunk is only written, not read (outside of the test-tool used in this patch to test the new chunk's behavior). Future patches will begin to make use of this new chunk. [^1]: Modulo patching any `OFS_DELTA`'s that cross over a region of the pack that wasn't used verbatim. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
97 lines
3.3 KiB
C
97 lines
3.3 KiB
C
#ifndef MIDX_H
|
|
#define MIDX_H
|
|
|
|
#include "repository.h"
|
|
#include "string-list.h"
|
|
|
|
struct object_id;
|
|
struct pack_entry;
|
|
struct repository;
|
|
struct bitmapped_pack;
|
|
|
|
/*
 * String constants naming the multi-pack index test knobs.
 *
 * NOTE(review): these are presumably environment variable names consulted
 * by the test suite; this header only shows the strings themselves, so
 * confirm against the callers.
 */
#define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX"
#define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \
	"GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP"
|
|
|
|
struct multi_pack_index {
|
|
struct multi_pack_index *next;
|
|
|
|
const unsigned char *data;
|
|
size_t data_len;
|
|
|
|
const uint32_t *revindex_data;
|
|
const uint32_t *revindex_map;
|
|
size_t revindex_len;
|
|
|
|
uint32_t signature;
|
|
unsigned char version;
|
|
unsigned char hash_len;
|
|
unsigned char num_chunks;
|
|
uint32_t num_packs;
|
|
uint32_t num_objects;
|
|
|
|
int local;
|
|
|
|
const unsigned char *chunk_pack_names;
|
|
size_t chunk_pack_names_len;
|
|
const uint32_t *chunk_bitmapped_packs;
|
|
size_t chunk_bitmapped_packs_len;
|
|
const uint32_t *chunk_oid_fanout;
|
|
const unsigned char *chunk_oid_lookup;
|
|
const unsigned char *chunk_object_offsets;
|
|
const unsigned char *chunk_large_offsets;
|
|
size_t chunk_large_offsets_len;
|
|
const unsigned char *chunk_revindex;
|
|
size_t chunk_revindex_len;
|
|
|
|
const char **pack_names;
|
|
struct packed_git **packs;
|
|
char object_dir[FLEX_ARRAY];
|
|
};
|
|
|
|
/*
 * Bit flags; each occupies a distinct bit so they can be OR-ed together.
 *
 * NOTE(review): presumably passed via the `unsigned flags` parameters of
 * the functions declared in this header — confirm against the callers.
 */
#define MIDX_PROGRESS (1 << 0)
#define MIDX_WRITE_REV_INDEX (1 << 1)
#define MIDX_WRITE_BITMAP (1 << 2)
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)
|
|
|
|
/*
 * Accessors for a MIDX's checksum and related file names.
 *
 * NOTE(review): semantics are inferred from the names; `out` is presumably
 * the strbuf that receives the computed path — confirm in the
 * implementation.
 */
const unsigned char *get_midx_checksum(struct multi_pack_index *m);
void get_midx_filename(struct strbuf *out, const char *object_dir);
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m);
|
|
|
|
/*
 * Loading a MIDX and looking up packs/objects through it.
 *
 * NOTE(review): return-value conventions (0 vs. non-zero, NULL on failure)
 * are not visible from this header — confirm in the implementation.
 */
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
/*
 * NOTE(review): presumably fills `bp` from the "Bitmapped Packs" (BTMP)
 * chunk entry for `pack_int_id` — confirm in the implementation.
 */
int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
		       struct bitmapped_pack *bp, uint32_t pack_int_id);
int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result);
off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos);
uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos);
struct object_id *nth_midxed_object_oid(struct object_id *oid,
					struct multi_pack_index *m,
					uint32_t n);
int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m);
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name);
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local);
|
|
|
|
/*
 * Write a multi-pack index for `object_dir`.
 *
 * NOTE(review): `flags` presumably takes the MIDX_* bits defined in this
 * header, and `preferred_pack_name` / `refs_snapshot` presumably may be
 * NULL — confirm against the implementation.
 */
int write_midx_file(const char *object_dir,
		    const char *preferred_pack_name,
		    const char *refs_snapshot,
		    unsigned flags);

/*
 * Variant of write_midx_file which writes a MIDX containing only the packs
 * specified in packs_to_include.
 */
int write_midx_file_only(const char *object_dir,
			 struct string_list *packs_to_include,
			 const char *preferred_pack_name,
			 const char *refs_snapshot,
			 unsigned flags);
|
|
/*
 * Maintenance operations on an existing MIDX.
 *
 * NOTE(review): `flags` presumably takes the MIDX_* bits defined in this
 * header (e.g. MIDX_PROGRESS); return conventions are not visible here —
 * confirm in the implementation.
 */
void clear_midx_file(struct repository *r);
int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags);
int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags);
int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags);

/* NOTE(review): presumably releases resources held by `m` — confirm. */
void close_midx(struct multi_pack_index *m);
|
|
|
|
#endif
|