Files
git/builtin/pack-redundant.c
Jeff King 5247b762d0 builtins: mark unused prefix parameters
All builtins receive a "prefix" parameter, but it is only useful if they
need to adjust filenames given by the user on the command line. For
builtins that do not even call parse_options(), they often don't look at
the prefix at all, and -Wunused-parameter complains.

Let's annotate those to silence the compiler warning. I gave a quick
scan of each of these cases, and it seems like they don't have anything
they _should_ be using the prefix for (i.e., there is no hidden bug that
we are missing). The only questionable cases I saw were:

  - in git-unpack-file, we create a tempfile which will always be at the
    root of the repository, even if the command is run from a subdir.
    Arguably this should be created in the subdir from which we're run
    (as we report the path only as a relative name). However, nobody has
    complained, and I'm hesitant to change something that is deep
    plumbing going back to April 2005 (though I think within our
    scripts, the sole caller in git-merge-one-file would be OK, as it
    moves to the toplevel itself).

  - in fetch-pack, local-filesystem remotes are taken as relative to the
    project root, not the current directory. So:

       git init server.git
       [...put stuff in server.git...]
       git init client.git
       cd client.git
       mkdir subdir
       cd subdir
       git fetch-pack ../../server.git ...

    won't work, as we quietly move to the top of the repository before
    interpreting the path (so "../server.git" would work). This is
    weird, but again, nobody has complained and this is how it has
    always worked. And this is how "git fetch" works, too. Plus it
    raises questions about how a configured remote like:

      git config remote.origin.url ../server.git

    should behave. I can certainly come up with a reasonable set of
    behaviors, but it may not be worth stirring up complications in a
    plumbing tool.

So I've left the behavior untouched in both of those cases. If anybody
really wants to revisit them, it's easy enough to drop the UNUSED
marker. This commit is just about removing them as obstacles to turning
on -Wunused-parameter all the time.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-28 14:11:24 -07:00

674 lines
15 KiB
C

/*
*
* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
*
* This file is licensed under the GPL v2.
*
*/
#include "builtin.h"
#include "hex.h"
#include "repository.h"
#include "packfile.h"
#include "object-store.h"
#define BLKSIZE 512
static const char pack_redundant_usage[] =
"git pack-redundant [--verbose] [--alt-odb] (--all | <pack-filename>...)";
static int load_all_packs, verbose, alt_odb;
/* One node of a singly linked list of object IDs. */
struct llist_item {
/* following node; also used to chain recycled nodes on the free list */
struct llist_item *next;
/* object ID carried by this node */
struct object_id oid;
};
/*
 * Singly linked list of object IDs with head and tail pointers and a
 * cached element count.  The "sorted" helpers in this file rely on the
 * nodes being kept in ascending object-ID order.
 */
static struct llist {
/* first node; llist_init() starts it as NULL */
struct llist_item *front;
/* last node; llist_init() starts it as NULL */
struct llist_item *back;
/* number of nodes currently linked */
size_t size;
} *all_objects; /* all objects which must be present in local packfiles */
/*
 * Entry of a singly linked list of packs under consideration.
 * local_packs collects entries whose pack has pack_local set; every other
 * accepted pack goes to altodb_packs (see add_pack()).
 */
static struct pack_list {
/* next entry in the list */
struct pack_list *next;
/* the pack this entry describes */
struct packed_git *pack;
/*
 * Objects not seen in any other local pack it was compared with.
 * NULL after add_pack(); filled in by cmp_two_packs()/cmp_local_packs().
 */
struct llist *unique_objects;
/*
 * Object IDs read from the pack index, minus whatever later passes
 * subtract (alt-odb objects, ignored objects, already covered objects).
 */
struct llist *remaining_objects;
/* size of remaining_objects as recorded by add_pack() right after loading */
size_t all_objects_size;
} *local_packs = NULL, *altodb_packs = NULL;
/* Head of the pool of spare list nodes, chained through their next pointers. */
static struct llist_item *free_nodes;
/* Hand `item` back to the spare-node pool by pushing it onto its head. */
static inline void llist_item_put(struct llist_item *item)
{
	struct llist_item *old_head = free_nodes;

	free_nodes = item;
	item->next = old_head;
}
/*
 * Obtain a list node for the caller.
 *
 * A node is popped from the spare-node pool when one is available.
 * Otherwise a batch of BLKSIZE nodes is allocated: element 0 is returned
 * and elements 1..BLKSIZE-1 are pushed onto the pool.  The contents of the
 * returned node are not initialized here.
 */
static inline struct llist_item *llist_item_get(void)
{
	struct llist_item *node = free_nodes;
	int idx;

	if (node) {
		/* fast path: take the head of the pool */
		free_nodes = node->next;
		return node;
	}

	/* pool is empty: allocate a fresh batch and stock the pool */
	ALLOC_ARRAY(node, BLKSIZE);
	for (idx = 1; idx < BLKSIZE; idx++)
		llist_item_put(node + idx);
	return node;
}
/* Allocate an empty list header and store it in `*list`. */
static inline void llist_init(struct llist **list)
{
	struct llist *fresh = xmalloc(sizeof(struct llist));

	fresh->size = 0;
	fresh->back = NULL;
	fresh->front = NULL;
	*list = fresh;
}
/*
 * Return a newly allocated list holding the same object IDs as `list`,
 * in the same order.  The source list is left untouched.
 */
static struct llist * llist_copy(struct llist *list)
{
	struct llist *copy;
	struct llist_item *src, *tail;

	llist_init(&copy);
	copy->size = list->size;
	if (!copy->size)
		return copy;

	/* the first node is handled separately so the loop always has a tail */
	tail = llist_item_get();
	tail->oid = list->front->oid;
	copy->front = tail;

	for (src = list->front->next; src; src = src->next) {
		struct llist_item *node = llist_item_get();

		node->oid = src->oid;
		tail->next = node;
		tail = node;
	}

	tail->next = NULL;
	copy->back = tail;
	return copy;
}
/*
 * Insert a new node carrying the raw hash `oid` into `list`.
 *
 * When `after` is non-NULL the node is linked right behind it; when it is
 * NULL the node becomes the new front.  The list's back pointer and size
 * are kept up to date.  Returns the new node.
 */
static inline struct llist_item *llist_insert(struct llist *list,
					      struct llist_item *after,
					      const unsigned char *oid)
{
	struct llist_item *node = llist_item_get();

	oidread(&node->oid, oid);

	if (!after) {
		/* insert in front */
		if (list->size) {
			node->next = list->front;
		} else {
			node->next = NULL;
			list->back = node;
		}
		list->front = node;
	} else {
		node->next = after->next;
		after->next = node;
		if (list->back == after)
			list->back = node;
	}

	list->size++;
	return node;
}
/* Append a node carrying the raw hash `oid` to `list`; returns the new node. */
static inline struct llist_item *llist_insert_back(struct llist *list,
						   const unsigned char *oid)
{
	struct llist_item *tail = list->back;

	return llist_insert(list, tail, oid);
}
/*
 * Insert `oid` into the sorted `list` unless it is already there.
 *
 * The scan starts at `hint` when one is given, otherwise at the front.
 * Returns the node that holds `oid` (existing or newly inserted).
 *
 * Note: the predecessor of `hint` is not known, so if `oid` sorts before
 * the hinted node the new node is inserted at the front of the list.
 */
static inline struct llist_item *llist_insert_sorted_unique(struct llist *list,
			const struct object_id *oid, struct llist_item *hint)
{
	struct llist_item *before = NULL;
	struct llist_item *cur;

	for (cur = hint ? hint : list->front; cur; before = cur, cur = cur->next) {
		int order = oidcmp(&cur->oid, oid);

		if (!order)
			return cur; /* already exists */
		if (order > 0)
			return llist_insert(list, before, oid->hash); /* goes before cur */
	}

	/* insert at the end */
	return llist_insert_back(list, oid->hash);
}
/*
 * Remove the raw hash `oid` from the sorted `list`, if present.
 *
 * The scan starts at `hint` when one is given, otherwise at the front.
 * Returns the last node visited that sorts before `oid` (NULL if there is
 * none); callers pass it back as the hint for the next, larger, hash.
 * A removed node is handed back to the spare-node pool.
 */
static inline struct llist_item * llist_sorted_remove(struct llist *list, const unsigned char *oid, struct llist_item *hint)
{
	struct llist_item *prev, *cur;
	int cmp = 0;

	for (;;) {
		prev = NULL;
		cur = hint ? hint : list->front;

		/* skip every node that sorts before the wanted hash */
		while (cur && (cmp = hashcmp(cur->oid.hash, oid)) < 0) {
			prev = cur;
			cur = cur->next;
		}

		/* ran off the end or passed the slot: not in list, since sorted */
		if (!cur || cmp > 0)
			return prev;

		/* found; we can unlink if the predecessor is known or cur is the front */
		if (prev || !hint || hint == list->front)
			break;

		/* the match is the hinted node itself: rescan from the start */
		hint = NULL;
	}

	if (prev)
		prev->next = cur->next;
	else
		list->front = cur->next;
	if (list->back == cur)
		list->back = prev;

	llist_item_put(cur);
	list->size--;
	return prev;
}
/*
 * computes A\B
 *
 * Every object ID found in B is removed from A; B is only read.  The node
 * returned by each removal is reused as the starting hint for the next one.
 */
static void llist_sorted_difference_inplace(struct llist *A,
					    struct llist *B)
{
	struct llist_item *resume = NULL;
	struct llist_item *cur;

	for (cur = B->front; cur; cur = cur->next)
		resume = llist_sorted_remove(A, cur->oid.hash, resume);
}
/*
 * Push a heap-allocated shallow copy of `entry` onto the front of the
 * list `*pl` and return the copy.  The copied pointers (pack, object
 * lists) still refer to the same objects as `entry`.
 */
static inline struct pack_list * pack_list_insert(struct pack_list **pl,
					   struct pack_list *entry)
{
	struct pack_list *copy = xmalloc(sizeof(struct pack_list));

	*copy = *entry;
	copy->next = *pl;
	*pl = copy;
	return copy;
}
/* Count the entries of the pack list starting at `pl` (0 for NULL). */
static inline size_t pack_list_size(struct pack_list *pl)
{
	size_t count;

	for (count = 0; pl; pl = pl->next)
		count++;
	return count;
}
/*
 * Build a new list with shallow copies of the entries of A whose pack
 * does not appear in B (packs are compared by pointer).  The relative
 * order of A is preserved; A and B are left untouched.
 */
static struct pack_list * pack_list_difference(const struct pack_list *A,
					       const struct pack_list *B)
{
	struct pack_list *head = NULL;
	struct pack_list **tail = &head;
	const struct pack_list *a, *b;

	for (a = A; a; a = a->next) {
		struct pack_list *copy;

		for (b = B; b; b = b->next)
			if (a->pack == b->pack)
				break;
		if (b)
			continue; /* also in B: leave it out */

		copy = xmalloc(sizeof(struct pack_list));
		*copy = *a;
		copy->next = NULL;
		*tail = copy;
		tail = &copy->next;
	}
	return head;
}
/*
 * Walk the sorted object names in the indexes of two packs in lockstep
 * and drop every name they share from both packs' unique_objects lists.
 *
 * A pack whose unique_objects is still NULL first gets a copy of its
 * remaining_objects.  The first name is located 256 * 4 bytes plus 4
 * (index version < 2) or 8 bytes into the index data; consecutive names
 * are the hash size apart, plus 4 bytes for index version < 2.
 */
static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
{
	const unsigned int hashsz = the_hash_algo->rawsz;
	const unsigned char *base1, *base2;
	size_t off1 = 0, off2 = 0;
	size_t step1, step2, limit1, limit2;
	struct llist_item *hint1 = NULL, *hint2 = NULL;

	if (!p1->unique_objects)
		p1->unique_objects = llist_copy(p1->remaining_objects);
	if (!p2->unique_objects)
		p2->unique_objects = llist_copy(p2->remaining_objects);

	base1 = p1->pack->index_data;
	base1 += 256 * 4 + ((p1->pack->index_version < 2) ? 4 : 8);
	step1 = hashsz + ((p1->pack->index_version < 2) ? 4 : 0);
	limit1 = p1->pack->num_objects * step1;

	base2 = p2->pack->index_data;
	base2 += 256 * 4 + ((p2->pack->index_version < 2) ? 4 : 8);
	step2 = hashsz + ((p2->pack->index_version < 2) ? 4 : 0);
	limit2 = p2->pack->num_objects * step2;

	while (off1 < limit1 && off2 < limit2) {
		const unsigned char *name1 = base1 + off1;
		const unsigned char *name2 = base2 + off2;
		const int cmp = hashcmp(name1, name2); /* cmp ~ p1 - p2 */

		if (cmp < 0) {
			/* p1 has the object, p2 doesn't */
			off1 += step1;
		} else if (cmp > 0) {
			/* p2 has the object, p1 doesn't */
			off2 += step2;
		} else {
			/* shared object: unique to neither pack */
			hint1 = llist_sorted_remove(p1->unique_objects,
						    name1, hint1);
			hint2 = llist_sorted_remove(p2->unique_objects,
						    name1, hint2);
			off1 += step1;
			off2 += step2;
		}
	}
}
/*
 * Count the object names that appear in the indexes of both p1 and p2.
 *
 * Despite the name, the value returned is the number of names the two
 * packs have in common: the counter only advances when both indexes hold
 * the same name.  The index walk uses the same offsets and strides as
 * cmp_two_packs().
 */
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
{
	const unsigned int hashsz = the_hash_algo->rawsz;
	const unsigned char *base1, *base2;
	size_t off1 = 0, off2 = 0;
	size_t step1, step2, limit1, limit2;
	size_t shared = 0;

	base1 = p1->index_data;
	base1 += 256 * 4 + ((p1->index_version < 2) ? 4 : 8);
	step1 = hashsz + ((p1->index_version < 2) ? 4 : 0);
	limit1 = p1->num_objects * step1;

	base2 = p2->index_data;
	base2 += 256 * 4 + ((p2->index_version < 2) ? 4 : 8);
	step2 = hashsz + ((p2->index_version < 2) ? 4 : 0);
	limit2 = p2->num_objects * step2;

	while (off1 < limit1 && off2 < limit2) {
		int cmp = hashcmp(base1 + off1, base2 + off2); /* cmp ~ p1 - p2 */

		if (cmp < 0) {
			/* p1 has the object, p2 doesn't */
			off1 += step1;
		} else if (cmp > 0) {
			/* p2 has the object, p1 doesn't */
			off2 += step2;
		} else {
			shared++;
			off1 += step1;
			off2 += step2;
		}
	}
	return shared;
}
/*
 * another O(n^2) function ...
 *
 * Sum sizeof_union() over every unordered pair of packs in `pl`.
 * Returns 0 for an empty or single-entry list.
 */
static size_t get_pack_redundancy(struct pack_list *pl)
{
	struct pack_list *other;
	size_t total = 0;

	for (; pl && pl->next; pl = pl->next)
		for (other = pl->next; other; other = other->next)
			total += sizeof_union(pl->pack, other->pack);
	return total;
}
/* Add up pack_size and index_size over every pack in the list `pl`. */
static inline off_t pack_set_bytecount(struct pack_list *pl)
{
	off_t total = 0;

	for (; pl; pl = pl->next) {
		total += pl->pack->pack_size;
		total += pl->pack->index_size;
	}
	return total;
}
/*
 * Sort callback over an array of `struct pack_list *`.
 *
 * Orders entries by descending remaining_objects size; ties are broken by
 * descending all_objects_size (big pack first).  Returns 0 only when both
 * keys are equal.
 */
static int cmp_remaining_objects(const void *a, const void *b)
{
	struct pack_list *lhs = *((struct pack_list **)a);
	struct pack_list *rhs = *((struct pack_list **)b);
	size_t lhs_left = lhs->remaining_objects->size;
	size_t rhs_left = rhs->remaining_objects->size;

	/* sort by remaining objects, more objects first */
	if (lhs_left != rhs_left)
		return lhs_left < rhs_left ? 1 : -1;

	/* have the same remaining_objects, big pack first */
	if (lhs->all_objects_size != rhs->all_objects_size)
		return lhs->all_objects_size < rhs->all_objects_size ? 1 : -1;

	return 0;
}
/*
 * Sort pack_list, greater size of remaining_objects first
 *
 * The entries of `*pl` are relinked in the order defined by
 * cmp_remaining_objects() and `*pl` is updated to the new head.  Lists
 * with fewer than two entries are left alone.
 */
static void sort_pack_list(struct pack_list **pl)
{
	struct pack_list **ary, *p;
	size_t i; /* same type as n: avoids a signed/unsigned comparison */
	size_t n = pack_list_size(*pl);

	if (n < 2)
		return;

	/* prepare an array of packed_list for easier sorting */
	CALLOC_ARRAY(ary, n);
	for (n = 0, p = *pl; p; p = p->next)
		ary[n++] = p;

	QSORT(ary, n, cmp_remaining_objects);

	/* link them back again */
	for (i = 0; i + 1 < n; i++)
		ary[i]->next = ary[i + 1];
	ary[n - 1]->next = NULL;
	*pl = ary[0];

	free(ary);
}
static void minimize(struct pack_list **min)
{
struct pack_list *pl, *unique = NULL, *non_unique = NULL;
struct llist *missing, *unique_pack_objects;
pl = local_packs;
while (pl) {
if (pl->unique_objects->size)
pack_list_insert(&unique, pl);
else
pack_list_insert(&non_unique, pl);
pl = pl->next;
}
/* find out which objects are missing from the set of unique packs */
missing = llist_copy(all_objects);
pl = unique;
while (pl) {
llist_sorted_difference_inplace(missing, pl->remaining_objects);
pl = pl->next;
}
*min = unique;
/* return if there are no objects missing from the unique set */
if (missing->size == 0) {
free(missing);
return;
}
unique_pack_objects = llist_copy(all_objects);
llist_sorted_difference_inplace(unique_pack_objects, missing);
/* remove unique pack objects from the non_unique packs */
pl = non_unique;
while (pl) {
llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
pl = pl->next;
}
while (non_unique) {
/* sort the non_unique packs, greater size of remaining_objects first */
sort_pack_list(&non_unique);
if (non_unique->remaining_objects->size == 0)
break;
pack_list_insert(min, non_unique);
for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);
non_unique = non_unique->next;
}
}
/*
 * Build all_objects: the sorted, duplicate-free union of the
 * remaining_objects of every local pack, minus every object listed in a
 * pack from altodb_packs.
 */
static void load_all_objects(void)
{
	struct pack_list *pack;
	struct llist_item *item, *resume;

	llist_init(&all_objects);

	for (pack = local_packs; pack; pack = pack->next) {
		/* each pack's list is walked from the start, so restart the hint too */
		resume = NULL;
		for (item = pack->remaining_objects->front; item; item = item->next)
			resume = llist_insert_sorted_unique(all_objects,
							    &item->oid, resume);
	}

	/* remove objects present in remote packs */
	for (pack = altodb_packs; pack; pack = pack->next)
		llist_sorted_difference_inplace(all_objects,
						pack->remaining_objects);
}
/*
 * this scales like O(n^2)
 *
 * Run cmp_two_packs() on every pair of local packs so that each pack's
 * unique_objects ends up holding only objects no other local pack has.
 * With a single local pack, that pack is given an empty unique_objects
 * list instead.  local_packs must not be NULL.
 */
static void cmp_local_packs(void)
{
	struct pack_list *outer = local_packs;
	struct pack_list *inner;

	/* only one packfile */
	if (!outer->next) {
		llist_init(&outer->unique_objects);
		return;
	}

	for (; outer; outer = outer->next)
		for (inner = outer->next; inner; inner = inner->next)
			cmp_two_packs(outer, inner);
}
/*
 * Subtract the objects of every pack in altodb_packs from the
 * remaining_objects list of every local pack.
 */
static void scan_alt_odb_packs(void)
{
	struct pack_list *alt, *local;

	for (alt = altodb_packs; alt; alt = alt->next)
		for (local = local_packs; local; local = local->next)
			llist_sorted_difference_inplace(local->remaining_objects,
							alt->remaining_objects);
}
/*
 * Register pack `p` and load the object names from its index.
 *
 * Non-local packs are skipped unless --alt-odb or --verbose was given.
 * The names are appended to remaining_objects in index order, and the
 * resulting count is recorded in all_objects_size.  The new entry is
 * pushed onto local_packs or altodb_packs depending on p->pack_local.
 *
 * Returns the new list entry, or NULL when the pack is skipped or its
 * index cannot be opened.
 */
static struct pack_list * add_pack(struct packed_git *p)
{
	struct pack_list l;
	size_t off = 0, step;
	const unsigned char *base;

	if (!p->pack_local && !(alt_odb || verbose))
		return NULL;

	/* open the index before allocating anything, so failure leaks nothing */
	if (open_pack_index(p))
		return NULL;

	l.pack = p;
	llist_init(&l.remaining_objects);

	/* first name: 256 * 4 bytes plus 4 (version < 2) or 8 bytes in */
	base = p->index_data;
	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
	/* version < 2 entries are 4 bytes wider than the bare hash */
	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
	while (off < p->num_objects * step) {
		llist_insert_back(l.remaining_objects, base + off);
		off += step;
	}
	l.all_objects_size = l.remaining_objects->size;
	l.unique_objects = NULL;

	/* pack_list_insert() stores a copy of l and sets its next pointer */
	if (p->pack_local)
		return pack_list_insert(&local_packs, &l);
	else
		return pack_list_insert(&altodb_packs, &l);
}
/*
 * Register the first known pack whose pack_name contains `filename`.
 *
 * Dies when `filename` is shorter than 40 characters or when no pack
 * matches.  Returns whatever add_pack() returns for the matching pack.
 */
static struct pack_list * add_pack_file(const char *filename)
{
	struct packed_git *candidate = get_all_packs(the_repository);

	if (strlen(filename) < 40)
		die("Bad pack filename: %s", filename);

	for (; candidate; candidate = candidate->next) {
		if (!strstr(candidate->pack_name, filename))
			continue;
		return add_pack(candidate);
	}
	die("Filename %s not found in packed_git", filename);
}
/* Offer every pack returned by get_all_packs() to add_pack(). */
static void load_all(void)
{
	struct packed_git *pack;

	for (pack = get_all_packs(the_repository); pack; pack = pack->next)
		add_pack(pack);
}
/*
 * Entry point of "git pack-redundant".
 *
 * Parses the options (--all, --verbose, --alt-odb, --i-still-use-this),
 * loads either all packs or the packs named on the command line, reads
 * object IDs to ignore from stdin when stdin is not a terminal, and
 * computes a covering set of local packs with minimize().  For every local
 * pack outside that set, the value of sha1_pack_index_name() and the pack
 * name are printed to stdout, one per line.  With --verbose, statistics
 * are written to stderr.
 *
 * Dies when no local pack was found or when stdin holds a malformed
 * object ID.  Returns 0 otherwise.
 */
int cmd_pack_redundant(int argc, const char **argv, const char *prefix UNUSED)
{
	int i;
	int i_still_use_this = 0;
	struct pack_list *min = NULL, *red, *pl;
	struct llist *ignore;
	struct object_id oid;
	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage(pack_redundant_usage);

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];
		if (!strcmp(arg, "--")) {
			i++;
			break;
		}
		if (!strcmp(arg, "--all")) {
			load_all_packs = 1;
			continue;
		}
		if (!strcmp(arg, "--verbose")) {
			verbose = 1;
			continue;
		}
		if (!strcmp(arg, "--alt-odb")) {
			alt_odb = 1;
			continue;
		}
		if (!strcmp(arg, "--i-still-use-this")) {
			i_still_use_this = 1;
			continue;
		}
		if (*arg == '-')
			usage(pack_redundant_usage);
		else
			break;
	}

	if (!i_still_use_this) {
		fputs(_("'git pack-redundant' is nominated for removal.\n"
			"If you still use this command, please add an extra\n"
			"option, '--i-still-use-this', on the command line\n"
			"and let us know you still use it by sending an e-mail\n"
			"to <git@vger.kernel.org>. Thanks.\n"), stderr);
	}

	if (load_all_packs)
		load_all();
	else
		while (argv[i])
			add_pack_file(argv[i++]);

	if (!local_packs)
		die("Zero packs found!");

	load_all_objects();

	if (alt_odb)
		scan_alt_odb_packs();

	/* ignore objects given on stdin */
	llist_init(&ignore);
	if (!isatty(0)) {
		while (fgets(buf, sizeof(buf), stdin)) {
			/*
			 * The list stores its own copy of the ID, so a single
			 * stack object is enough; no per-line allocation.
			 */
			if (get_oid_hex(buf, &oid))
				die("Bad object ID on stdin: %s", buf);
			llist_insert_sorted_unique(ignore, &oid, NULL);
		}
	}
	llist_sorted_difference_inplace(all_objects, ignore);
	pl = local_packs;
	while (pl) {
		llist_sorted_difference_inplace(pl->remaining_objects, ignore);
		pl = pl->next;
	}

	cmp_local_packs();

	minimize(&min);

	if (verbose) {
		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
			(unsigned long)pack_list_size(altodb_packs));
		fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
		pl = min;
		while (pl) {
			fprintf(stderr, "\t%s\n", pl->pack->pack_name);
			pl = pl->next;
		}
		/* divide in off_t first so large totals are not truncated by the cast */
		fprintf(stderr, "containing %lu duplicate objects "
				"with a total size of %lukb.\n",
			(unsigned long)get_pack_redundancy(min),
			(unsigned long)(pack_set_bytecount(min) / 1024));
		fprintf(stderr, "A total of %lu unique objects were considered.\n",
			(unsigned long)all_objects->size);
		fprintf(stderr, "Redundant packs (with indexes):\n");
	}
	pl = red = pack_list_difference(local_packs, min);
	while (pl) {
		printf("%s\n%s\n",
		       sha1_pack_index_name(pl->pack->hash),
		       pl->pack->pack_name);
		pl = pl->next;
	}
	if (verbose)
		fprintf(stderr, "%luMB of redundant packs in total.\n",
			(unsigned long)(pack_set_bytecount(red) / (1024 * 1024)));

	return 0;
}