Files
git/builtin/backfill.c
Derrick Stolee 85127bcdea backfill: assume --sparse when sparse-checkout is enabled
The previous change introduced the '--[no-]sparse' option for the 'git
backfill' command, but did not assume it as enabled by default. However,
this is likely the behavior that users will most often want to happen.
Without this default, users with a small sparse-checkout may be confused
when 'git backfill' downloads every version of every object in the full
history.

However, this is left as a separate change so this decision can be reviewed
independently of the value of the '--[no-]sparse' option.

Add a test of adding the '--sparse' option to a repo without sparse-checkout
to make it clear that supplying it without a sparse-checkout is an error.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2025-02-03 16:12:42 -08:00

147 lines
3.4 KiB
C

/* We need this macro to access core_apply_sparse_checkout */
#define USE_THE_REPOSITORY_VARIABLE
#include "builtin.h"
#include "git-compat-util.h"
#include "config.h"
#include "parse-options.h"
#include "repository.h"
#include "commit.h"
#include "dir.h"
#include "environment.h"
#include "hex.h"
#include "tree.h"
#include "tree-walk.h"
#include "object.h"
#include "object-store-ll.h"
#include "oid-array.h"
#include "oidset.h"
#include "promisor-remote.h"
#include "strmap.h"
#include "string-list.h"
#include "revision.h"
#include "trace2.h"
#include "progress.h"
#include "packfile.h"
#include "path-walk.h"
/*
 * Usage synopsis for 'git backfill'. cmd_backfill() hands the first entry
 * to show_usage_if_asked() and the whole NULL-terminated array to
 * parse_options().
 */
static const char * const builtin_backfill_usage[] = {
N_("git backfill [--min-batch-size=<n>] [--[no-]sparse]"),
NULL
};
/*
 * State shared between cmd_backfill(), do_backfill(), the path-walk
 * callback fill_missing_blobs() and download_batch().
 */
struct backfill_context {
/* Repository used for object lookups and for promisor fetches. */
struct repository *repo;
/* Object IDs of missing blobs collected so far and not yet requested. */
struct oid_array current_batch;
/*
 * Once current_batch holds at least this many entries after a path has
 * been processed, the batch is downloaded.
 */
size_t min_batch_size;
/*
 * Nonzero makes do_backfill() load the sparse-checkout patterns into
 * the path walk. cmd_backfill() replaces a negative value with
 * core_apply_sparse_checkout before calling do_backfill().
 */
int sparse;
};
/* Clear the pending batch of object IDs held by 'ctx'. */
static void backfill_context_clear(struct backfill_context *ctx)
{
	struct oid_array *pending = &ctx->current_batch;

	oid_array_clear(pending);
}
static void download_batch(struct backfill_context *ctx)
{
promisor_remote_get_direct(ctx->repo,
ctx->current_batch.oid,
ctx->current_batch.nr);
oid_array_clear(&ctx->current_batch);
/*
* We likely have a new packfile. Add it to the packed list to
* avoid possible duplicate downloads of the same objects.
*/
reprepare_packed_git(ctx->repo);
}
/*
 * Path-walk callback (installed as info.path_fn by do_backfill()).
 *
 * For a list of blobs, queue every object ID that has_object() does not
 * find (queried with OBJECT_INFO_FOR_PREFETCH) and download the batch as
 * soon as it holds at least ctx->min_batch_size entries. Lists of any
 * other object type are skipped.
 *
 * 'data' is the struct backfill_context supplied as info.path_fn_data.
 * Always returns 0.
 */
static int fill_missing_blobs(const char *path UNUSED,
			      struct oid_array *list,
			      enum object_type type,
			      void *data)
{
	struct backfill_context *ctx = data;

	if (type != OBJ_BLOB)
		return 0;

	for (size_t pos = 0; pos < list->nr; pos++) {
		const struct object_id *oid = &list->oid[pos];

		if (has_object(ctx->repo, oid, OBJECT_INFO_FOR_PREFETCH))
			continue;
		oid_array_append(&ctx->current_batch, oid);
	}

	/* Keep accumulating until the batch is large enough. */
	if (ctx->current_batch.nr < ctx->min_batch_size)
		return 0;

	download_batch(ctx);
	return 0;
}
/*
 * Walk the blobs reachable from HEAD by path and download the ones that
 * are missing, using fill_missing_blobs() as the per-path callback.
 *
 * When ctx->sparse is nonzero the sparse-checkout patterns are loaded
 * into the walk first; failing to load them is reported with error()
 * and its value is returned. Otherwise the return value is that of
 * walk_objects_by_path().
 */
static int do_backfill(struct backfill_context *ctx)
{
	struct path_walk_info info = PATH_WALK_INFO_INIT;
	struct rev_info revs;
	int ret;

	if (ctx->sparse) {
		int load_failed;

		CALLOC_ARRAY(info.pl, 1);
		load_failed = get_sparse_checkout_patterns(info.pl);
		if (load_failed) {
			path_walk_info_clear(&info);
			return error(_("problem loading sparse-checkout"));
		}
	}

	repo_init_revisions(ctx->repo, &revs, "");
	handle_revision_arg("HEAD", &revs, 0, 0);

	/* Visit blobs only. */
	info.commits = 0;
	info.trees = 0;
	info.tags = 0;
	info.blobs = 1;

	info.path_fn_data = ctx;
	info.path_fn = fill_missing_blobs;
	info.revs = &revs;

	ret = walk_objects_by_path(&info);

	/* Flush whatever is left over from the last, partial batch. */
	if (ret == 0)
		download_batch(ctx);

	path_walk_info_clear(&info);
	release_revisions(&revs);
	return ret;
}
/*
 * Entry point for 'git backfill'.
 *
 * Parses --min-batch-size=<n> and --[no-]sparse, loads the default
 * configuration, and runs do_backfill(). When neither --sparse nor
 * --no-sparse was given, the sparse setting falls back to
 * core_apply_sparse_checkout.
 *
 * Returns the result of do_backfill(); dies if --min-batch-size is
 * negative.
 */
int cmd_backfill(int argc, const char **argv, const char *prefix, struct repository *repo)
{
	int result;
	/*
	 * OPT_INTEGER stores its result through an 'int *', so parse into
	 * an int and copy it into the size_t field afterwards instead of
	 * pointing the option directly at ctx.min_batch_size.
	 */
	int min_batch_size = 50000;
	struct backfill_context ctx = {
		.repo = repo,
		.current_batch = OID_ARRAY_INIT,
		.min_batch_size = 50000,
		/*
		 * -1 means "not specified on the command line"; OPT_BOOL
		 * overwrites it with 1 (--sparse) or 0 (--no-sparse).
		 */
		.sparse = -1,
	};
	struct option options[] = {
		OPT_INTEGER(0, "min-batch-size", &min_batch_size,
			    N_("Minimum number of objects to request at a time")),
		OPT_BOOL(0, "sparse", &ctx.sparse,
			 N_("Restrict the missing objects to the current sparse-checkout")),
		OPT_END(),
	};

	show_usage_if_asked(argc, argv, builtin_backfill_usage[0]);

	argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage,
			     0);

	if (min_batch_size < 0)
		die(_("--min-batch-size must be non-negative"));
	ctx.min_batch_size = min_batch_size;

	/* Populates core_apply_sparse_checkout, consulted just below. */
	repo_config(repo, git_default_config, NULL);

	if (ctx.sparse < 0)
		ctx.sparse = core_apply_sparse_checkout;

	result = do_backfill(&ctx);
	backfill_context_clear(&ctx);
	return result;
}