fetch: use batched reference updates
The reference updates performed as a part of 'git-fetch(1)' take place one at a time. For each reference update, a new transaction is created and committed. This is necessary to ensure we can allow individual updates to fail without failing the entire command. The command also supports an '--atomic' mode, which uses a single transaction to update all of the references. But this mode has an all-or-nothing approach, where if a single update fails, all updates would fail. In 23fc8e4f61 (refs: implement batch reference update support, 2025-04-08), we introduced a new mechanism to batch reference updates. Under the hood, this uses a single transaction to perform a batch of reference updates, while allowing only individual updates to fail. Utilize this newly introduced batch update mechanism in 'git-fetch(1)'. This provides a significant bump in performance, especially when dealing with repositories with a large number of references. Adding support for batched updates is simply a matter of modifying the flow to also create a batch update transaction in the non-atomic flow.
With the reftable backend there is a 22x performance improvement, when performing 'git-fetch(1)' with 10000 refs: Benchmark 1: fetch: many refs (refformat = reftable, refcount = 10000, revision = master) Time (mean ± σ): 3.403 s ± 0.775 s [User: 1.875 s, System: 1.417 s] Range (min … max): 2.454 s … 4.529 s 10 runs Benchmark 2: fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) Time (mean ± σ): 154.3 ms ± 17.6 ms [User: 102.5 ms, System: 56.1 ms] Range (min … max): 145.2 ms … 220.5 ms 18 runs Summary fetch: many refs (refformat = reftable, refcount = 10000, revision = HEAD) ran 22.06 ± 5.62 times faster than fetch: many refs (refformat = reftable, refcount = 10000, revision = master) In similar conditions, the files backend sees a 1.25x performance improvement: Benchmark 1: fetch: many refs (refformat = files, refcount = 10000, revision = master) Time (mean ± σ): 605.5 ms ± 9.4 ms [User: 117.8 ms, System: 483.3 ms] Range (min … max): 595.6 ms … 621.5 ms 10 runs Benchmark 2: fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) Time (mean ± σ): 485.8 ms ± 4.3 ms [User: 91.1 ms, System: 396.7 ms] Range (min … max): 477.6 ms … 494.3 ms 10 runs Summary fetch: many refs (refformat = files, refcount = 10000, revision = HEAD) ran 1.25 ± 0.02 times faster than fetch: many refs (refformat = files, refcount = 10000, revision = master) With this we'll either be using a regular transaction or a batch update transaction. This helps cleanup some code which is no longer needed as we'll now always have some type of 'ref_transaction' object being propagated. One big change is that earlier, each individual update would propagate a failure. Whereas now, the `ref_transaction_for_each_rejected_update` function is called at the end of the flow to capture the exit status for 'git-fetch(1)' and also to print F/D conflict errors. This does change the order of the errors being printed, but the behavior stays the same. 
Since transaction errors are now explicitly defined as part of 76e760b999 (refs: introduce enum-based transaction error types, 2025-04-08), utilize them and get rid of custom errors defined within 'builtin/fetch.c'. Signed-off-by: Karthik Nayak <karthik.188@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
committed by
Junio C Hamano
parent
b3de3832ce
commit
0e358de64a
113
builtin/fetch.c
113
builtin/fetch.c
@@ -640,9 +640,6 @@ static struct ref *get_ref_map(struct remote *remote,
|
||||
return ref_map;
|
||||
}
|
||||
|
||||
#define STORE_REF_ERROR_OTHER 1
|
||||
#define STORE_REF_ERROR_DF_CONFLICT 2
|
||||
|
||||
static int s_update_ref(const char *action,
|
||||
struct ref *ref,
|
||||
struct ref_transaction *transaction,
|
||||
@@ -650,7 +647,6 @@ static int s_update_ref(const char *action,
|
||||
{
|
||||
char *msg;
|
||||
char *rla = getenv("GIT_REFLOG_ACTION");
|
||||
struct ref_transaction *our_transaction = NULL;
|
||||
struct strbuf err = STRBUF_INIT;
|
||||
int ret;
|
||||
|
||||
@@ -660,43 +656,10 @@ static int s_update_ref(const char *action,
|
||||
rla = default_rla.buf;
|
||||
msg = xstrfmt("%s: %s", rla, action);
|
||||
|
||||
/*
|
||||
* If no transaction was passed to us, we manage the transaction
|
||||
* ourselves. Otherwise, we trust the caller to handle the transaction
|
||||
* lifecycle.
|
||||
*/
|
||||
if (!transaction) {
|
||||
transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
|
||||
0, &err);
|
||||
if (!transaction) {
|
||||
ret = STORE_REF_ERROR_OTHER;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = ref_transaction_update(transaction, ref->name, &ref->new_oid,
|
||||
check_old ? &ref->old_oid : NULL,
|
||||
NULL, NULL, 0, msg, &err);
|
||||
if (ret) {
|
||||
ret = STORE_REF_ERROR_OTHER;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (our_transaction) {
|
||||
switch (ref_transaction_commit(our_transaction, &err)) {
|
||||
case 0:
|
||||
break;
|
||||
case REF_TRANSACTION_ERROR_NAME_CONFLICT:
|
||||
ret = STORE_REF_ERROR_DF_CONFLICT;
|
||||
goto out;
|
||||
default:
|
||||
ret = STORE_REF_ERROR_OTHER;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
ref_transaction_free(our_transaction);
|
||||
if (ret)
|
||||
error("%s", err.buf);
|
||||
strbuf_release(&err);
|
||||
@@ -1139,7 +1102,6 @@ N_("it took %.2f seconds to check forced updates; you can use\n"
|
||||
"to avoid this check\n");
|
||||
|
||||
static int store_updated_refs(struct display_state *display_state,
|
||||
const char *remote_name,
|
||||
int connectivity_checked,
|
||||
struct ref_transaction *transaction, struct ref *ref_map,
|
||||
struct fetch_head *fetch_head,
|
||||
@@ -1277,11 +1239,6 @@ static int store_updated_refs(struct display_state *display_state,
|
||||
}
|
||||
}
|
||||
|
||||
if (rc & STORE_REF_ERROR_DF_CONFLICT)
|
||||
error(_("some local refs could not be updated; try running\n"
|
||||
" 'git remote prune %s' to remove any old, conflicting "
|
||||
"branches"), remote_name);
|
||||
|
||||
if (advice_enabled(ADVICE_FETCH_SHOW_FORCED_UPDATES)) {
|
||||
if (!config->show_forced_updates) {
|
||||
warning(_(warn_show_forced_updates));
|
||||
@@ -1365,9 +1322,8 @@ static int fetch_and_consume_refs(struct display_state *display_state,
|
||||
}
|
||||
|
||||
trace2_region_enter("fetch", "consume_refs", the_repository);
|
||||
ret = store_updated_refs(display_state, transport->remote->name,
|
||||
connectivity_checked, transaction, ref_map,
|
||||
fetch_head, config);
|
||||
ret = store_updated_refs(display_state, connectivity_checked,
|
||||
transaction, ref_map, fetch_head, config);
|
||||
trace2_region_leave("fetch", "consume_refs", the_repository);
|
||||
|
||||
out:
|
||||
@@ -1687,6 +1643,36 @@ cleanup:
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Callback state handed to ref_transaction_rejection_handler() while
 * walking the rejected updates of a batched fetch transaction.
 */
struct ref_rejection_data {
|
||||
/* Where the handler stores the exit status; set to 1 on every rejection. */
int *retcode;
|
||||
/* Non-zero once the "git remote prune" hint has been printed. */
int conflict_msg_shown;
|
||||
/* Remote name interpolated into the "git remote prune" hint. */
const char *remote_name;
|
||||
};
|
||||
|
||||
/*
 * Callback that do_fetch() passes to
 * ref_transaction_for_each_rejected_update(); reports one rejected
 * reference update and records the failure.
 *
 * refname is the reference whose update was rejected and err is the reason.
 * cb_data must point to a 'struct ref_rejection_data'. The old/new OID and
 * target parameters are unused.
 *
 * The first name conflict prints the "git remote prune" hint. Every other
 * rejection, including any later name conflict, prints a per-ref error
 * using the text from ref_transaction_error_msg().
 */
static void ref_transaction_rejection_handler(const char *refname,
|
||||
const struct object_id *old_oid UNUSED,
|
||||
const struct object_id *new_oid UNUSED,
|
||||
const char *old_target UNUSED,
|
||||
const char *new_target UNUSED,
|
||||
enum ref_transaction_error err,
|
||||
void *cb_data)
|
||||
{
|
||||
struct ref_rejection_data *data = cb_data;
|
||||
|
||||
/* Show the prune hint at most once per invocation of the iteration. */
if (err == REF_TRANSACTION_ERROR_NAME_CONFLICT && !data->conflict_msg_shown) {
|
||||
error(_("some local refs could not be updated; try running\n"
|
||||
" 'git remote prune %s' to remove any old, conflicting "
|
||||
"branches"), data->remote_name);
|
||||
data->conflict_msg_shown = 1;
|
||||
} else {
|
||||
const char *reason = ref_transaction_error_msg(err);
|
||||
|
||||
error(_("fetching ref %s failed: %s"), refname, reason);
|
||||
}
|
||||
|
||||
/* Any rejection, whichever branch reported it, marks the fetch as failed. */
*data->retcode = 1;
|
||||
}
|
||||
|
||||
static int do_fetch(struct transport *transport,
|
||||
struct refspec *rs,
|
||||
const struct fetch_config *config)
|
||||
@@ -1807,6 +1793,24 @@ static int do_fetch(struct transport *transport,
|
||||
retcode = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If not atomic, we can still use batched updates, which would be much
|
||||
* more performant. We don't initiate the transaction before pruning,
|
||||
* since pruning must be an independent step, to avoid F/D conflicts.
|
||||
*
|
||||
* TODO: if reference transactions gain logical conflict resolution, we
|
||||
* can delete and create refs (with F/D conflicts) in the same transaction
|
||||
* and this can be moved above the 'prune_refs()' block.
|
||||
*/
|
||||
if (!transaction) {
|
||||
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
|
||||
REF_TRANSACTION_ALLOW_FAILURE, &err);
|
||||
if (!transaction) {
|
||||
retcode = -1;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
if (fetch_and_consume_refs(&display_state, transport, transaction, ref_map,
|
||||
&fetch_head, config)) {
|
||||
retcode = 1;
|
||||
@@ -1838,7 +1842,6 @@ static int do_fetch(struct transport *transport,
|
||||
free_refs(tags_ref_map);
|
||||
}
|
||||
|
||||
if (transaction) {
|
||||
if (retcode)
|
||||
goto cleanup;
|
||||
|
||||
@@ -1852,6 +1855,22 @@ static int do_fetch(struct transport *transport,
|
||||
transaction = NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!atomic_fetch) {
|
||||
struct ref_rejection_data data = {
|
||||
.retcode = &retcode,
|
||||
.conflict_msg_shown = 0,
|
||||
.remote_name = transport->remote->name,
|
||||
};
|
||||
|
||||
ref_transaction_for_each_rejected_update(transaction,
|
||||
ref_transaction_rejection_handler,
|
||||
&data);
|
||||
if (retcode) {
|
||||
ref_transaction_free(transaction);
|
||||
transaction = NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
commit_fetch_head(&fetch_head);
|
||||
|
||||
Reference in New Issue
Block a user