From bc67b4ab5f8bc268ecd2d9bb7dc1b7bf26884a8e Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Wed, 15 Jan 2025 11:54:51 +0000 Subject: [PATCH 1/2] reftable: write correct max_update_index to header In 297c09eabb (refs: allow multiple reflog entries for the same refname, 2024-12-16), the reftable backend learned to handle multiple reflog entries within the same transaction. This was done modifying the `update_index` for reflogs with multiple indices. During writing the logs, the `max_update_index` of the writer was modified to ensure the limits were raised to the modified `update_index`s. However, since ref entries are written before the modification to the `max_update_index`, if there are multiple blocks to be written, the reftable backend writes the header with the old `max_update_index`. When all logs are finally written, the footer will be written with the new `min_update_index`. This causes a mismatch between the header and the footer and causes the reftable file to be corrupted. The existing tests only spawn a single block and since headers are lazily written with the first block, the tests didn't capture this bug. To fix the issue, the appropriate `max_update_index` limit must be set even before the first block is written. Add a `max_index` field to the transaction which holds the `max_index` within all its updates, then propagate this value to the reftable backend, wherein this is used to the set the `max_update_index` correctly. Add a test which creates a few thousand reference updates with multiple reflog entries, which should trigger the bug. Reported-by: brian m. carlson Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs.c | 7 +++++++ refs/refs-internal.h | 1 + refs/reftable-backend.c | 20 ++++++++++---------- t/t1460-refs-migrate.sh | 12 ++++++++++++ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/refs.c b/refs.c index c555839869..a5851a7de0 100644 --- a/refs.c +++ b/refs.c @@ -1297,6 +1297,13 @@ int ref_transaction_update_reflog(struct ref_transaction *transaction, update->flags &= ~REF_HAVE_OLD; update->index = index; + /* + * Reference backends may need to know the max index to optimize + * their writes. So we store the max_index on the transaction level. + */ + if (index > transaction->max_index) + transaction->max_index = index; + return 0; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 79b287c5ec..2aaff91ab4 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -203,6 +203,7 @@ struct ref_transaction { enum ref_transaction_state state; void *backend_data; unsigned int flags; + unsigned int max_index; }; /* diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index bec5962deb..68db2baa8f 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -852,6 +852,7 @@ struct write_transaction_table_arg { size_t updates_nr; size_t updates_alloc; size_t updates_expected; + unsigned int max_index; }; struct reftable_transaction_data { @@ -1302,7 +1303,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data struct reftable_log_record *logs = NULL; struct ident_split committer_ident = {0}; size_t logs_nr = 0, logs_alloc = 0, i; - uint64_t max_update_index = ts; const char *committer_info; int ret = 0; @@ -1312,7 +1312,12 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data QSORT(arg->updates, arg->updates_nr, transaction_update_cmp); - reftable_writer_set_limits(writer, ts, ts); + /* + * During reflog migration, we add indexes for a single reflog with + * multiple entries. Each entry will contain a different update_index, + * so set the limits accordingly. + */ + reftable_writer_set_limits(writer, ts, ts + arg->max_index); for (i = 0; i < arg->updates_nr; i++) { struct reftable_transaction_update *tx_update = &arg->updates[i]; @@ -1414,12 +1419,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data */ log->update_index = ts + u->index; - /* - * Note the max update_index so the limit can be set later on. - */ - if (log->update_index > max_update_index) - max_update_index = log->update_index; - log->refname = xstrdup(u->refname); memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ); @@ -1483,8 +1482,6 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data * and log blocks. */ if (logs) { - reftable_writer_set_limits(writer, ts, max_update_index); - ret = reftable_writer_add_logs(writer, logs, logs_nr); if (ret < 0) goto done; @@ -1505,6 +1502,9 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, struct reftable_transaction_data *tx_data = transaction->backend_data; int ret = 0; + if (tx_data->args) + tx_data->args->max_index = transaction->max_index; + for (size_t i = 0; i < tx_data->args_nr; i++) { ret = reftable_addition_add(tx_data->args[i].addition, write_transaction_table, &tx_data->args[i]); diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh index f59bc4860f..307b2998ef 100755 --- a/t/t1460-refs-migrate.sh +++ b/t/t1460-refs-migrate.sh @@ -227,6 +227,18 @@ do done done +test_expect_success 'multiple reftable blocks with multiple entries' ' + test_when_finished "rm -rf repo" && + git init --ref-format=files repo && + test_commit -C repo first && + printf "create refs/heads/ref-%d HEAD\n" $(test_seq 5000) >stdin && + git -C repo update-ref --stdin stdin && + git -C repo update-ref --stdin Date: Mon, 27 Jan 2025 10:44:08 +0100 Subject: [PATCH 2/2] refs/reftable: fix uninitialized memory access of `max_index` When migrating reflogs between reference backends, maintaining the original order of the reflog entries is crucial. To achieve this, an `index` field is stored within the `ref_update` struct that encodes the relative order of reflog entries. This field is used by the reftable backend as update index for the respective reflog entries to maintain that ordering. These update indices must be respected when writing table headers, which encode the minimum and maximum update index of contained records in the header and footer. This logic was added in commit bc67b4ab5f (reftable: write correct max_update_index to header, 2025-01-15), which started to use `reftable_writer_set_limits()` to propagate the mininum and maximum update index of all records contained in a ref transaction. However, we only set the maximum update index for the first transaction argument, even though there can be multiple such arguments. This is the case when we write to multiple stacks in a single transaction, e.g. when updating references in two different worktrees at once. Consequently, the update index for all but the first argument remain uninitialized, which may cause undefined behaviour. Fix this by moving the assignment of the maximum update index in `reftable_be_transaction_finish()` inside the loop, which ensures that all elements of the array are correctly initialized. Furthermore, initialize the `max_index` field to 0 when queueing a new transaction argument. This is not strictly necessary, as all elements of `write_transaction_table_arg.max_index` are now assigned correctly. However, this initialization is added for consistency and to safeguard against potential future changes that might inadvertently introduce uninitialized memory access. Reported-by: Johannes Schindelin Signed-off-by: Karthik Nayak Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 68db2baa8f..bb658826fe 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -920,6 +920,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out, arg->updates_nr = 0; arg->updates_alloc = 0; arg->updates_expected = 0; + arg->max_index = 0; } arg->updates_expected++; @@ -1502,10 +1503,9 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED, struct reftable_transaction_data *tx_data = transaction->backend_data; int ret = 0; - if (tx_data->args) - tx_data->args->max_index = transaction->max_index; - for (size_t i = 0; i < tx_data->args_nr; i++) { + tx_data->args[i].max_index = transaction->max_index; + ret = reftable_addition_add(tx_data->args[i].addition, write_transaction_table, &tx_data->args[i]); if (ret < 0)