From f3c1db4b2a23fac171a699b10f9328f8df52602f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:33 -0500 Subject: [PATCH 1/6] bulk-checkin: remove ODB transaction nesting ODB transactions support being nested. Only the outermost {begin,end}_odb_transaction() start and finish a transaction. This allows internal object write codepaths to be optimized with ODB transactions without worrying about whether a transaction is already active. When {begin,end}_odb_transaction() is invoked during an active transaction, these operations are essentially treated as no-ops. This can make the interface a bit awkward to use, as calling end_odb_transaction() does not guarantee that a transaction is actually ended. Thus, in situations where a transaction needs to be explicitly flushed, flush_odb_transaction() must be used. To remove the need for an explicit transaction flush operation via flush_odb_transaction() and better clarify transaction semantics, drop the transaction nesting mechanism in favor of begin_odb_transaction() returning a NULL transaction value to signal it was a no-op, and end_odb_transaction() behaving as a no-op when a NULL transaction value is passed. This is safe for existing callers as the transaction value wired to end_odb_transaction() already comes from begin_odb_transaction() and thus continues the same no-op behavior when a transaction is already pending. With this model, passing a pending transaction to end_odb_transaction() ensures it is committed at that point in time. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 22 ++++++++++------------ bulk-checkin.h | 8 +++----- object-file.c | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 124c493067..eb6ef704c3 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -33,7 +33,6 @@ struct bulk_checkin_packfile { struct odb_transaction { struct object_database *odb; - int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; }; @@ -368,12 +367,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - if (!odb->transaction) { - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - } + if (odb->transaction) + return NULL; - odb->transaction->nesting += 1; + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; return odb->transaction; } @@ -389,14 +387,14 @@ void flush_odb_transaction(struct odb_transaction *transaction) void end_odb_transaction(struct odb_transaction *transaction) { - if (!transaction || transaction->nesting == 0) - BUG("Unbalanced ODB transaction nesting"); - - transaction->nesting -= 1; - - if (transaction->nesting) + if (!transaction) return; + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + flush_odb_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); diff --git a/bulk-checkin.h b/bulk-checkin.h index ac8887f476..51d0ac6134 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -38,9 +38,8 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, /* * Tell the object database to optimize for adding * multiple objects. end_odb_transaction must be called - * to make new objects visible. Transactions can be nested, - * and objects are only visible after the outermost transaction - * is complete or the transaction is flushed. + * to make new objects visible. If a transaction is already + * pending, NULL is returned. */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); @@ -53,8 +52,7 @@ void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the - * current transaction visible if this is the final nested - * transaction. + * current transaction visible. */ void end_odb_transaction(struct odb_transaction *transaction); diff --git a/object-file.c b/object-file.c index bc15af4245..5e76573549 100644 --- a/object-file.c +++ b/object-file.c @@ -1267,7 +1267,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(transaction, + ret = index_blob_bulk_checkin(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); end_odb_transaction(transaction); From 9c61d9aded98748aae949b83babbdbd11e695f32 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:34 -0500 Subject: [PATCH 2/6] builtin/update-index: end ODB transaction when --verbose is specified With 23a3a303 (update-index: use the bulk-checkin infrastructure, 2022-04-04), object database transactions were added to git-update-index(1) to facilitate writing objects in bulk. With transactions, newly added objects are instead written to a temporary object directory and migrated to the primary object database upon transaction commit. When the --verbose option is specified, the subsequent set of objects written are explicitly flushed via flush_odb_transaction() prior to reporting the update. Flushing the object database transaction migrates pending objects to the primary object database without marking the transaction as complete. This is done so objects are immediately visible to git-update-index(1) callers using the --verbose option and that rely on parsing verbose output to know when objects are written. Due to how git-update-index(1) parses arguments, options that come after a filename are not considered during the object update. Therefore, it may not be known ahead of time whether the --verbose option is present and thus object writes are considered transactional by default until a --verbose option is parsed. Flushing a transaction after individual object writes negates the benefit of writing objects to a transaction in the first place. Furthermore, the mechanism to flush a transaction without actually committing is rather awkward. Drop the call to flush_odb_transaction() in favor of ending the transaction altogether when the --verbose flag is encountered. Subsequent object writes occur outside of a transaction and are therefore immediately visible which matches the current behavior. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/update-index.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2ba2d29c95..d36bc55752 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -70,14 +70,6 @@ static void report(const char *fmt, ...) if (!verbose) return; - /* - * It is possible, though unlikely, that a caller could use the verbose - * output to synchronize with addition of objects to the object - * database. The current implementation of ODB transactions leaves - * objects invisible while a transaction is active, so flush the - * transaction here before reporting a change made by update-index. - */ - flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -1150,6 +1142,21 @@ int cmd_update_index(int argc, const char *path = ctx.argv[0]; char *p; + /* + * It is possible, though unlikely, that a caller could + * use the verbose output to synchronize with addition + * of objects to the object database. The current + * implementation of ODB transactions leaves objects + * invisible while a transaction is active, so end the + * transaction here early before processing the next + * update. All further updates are performed outside of + * a transaction. + */ + if (transaction && verbose) { + end_odb_transaction(transaction); + transaction = NULL; + } + setup_work_tree(); p = prefix_path(prefix, prefix_length, path); update_one(p); From ca7d93453b6c309aa1fca411e1bdaa9ca4c82199 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:35 -0500 Subject: [PATCH 3/6] bulk-checkin: drop flush_odb_transaction() Object database transactions can be explicitly flushed via flush_odb_transaction() without actually completing the transaction. This makes the provided transactional interface a bit awkward. Now that there are no longer any flush_odb_transaction() call sites, drop the function to simplify the interface and further ensure that a transaction is only finalized when end_odb_transaction() is invoked. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 12 ++---------- bulk-checkin.h | 7 ------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index eb6ef704c3..5de848deff 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -376,15 +376,6 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void flush_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); -} - void end_odb_transaction(struct odb_transaction *transaction) { if (!transaction) @@ -395,7 +386,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_odb_transaction(transaction); + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); transaction->odb->transaction = NULL; free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 51d0ac6134..eea728f0d4 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -43,13 +43,6 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, */ struct odb_transaction *begin_odb_transaction(struct object_database *odb); -/* - * Make any objects that are currently part of a pending object - * database transaction visible. It is valid to call this function - * even if no transaction is active. - */ -void flush_odb_transaction(struct odb_transaction *transaction); - /* * Tell the object database to make any objects from the * current transaction visible. From 78839e9cdead363d10190a009783c2d18149cc54 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:36 -0500 Subject: [PATCH 4/6] object-file: relocate ODB transaction code The bulk-checkin subsystem provides various functions to manage ODB transactions. Apart from {begin,end}_odb_transaction(), these functions are only used by the object-file subsystem to manage aspects of a transaction implementation specific to the files object source. Relocate all the transaction code in bulk-checkin to object-file. This simplifies the exposed transaction interface by reducing it to only {begin,end}_odb_transaction(). Function and type names are adjusted in the subsequent commit to better fit the new location. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Makefile | 1 - builtin/add.c | 2 +- builtin/unpack-objects.c | 1 - builtin/update-index.c | 1 - bulk-checkin.c | 393 -------------------------------------- bulk-checkin.h | 52 ------ cache-tree.c | 1 - meson.build | 1 - object-file.c | 394 ++++++++++++++++++++++++++++++++++++++- object-file.h | 16 ++ read-cache.c | 1 - 11 files changed, 410 insertions(+), 453 deletions(-) delete mode 100644 bulk-checkin.c delete mode 100644 bulk-checkin.h diff --git a/Makefile b/Makefile index 4c95affadb..d25d4255f8 100644 --- a/Makefile +++ b/Makefile @@ -974,7 +974,6 @@ LIB_OBJS += blame.o LIB_OBJS += blob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o -LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle-uri.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o diff --git a/builtin/add.c b/builtin/add.c index 740c7c4581..8294366d68 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -14,13 +14,13 @@ #include "gettext.h" #include "pathspec.h" #include "run-command.h" +#include "object-file.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" #include "diff.h" #include "read-cache.h" #include "revision.h" -#include "bulk-checkin.h" #include "strvec.h" #include "submodule.h" #include "add-interactive.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 28124b324d..4596fff0da 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -2,7 +2,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index d36bc55752..ee01c4e423 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/bulk-checkin.c b/bulk-checkin.c deleted file mode 100644 index 5de848deff..0000000000 --- a/bulk-checkin.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "bulk-checkin.h" -#include "environment.h" -#include "gettext.h" -#include "hex.h" -#include "lockfile.h" -#include "repository.h" -#include "csum-file.h" -#include "pack.h" -#include "strbuf.h" -#include "tmp-objdir.h" -#include "packfile.h" -#include "object-file.h" -#include "odb.h" - -struct bulk_checkin_packfile { - char *pack_tmp_name; - struct hashfile *f; - off_t offset; - struct pack_idx_option pack_idx_opts; - - struct pack_idx_entry **written; - uint32_t alloc_written; - uint32_t nr_written; -}; - -struct odb_transaction { - struct object_database *odb; - - struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; -}; - -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - unsigned char hash[GIT_MAX_RAWSZ]; - struct strbuf packname = STRBUF_INIT; - - if (!state->f) - return; - - if (state->nr_written == 0) { - close(state->f->fd); - free_hashfile(state->f); - unlink(state->pack_tmp_name); - goto clear_exit; - } else if (state->nr_written == 1) { - finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, - CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); - } else { - int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, - state->nr_written, hash, - state->offset); - close(fd); - } - - strbuf_addf(&packname, "%s/pack/pack-%s.", - repo_get_object_directory(transaction->odb->repo), - hash_to_hex_algop(hash, repo->hash_algo)); - - finish_tmp_packfile(transaction, &packname, hash); - for (uint32_t i = 0; i < state->nr_written; i++) - free(state->written[i]); - -clear_exit: - free(state->pack_tmp_name); - free(state->written); - memset(state, 0, sizeof(*state)); - - strbuf_release(&packname); - /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(repo); -} - -/* - * Cleanup after batch-mode fsync_object_files. - */ -static void flush_batch_fsync(struct odb_transaction *transaction) -{ - struct strbuf temp_path = STRBUF_INIT; - struct tempfile *temp; - - if (!transaction->objdir) - return; - - /* - * Issue a full hardware flush against a temporary file to ensure - * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout - * request, but it has not flushed any writeback cache in the storage - * hardware or any filesystem logs. This fsync call acts as a barrier - * to ensure that the data in each new object file is durable before - * the final name is visible. - */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", - repo_get_object_directory(transaction->odb->repo)); - temp = xmks_tempfile(temp_path.buf); - fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); - delete_tempfile(&temp); - strbuf_release(&temp_path); - - /* - * Make the object files visible in the primary ODB after their data is - * fully durable. - */ - tmp_objdir_migrate(transaction->objdir); - transaction->objdir = NULL; -} - -static int already_written(struct odb_transaction *transaction, - struct object_id *oid) -{ - /* The object may already exist in the repository */ - if (odb_has_object(transaction->odb, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - return 1; - - /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) - if (oideq(&transaction->packfile.written[i]->oid, oid)) - return 1; - - /* This is a new object we need to keep */ - return 0; -} - -/* - * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. - */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) -{ - git_zstream s; - unsigned char ibuf[16384]; - unsigned char obuf[16384]; - unsigned hdrlen; - int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); - off_t offset = 0; - - git_deflate_init(&s, pack_compression_level); - - hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); - s.next_out = obuf + hdrlen; - s.avail_out = sizeof(obuf) - hdrlen; - - while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } - s.next_in = ibuf; - s.avail_in = rsize; - size -= rsize; - } - - status = git_deflate(&s, size ? 0 : Z_FINISH); - - if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; - } - s.next_out = obuf; - s.avail_out = sizeof(obuf); - } - - switch (status) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - continue; - default: - die("unexpected deflate failure: %d", status); - } - } - git_deflate_end(&s); - return 0; -} - -/* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) - return; - - state->f = create_tmp_packfile(transaction->odb->repo, - &state->pack_tmp_name); - reset_pack_idx_option(&state->pack_idx_opts); - - /* Pretend we are going to write only one object */ - state->offset = write_pack_header(state->f, 1); - if (!state->offset) - die_errno("unable to write pack header"); -} - -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; - struct git_hash_ctx ctx; - unsigned char obuf[16384]; - unsigned header_len; - struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; - - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t) -1) - return error("cannot find the current offset"); - - header_len = format_object_header((char *)obuf, sizeof(obuf), - OBJ_BLOB, size); - transaction->odb->repo->hash_algo->init_fn(&ctx); - git_hash_update(&ctx, obuf, header_len); - - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_to_stream(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } - - already_hashed_to = 0; - - while (1) { - prepare_to_stream(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - } - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - if (!idx) - BUG("should not happen"); - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) - return error("cannot seek back"); - } - git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; - - idx->crc32 = crc32_end(state->f); - if (already_written(transaction, result_oid)) { - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - free(idx); - } else { - oidcpy(&idx->oid, result_oid); - ALLOC_GROW(state->written, - state->nr_written + 1, - state->alloc_written); - state->written[state->nr_written++] = idx; - } - return 0; -} - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) -{ - /* - * We lazily create the temporary object directory - * the first time an object might be added, since - * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. - */ - if (!transaction || transaction->objdir) - return; - - transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); - if (transaction->objdir) - tmp_objdir_replace_primary_odb(transaction->objdir, 0); -} - -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename) -{ - /* - * If we have an active ODB transaction, we issue a call that - * cleans the filesystem page cache but avoids a hardware flush - * command. Later on we will issue a single hardware flush - * before renaming the objects to their final names as part of - * flush_batch_fsync. - */ - if (!transaction || !transaction->objdir || - git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { - if (errno == ENOSYS) - warning(_("core.fsyncMethod = batch is unsupported on this platform")); - fsync_or_die(fd, filename); - } -} - -struct odb_transaction *begin_odb_transaction(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - - return odb->transaction; -} - -void end_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. - */ - ASSERT(transaction == transaction->odb->transaction); - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); - transaction->odb->transaction = NULL; - free(transaction); -} diff --git a/bulk-checkin.h b/bulk-checkin.h deleted file mode 100644 index eea728f0d4..0000000000 --- a/bulk-checkin.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef BULK_CHECKIN_H -#define BULK_CHECKIN_H - -#include "object.h" -#include "odb.h" - -struct odb_transaction; - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename); - -/* - * This writes the specified object to a packfile. Objects written here - * during the same transaction are written to the same packfile. The - * packfile is not flushed until the transaction is flushed. The caller - * is expected to ensure a valid transaction is setup for objects to be - * recorded to. - * - * This also bypasses the usual "convert-to-git" dance, and that is on - * purpose. We could write a streaming version of the converting - * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above). However, that is - * somewhat complicated, as we do not know the size of the filter - * result, which we need to know beforehand when writing a git object. - * Since the primary motivation for trying to stream from the working - * tree file and to avoid mmaping it in core is to deal with large - * binary blobs, they generally do not want to get any conversion, and - * callers should avoid this code path when filters are requested. - */ -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags); - -/* - * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called - * to make new objects visible. If a transaction is already - * pending, NULL is returned. - */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); - -/* - * Tell the object database to make any objects from the - * current transaction visible. - */ -void end_odb_transaction(struct odb_transaction *transaction); - -#endif diff --git a/cache-tree.c b/cache-tree.c index d225554eed..79ddf6b727 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -8,7 +8,6 @@ #include "tree.h" #include "tree-walk.h" #include "cache-tree.h" -#include "bulk-checkin.h" #include "object-file.h" #include "odb.h" #include "read-cache-ll.h" diff --git a/meson.build b/meson.build index b3dfcc0497..fccb6d2eec 100644 --- a/meson.build +++ b/meson.build @@ -287,7 +287,6 @@ libgit_sources = [ 'blob.c', 'bloom.c', 'branch.c', - 'bulk-checkin.c', 'bundle-uri.c', 'bundle.c', 'cache-tree.c', diff --git a/object-file.c b/object-file.c index 5e76573549..03f9931b83 100644 --- a/object-file.c +++ b/object-file.c @@ -10,7 +10,6 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" -#include "bulk-checkin.h" #include "convert.h" #include "dir.h" #include "environment.h" @@ -28,6 +27,8 @@ #include "read-cache-ll.h" #include "setup.h" #include "streaming.h" +#include "tempfile.h" +#include "tmp-objdir.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } +struct bulk_checkin_packfile { + char *pack_tmp_name; + struct hashfile *f; + off_t offset; + struct pack_idx_option pack_idx_opts; + + struct pack_idx_entry **written; + uint32_t alloc_written; + uint32_t nr_written; +}; + +struct odb_transaction { + struct object_database *odb; + + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +}; + +static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +{ + /* + * We lazily create the temporary object directory + * the first time an object might be added, since + * callers may not know whether any objects will be + * added at the time they call begin_odb_transaction. + */ + if (!transaction || transaction->objdir) + return; + + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); +} + +static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) +{ + /* + * If we have an active ODB transaction, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before renaming the objects to their final names as part of + * flush_batch_fsync. + */ + if (!transaction || !transaction->objdir || + git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { + if (errno == ENOSYS) + warning(_("core.fsyncMethod = batch is unsupported on this platform")); + fsync_or_die(fd, filename); + } +} + +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void flush_batch_fsync(struct odb_transaction *transaction) +{ + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + if (!transaction->objdir) + return; + + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware or any filesystem logs. This fsync call acts as a barrier + * to ensure that the data in each new object file is durable before + * the final name is visible. + */ + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + + /* + * Make the object files visible in the primary ODB after their data is + * fully durable. + */ + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; +} + /* Finalize a file on disk, and close it. */ static void close_loose_object(struct odb_source *source, int fd, const char *filename) @@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate, return ret; } +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) +{ + /* The object may already exist in the repository */ + if (odb_has_object(transaction->odb, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + return 1; + + /* Might want to keep the list sorted */ + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_to_stream(struct odb_transaction *transaction, + unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + if (!(flags & INDEX_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_blob_to_pack(struct bulk_checkin_packfile *state, + struct git_hash_ctx *ctx, off_t *already_hashed_to, + int fd, size_t size, const char *path, + unsigned flags) +{ + git_zstream s; + unsigned char ibuf[16384]; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & INDEX_WRITE_OBJECT); + off_t offset = 0; + + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + if (size && !s.avail_in) { + size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if ((size_t)read_result != rsize) + die("failed to read %u bytes from '%s'", + (unsigned)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_hash_update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? */ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; + } + + hashwrite(state->f, obuf, written); + state->offset += written; + } + s.next_out = obuf; + s.avail_out = sizeof(obuf); + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + git_deflate_end(&s); + return 0; +} + +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, + unsigned char hash[]) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + char *idx_tmp_name = NULL; + + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); + + free(idx_tmp_name); +} + +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + unsigned char hash[GIT_MAX_RAWSZ]; + struct strbuf packname = STRBUF_INIT; + + if (!state->f) + return; + + if (state->nr_written == 0) { + close(state->f->fd); + free_hashfile(state->f); + unlink(state->pack_tmp_name); + goto clear_exit; + } else if (state->nr_written == 1) { + finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + } else { + int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, + state->nr_written, hash, + state->offset); + close(fd); + } + + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); + for (uint32_t i = 0; i < state->nr_written; i++) + free(state->written[i]); + +clear_exit: + free(state->pack_tmp_name); + free(state->written); + memset(state, 0, sizeof(*state)); + + strbuf_release(&packname); + /* Make objects we just wrote available to ourselves */ + reprepare_packed_git(repo); +} + +/* + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. + */ +static int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + off_t seekback, already_hashed_to; + struct git_hash_ctx ctx; + unsigned char obuf[16384]; + unsigned header_len; + struct hashfile_checkpoint checkpoint; + struct pack_idx_entry *idx = NULL; + + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t)-1) + return error("cannot find the current offset"); + + header_len = format_object_header((char *)obuf, sizeof(obuf), + OBJ_BLOB, size); + transaction->odb->repo->hash_algo->init_fn(&ctx); + git_hash_update(&ctx, obuf, header_len); + + /* Note: idx is non-NULL when we are writing */ + if ((flags & INDEX_WRITE_OBJECT) != 0) { + CALLOC_ARRAY(idx, 1); + + prepare_to_stream(transaction, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + + already_hashed_to = 0; + + while (1) { + prepare_to_stream(transaction, flags); + if (idx) { + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + } + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, + fd, size, path, flags)) + break; + /* + * Writing this object to the current pack will make + * it too big; we need to truncate it, start a new + * pack, and write into it. + */ + if (!idx) + BUG("should not happen"); + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + flush_bulk_checkin_packfile(transaction); + if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) + return error("cannot seek back"); + } + git_hash_final_oid(result_oid, &ctx); + if (!idx) + return 0; + + idx->crc32 = crc32_end(state->f); + if (already_written(transaction, result_oid)) { + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + } else { + oidcpy(&idx->oid, result_oid); + ALLOC_GROW(state->written, + state->nr_written + 1, + state->alloc_written); + state->written[state->nr_written++] = idx; + } + return 0; +} + int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) @@ -1609,3 +1974,30 @@ out: munmap(map, mapsize); return ret; } + +struct odb_transaction *begin_odb_transaction(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + + return odb->transaction; +} + +void end_odb_transaction(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); + transaction->odb->transaction = NULL; + free(transaction); +} diff --git a/object-file.h b/object-file.h index 15d97630d3..6323d2e63c 100644 --- a/object-file.h +++ b/object-file.h @@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +struct odb_transaction; + +/* + * Tell the object database to optimize for adding + * multiple objects. end_odb_transaction must be called + * to make new objects visible. If a transaction is already + * pending, NULL is returned. + */ +struct odb_transaction *begin_odb_transaction(struct object_database *odb); + +/* + * Tell the object database to make any objects from the + * current transaction visible. + */ +void end_odb_transaction(struct odb_transaction *transaction); + #endif /* OBJECT_FILE_H */ diff --git a/read-cache.c b/read-cache.c index 229b8ef11c..80591eeced 100644 --- a/read-cache.c +++ b/read-cache.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "bulk-checkin.h" #include "config.h" #include "date.h" #include "diff.h" From ed0f5f93e9f0b0b3cc1a37ee5b10b625590f08c8 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:37 -0500 Subject: [PATCH 5/6] object-file: update naming from bulk-checkin Update the names of several functions and types relocated from the bulk-checkin subsystem for better clarity. Also drop finish_tmp_packfile() as a standalone function in favor of embedding it in flush_packfile_transaction() directly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 80 +++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/object-file.c b/object-file.c index 03f9931b83..8103a2bf41 100644 --- a/object-file.c +++ b/object-file.c @@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } -struct bulk_checkin_packfile { +struct transaction_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -682,10 +682,10 @@ struct odb_transaction { struct object_database *odb; struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; + struct transaction_packfile packfile; }; -static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +static void prepare_loose_object_transaction(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, +static void fsync_loose_object_transaction(struct odb_transaction *transaction, int fd, const char *filename) { /* @@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(struct odb_transaction *transaction) +static void flush_loose_object_transaction(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; @@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction) /* * Issue a full hardware flush against a temporary file to ensure * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout + * fsync_loose_object_transaction has already issued a writeout * request, but it has not flushed any writeback cache in the storage * hardware or any filesystem logs. This fsync call acts as a barrier * to ensure that the data in each new object file is durable before @@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction *transaction, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; @@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction, * status before calling us just in case we ask it to call us again * with a new pack. */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, +static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, int fd, size_t size, const char *path, unsigned flags) @@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) +static void flush_packfile_transaction(struct odb_transaction *transaction) { - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; + char *idx_tmp_name = NULL; if (!state->f) return; @@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) repo_get_object_directory(transaction->odb->repo), hash_to_hex_algop(hash, repo->hash_algo)); - finish_tmp_packfile(transaction, &packname, hash); + stage_tmp_packfiles(repo, &packname, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name); + for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); clear_exit: + free(idx_tmp_name); free(state->pack_tmp_name); free(state->written); memset(state, 0, sizeof(*state)); @@ -1535,11 +1525,12 @@ clear_exit: * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) +static int index_blob_packfile_transaction(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, + size_t size, const char *path, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); + flush_packfile_transaction(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) return error("cannot seek back"); } @@ -1632,9 +1623,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(the_repository->objects->transaction, - oid, fd, xsize_t(st->st_size), - path, flags); + ret = index_blob_packfile_transaction(the_repository->objects->transaction, + oid, fd, + xsize_t(st->st_size), + path, flags); end_odb_transaction(transaction); } @@ -1996,8 +1988,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); + flush_loose_object_transaction(transaction); + flush_packfile_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); } From ce1661f9da70ea2ffcb54f7b544410fad26e965d Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:38 -0500 Subject: [PATCH 6/6] odb: add transaction interface Transactions are managed via the {begin,end}_odb_transaction() function in the object-file subsystem and its implementation is specific to the files object source. Introduce odb_transaction_{begin,commit}() in the odb subsystem to provide an eventual object source agnostic means to manage transactions. Update call sites to instead manage transactions through the odb subsystem. Also rename {begin,end}_odb_transaction() functions to object_file_transaction_{begin,commit}() to clarify the object source it supports. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 +++-- builtin/unpack-objects.c | 4 ++-- builtin/update-index.c | 7 ++++--- cache-tree.c | 4 ++-- object-file.c | 12 +++++++----- object-file.h | 6 +++--- odb.c | 10 ++++++++++ odb.h | 13 +++++++++++++ read-cache.c | 4 ++-- 9 files changed, 46 insertions(+), 19 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 8294366d68..bf312c40be 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -15,6 +15,7 @@ #include "pathspec.h" #include "run-command.h" #include "object-file.h" +#include "odb.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" @@ -575,7 +576,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -594,7 +595,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4596fff0da..ef79e43715 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -599,12 +599,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(transaction); + odb_transaction_commit(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index ee01c4e423..8a5907767b 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -18,6 +18,7 @@ #include "cache-tree.h" #include "tree-walk.h" #include "object-file.h" +#include "odb.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" @@ -1122,7 +1123,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1152,7 +1153,7 @@ int cmd_update_index(int argc, * a transaction. */ if (transaction && verbose) { - end_odb_transaction(transaction); + odb_transaction_commit(transaction); transaction = NULL; } @@ -1220,7 +1221,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(transaction); + odb_transaction_commit(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/cache-tree.c b/cache-tree.c index 79ddf6b727..2aba47060e 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 8103a2bf41..17a236d2fe 100644 --- a/object-file.c +++ b/object-file.c @@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction * We lazily create the temporary object directory * the first time an object might be added, since * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. + * added at the time they call object_file_transaction_begin. */ if (!transaction || transaction->objdir) return; @@ -1622,12 +1622,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, } else { struct odb_transaction *transaction; - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); ret = index_blob_packfile_transaction(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); } close(fd); @@ -1967,8 +1967,10 @@ out: return ret; } -struct odb_transaction *begin_odb_transaction(struct object_database *odb) +struct odb_transaction *object_file_transaction_begin(struct odb_source *source) { + struct object_database *odb = source->odb; + if (odb->transaction) return NULL; @@ -1978,7 +1980,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void end_odb_transaction(struct odb_transaction *transaction) +void object_file_transaction_commit(struct odb_transaction *transaction) { if (!transaction) return; diff --git a/object-file.h b/object-file.h index 6323d2e63c..3fd48dcafb 100644 --- a/object-file.h +++ b/object-file.h @@ -222,16 +222,16 @@ struct odb_transaction; /* * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called + * multiple objects. object_file_transaction_commit must be called * to make new objects visible. If a transaction is already * pending, NULL is returned. */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); +struct odb_transaction *object_file_transaction_begin(struct odb_source *source); /* * Tell the object database to make any objects from the * current transaction visible. */ -void end_odb_transaction(struct odb_transaction *transaction); +void object_file_transaction_commit(struct odb_transaction *transaction); #endif /* OBJECT_FILE_H */ diff --git a/odb.c b/odb.c index 2a92a018c4..af9534bfe1 100644 --- a/odb.c +++ b/odb.c @@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o) hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + return object_file_transaction_begin(odb->sources); +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + object_file_transaction_commit(transaction); +} diff --git a/odb.h b/odb.h index a89b214390..82093753c8 100644 --- a/odb.h +++ b/odb.h @@ -185,6 +185,19 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. + */ +void odb_transaction_commit(struct odb_transaction *transaction); + /* * Find source by its object directory path. Dies in case the source couldn't * be found. diff --git a/read-cache.c b/read-cache.c index 80591eeced..94098a3861 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); release_revisions(&rev); return !!data.add_errors;