From 98518304c5761ba04cefb6d73c5698db7e46d1c2 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:57 -0500 Subject: [PATCH 1/4] bulk-checkin: introduce object database transaction structure Object database transaction state is stored across several global variables in the bulk-checkin subsystem. Consolidate this state into a single `struct odb_transaction` global. In a subsequent commit, the transactional interfaces will be updated to wire this structure instead of relying on a global variable. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index b2809ab039..82a73da79e 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -19,11 +19,7 @@ #include "object-file.h" #include "odb.h" -static int odb_transaction_nesting; - -static struct tmp_objdir *bulk_fsync_objdir; - -static struct bulk_checkin_packfile { +struct bulk_checkin_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -32,7 +28,13 @@ static struct bulk_checkin_packfile { struct pack_idx_entry **written; uint32_t alloc_written; uint32_t nr_written; -} bulk_checkin_packfile; +}; + +static struct odb_transaction { + int nesting; + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +} transaction; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -101,7 +103,7 @@ static void flush_batch_fsync(void) struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!bulk_fsync_objdir) + if (!transaction.objdir) return; /* @@ -123,8 +125,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(bulk_fsync_objdir); - bulk_fsync_objdir = NULL; + tmp_objdir_migrate(transaction.objdir); + transaction.objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -331,12 +333,12 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. */ - if (!odb_transaction_nesting || bulk_fsync_objdir) + if (!transaction.nesting || transaction.objdir) return; - bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (bulk_fsync_objdir) - tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); + transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction.objdir) + tmp_objdir_replace_primary_odb(transaction.objdir, 0); } void fsync_loose_object_bulk_checkin(int fd, const char *filename) @@ -348,7 +350,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. */ - if (!bulk_fsync_objdir || + if (!transaction.objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -360,31 +362,31 @@ int index_blob_bulk_checkin(struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size, + int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, path, flags); - if (!odb_transaction_nesting) - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + if (!transaction.nesting) + flush_bulk_checkin_packfile(&transaction.packfile); return status; } void begin_odb_transaction(void) { - odb_transaction_nesting += 1; + transaction.nesting += 1; } void flush_odb_transaction(void) { flush_batch_fsync(); - flush_bulk_checkin_packfile(&bulk_checkin_packfile); + flush_bulk_checkin_packfile(&transaction.packfile); } void end_odb_transaction(void) { - odb_transaction_nesting -= 1; - if (odb_transaction_nesting < 0) + transaction.nesting -= 1; + if (transaction.nesting < 0) BUG("Unbalanced ODB transaction nesting"); - if (odb_transaction_nesting) + if (transaction.nesting) return; flush_odb_transaction(); From b3361447256bb92a1dbdda910a33cfb1d6fc8f88 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:58 -0500 Subject: [PATCH 2/4] bulk-checkin: remove global transaction state Object database transactions in the bulk-checkin subsystem rely on global state to track transaction status. Stop relying on global state and instead store the transaction in the `struct object_database`. Functions that operate on transactions are updated to now wire transaction state. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 ++- builtin/unpack-objects.c | 5 ++- builtin/update-index.c | 7 +-- bulk-checkin.c | 92 +++++++++++++++++++++++++--------------- bulk-checkin.h | 18 +++++--- cache-tree.c | 5 ++- object-file.c | 11 ++--- odb.h | 8 ++++ read-cache.c | 5 ++- 9 files changed, 99 insertions(+), 57 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 0235854f80..740c7c4581 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -389,6 +389,7 @@ int cmd_add(int argc, char *seen = NULL; char *ps_matched = NULL; struct lock_file lock_file = LOCK_INIT; + struct odb_transaction *transaction; repo_config(repo, add_config, NULL); @@ -574,7 +575,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -593,7 +594,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(); + end_odb_transaction(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 7ae7c82b6c..28124b324d 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -584,6 +584,7 @@ static void unpack_all(void) { int i; unsigned char *hdr = fill(sizeof(struct pack_header)); + struct odb_transaction *transaction; if (get_be32(hdr) != PACK_SIGNATURE) die("bad pack file"); @@ -599,12 +600,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(); + end_odb_transaction(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index 2380f3ccd6..2ba2d29c95 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -77,7 +77,7 @@ static void report(const char *fmt, ...) * objects invisible while a transaction is active, so flush the * transaction here before reporting a change made by update-index. */ - flush_odb_transaction(); + flush_odb_transaction(the_repository->objects->transaction); va_start(vp, fmt); vprintf(fmt, vp); putchar('\n'); @@ -940,6 +940,7 @@ int cmd_update_index(int argc, strbuf_getline_fn getline_fn; int parseopt_state = PARSE_OPT_UNKNOWN; struct repository *r = the_repository; + struct odb_transaction *transaction; struct option options[] = { OPT_BIT('q', NULL, &refresh_args.flags, N_("continue refresh even when index needs update"), @@ -1130,7 +1131,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1213,7 +1214,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(); + end_odb_transaction(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/bulk-checkin.c b/bulk-checkin.c index 82a73da79e..53a20a2d92 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -30,11 +30,13 @@ struct bulk_checkin_packfile { uint32_t nr_written; }; -static struct odb_transaction { +struct odb_transaction { + struct object_database *odb; + int nesting; struct tmp_objdir *objdir; struct bulk_checkin_packfile packfile; -} transaction; +}; static void finish_tmp_packfile(struct strbuf *basename, const char *pack_tmp_name, @@ -98,12 +100,12 @@ clear_exit: /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(void) +static void flush_batch_fsync(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; - if (!transaction.objdir) + if (!transaction->objdir) return; /* @@ -125,8 +127,8 @@ static void flush_batch_fsync(void) * Make the object files visible in the primary ODB after their data is * fully durable. */ - tmp_objdir_migrate(transaction.objdir); - transaction.objdir = NULL; + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; } static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) @@ -325,7 +327,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -void prepare_loose_object_bulk_checkin(void) +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -333,15 +335,16 @@ void prepare_loose_object_bulk_checkin(void) * callers may not know whether any objects will be * added at the time they call begin_odb_transaction. */ - if (!transaction.nesting || transaction.objdir) + if (!transaction || transaction->objdir) return; - transaction.objdir = tmp_objdir_create(the_repository, "bulk-fsync"); - if (transaction.objdir) - tmp_objdir_replace_primary_odb(transaction.objdir, 0); + transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -void fsync_loose_object_bulk_checkin(int fd, const char *filename) +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) { /* * If we have an active ODB transaction, we issue a call that @@ -350,7 +353,7 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) * before renaming the objects to their final names as part of * flush_batch_fsync. */ - if (!transaction.objdir || + if (!transaction || !transaction->objdir || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { if (errno == ENOSYS) warning(_("core.fsyncMethod = batch is unsupported on this platform")); @@ -358,36 +361,57 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename) } } -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags) { - int status = deflate_blob_to_pack(&transaction.packfile, oid, fd, size, - path, flags); - if (!transaction.nesting) - flush_bulk_checkin_packfile(&transaction.packfile); + int status; + + if (transaction) { + status = deflate_blob_to_pack(&transaction->packfile, oid, fd, + size, path, flags); + } else { + struct bulk_checkin_packfile state = { 0 }; + + status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); + flush_bulk_checkin_packfile(&state); + } + return status; } -void begin_odb_transaction(void) +struct odb_transaction *begin_odb_transaction(struct object_database *odb) { - transaction.nesting += 1; + if (!odb->transaction) { + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + } + + odb->transaction->nesting += 1; + + return odb->transaction; } -void flush_odb_transaction(void) +void flush_odb_transaction(struct odb_transaction *transaction) { - flush_batch_fsync(); - flush_bulk_checkin_packfile(&transaction.packfile); -} - -void end_odb_transaction(void) -{ - transaction.nesting -= 1; - if (transaction.nesting < 0) - BUG("Unbalanced ODB transaction nesting"); - - if (transaction.nesting) + if (!transaction) return; - flush_odb_transaction(); + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(&transaction->packfile); +} + +void end_odb_transaction(struct odb_transaction *transaction) +{ + if (!transaction || transaction->nesting == 0) + BUG("Unbalanced ODB transaction nesting"); + + transaction->nesting -= 1; + + if (transaction->nesting) + return; + + flush_odb_transaction(transaction); + transaction->odb->transaction = NULL; + free(transaction); } diff --git a/bulk-checkin.h b/bulk-checkin.h index 7246ea58dc..16254ce6a7 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -5,9 +5,13 @@ #define BULK_CHECKIN_H #include "object.h" +#include "odb.h" -void prepare_loose_object_bulk_checkin(void); -void fsync_loose_object_bulk_checkin(int fd, const char *filename); +struct odb_transaction; + +void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); +void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename); /* * This creates one packfile per large blob unless bulk-checkin @@ -24,8 +28,8 @@ void fsync_loose_object_bulk_checkin(int fd, const char *filename); * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -int index_blob_bulk_checkin(struct object_id *oid, - int fd, size_t size, +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *oid, int fd, size_t size, const char *path, unsigned flags); /* @@ -35,20 +39,20 @@ int index_blob_bulk_checkin(struct object_id *oid, * and objects are only visible after the outermost transaction * is complete or the transaction is flushed. */ -void begin_odb_transaction(void); +struct odb_transaction *begin_odb_transaction(struct object_database *odb); /* * Make any objects that are currently part of a pending object * database transaction visible. It is valid to call this function * even if no transaction is active. */ -void flush_odb_transaction(void); +void flush_odb_transaction(struct odb_transaction *transaction); /* * Tell the object database to make any objects from the * current transaction visible if this is the final nested * transaction. */ -void end_odb_transaction(void); +void end_odb_transaction(struct odb_transaction *transaction); #endif diff --git a/cache-tree.c b/cache-tree.c index 66ef2becbe..d225554eed 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -474,6 +474,7 @@ static int update_one(struct cache_tree *it, int cache_tree_update(struct index_state *istate, int flags) { + struct odb_transaction *transaction; int skip, i; i = verify_cache(istate, flags); @@ -489,10 +490,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - begin_odb_transaction(); + transaction = begin_odb_transaction(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(); + end_odb_transaction(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 2bc36ab3ee..1740aa2b2e 100644 --- a/object-file.c +++ b/object-file.c @@ -674,7 +674,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(fd, filename); + fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -852,7 +852,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -941,7 +941,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(); + prepare_loose_object_bulk_checkin(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1263,8 +1263,9 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); else - ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path, - flags); + ret = index_blob_bulk_checkin(the_repository->objects->transaction, + oid, fd, xsize_t(st->st_size), + path, flags); close(fd); return ret; } diff --git a/odb.h b/odb.h index 3dfc66d75a..a89b214390 100644 --- a/odb.h +++ b/odb.h @@ -84,6 +84,7 @@ struct odb_source { struct packed_git; struct cached_object_entry; +struct odb_transaction; /* * The object database encapsulates access to objects in a repository. It @@ -94,6 +95,13 @@ struct object_database { /* Repository that owns this database. */ struct repository *repo; + /* + * State of current current object database transaction. Only one + * transaction may be pending at a time. Is NULL when no transaction is + * configured. + */ + struct odb_transaction *transaction; + /* * Set of all object directories; the main directory is first (and * cannot be NULL after initialization). Subsequent directories are diff --git a/read-cache.c b/read-cache.c index 06ad74db22..229b8ef11c 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3947,6 +3947,7 @@ int add_files_to_cache(struct repository *repo, const char *prefix, const struct pathspec *pathspec, char *ps_matched, int include_sparse, int flags) { + struct odb_transaction *transaction; struct update_callback_data data; struct rev_info rev; @@ -3972,9 +3973,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - begin_odb_transaction(); + transaction = begin_odb_transaction(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(); + end_odb_transaction(transaction); release_revisions(&rev); return !!data.add_errors; From aa4d81b53311fcdf099400beebad99c14be4b561 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:34:59 -0500 Subject: [PATCH 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() The bulk-checkin subsystem provides a mechanism to write blobs directly to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing transaction when invoked, objects written via this function are stored in the same packfile. The packfile is not flushed until the transaction itself is flushed. If there is no transaction, the single object is written to a packfile and immediately flushed. This complicates `index_blob_bulk_checkin()` as it cannot reliably use the provided transaction to get the associated repository. Update `index_blob_bulk_checkin()` to assume that a valid transaction is always provided. Callers are now expected to ensure a transaction is set up beforehand. With this simplification, `deflate_blob_bulk_checkin()` is no longer needed as a standalone internal function and is combined with `index_blob_bulk_checkin()`. The single call site in `object-file.c:index_fd()` is updated accordingly. Due to how `{begin,end}_odb_transaction()` handles nested transactions, a new transaction is only created and committed if there is not already an ongoing transaction. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 27 ++++----------------------- bulk-checkin.h | 7 +++++-- object-file.c | 21 ++++++++++++++------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 53a20a2d92..542d8125a8 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state, die_errno("unable to write pack header"); } -static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, - struct object_id *result_oid, - int fd, size_t size, - const char *path, unsigned flags) +int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, } } -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags) -{ - int status; - - if (transaction) { - status = deflate_blob_to_pack(&transaction->packfile, oid, fd, - size, path, flags); - } else { - struct bulk_checkin_packfile state = { 0 }; - - status = deflate_blob_to_pack(&state, oid, fd, size, path, flags); - flush_bulk_checkin_packfile(&state); - } - - return status; -} - struct odb_transaction *begin_odb_transaction(struct object_database *odb) { if (!odb->transaction) { diff --git a/bulk-checkin.h b/bulk-checkin.h index 16254ce6a7..ac8887f476 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, int fd, const char *filename); /* - * This creates one packfile per large blob unless bulk-checkin - * machinery is "plugged". + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting diff --git a/object-file.c b/object-file.c index 1740aa2b2e..bc15af4245 100644 --- a/object-file.c +++ b/object-file.c @@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid, * Call xsize_t() only when needed to avoid potentially unnecessary * die() for large files. */ - if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) { ret = index_stream_convert_blob(istate, oid, fd, path, flags); - else if (!S_ISREG(st->st_mode)) + } else if (!S_ISREG(st->st_mode)) { ret = index_pipe(istate, oid, fd, type, path, flags); - else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || - type != OBJ_BLOB || - (path && would_convert_to_git(istate, path))) + } else if ((st->st_size >= 0 && + (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) || + type != OBJ_BLOB || + (path && would_convert_to_git(istate, path))) { ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); - else - ret = index_blob_bulk_checkin(the_repository->objects->transaction, + } else { + struct odb_transaction *transaction; + + transaction = begin_odb_transaction(the_repository->objects); + ret = index_blob_bulk_checkin(transaction, oid, fd, xsize_t(st->st_size), path, flags); + end_odb_transaction(transaction); + } + close(fd); return ret; } From ddc0b56ad77d7c86145a6a1774f05f9d11bf2337 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 22 Aug 2025 16:35:00 -0500 Subject: [PATCH 4/4] bulk-checkin: use repository variable from transaction The bulk-checkin subsystem depends on `the_repository`. Adapt functions and call sites to access the repository through `struct odb_transaction` instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the `pack_compression_level` and `pack_size_limit_cfg` globals are still used. Also adapt functions using packfile state to instead access it through the transaction. This makes some function parameters redundant and go away. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- bulk-checkin.c | 67 +++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/bulk-checkin.c b/bulk-checkin.c index 542d8125a8..124c493067 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -38,25 +38,26 @@ struct odb_transaction { struct bulk_checkin_packfile packfile; }; -static void finish_tmp_packfile(struct strbuf *basename, - const char *pack_tmp_name, - struct pack_idx_entry **written_list, - uint32_t nr_written, - struct pack_idx_option *pack_idx_opts, +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, unsigned char hash[]) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; char *idx_tmp_name = NULL; - stage_tmp_packfiles(the_repository, basename, pack_tmp_name, - written_list, nr_written, NULL, pack_idx_opts, hash, - &idx_tmp_name); - rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name); + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); free(idx_tmp_name); } -static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) { + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; @@ -73,17 +74,17 @@ static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state) CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(the_hash_algo, fd, hash, state->pack_tmp_name, + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, state->nr_written, hash, state->offset); close(fd); } - strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository), - hash_to_hex(hash)); - finish_tmp_packfile(&packname, state->pack_tmp_name, - state->written, state->nr_written, - &state->pack_idx_opts, hash); + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); @@ -94,7 +95,7 @@ clear_exit: strbuf_release(&packname); /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(the_repository); + reprepare_packed_git(repo); } /* @@ -117,7 +118,8 @@ static void flush_batch_fsync(struct odb_transaction *transaction) * to ensure that the data in each new object file is durable before * the final name is visible. */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository)); + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); temp = xmks_tempfile(temp_path.buf); fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); delete_tempfile(&temp); @@ -131,16 +133,17 @@ static void flush_batch_fsync(struct odb_transaction *transaction) transaction->objdir = NULL; } -static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid) +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) { /* The object may already exist in the repository */ - if (odb_has_object(the_repository->objects, oid, + if (odb_has_object(transaction->odb, oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 1; /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < state->nr_written; i++) - if (oideq(&state->written[i]->oid, oid)) + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) return 1; /* This is a new object we need to keep */ @@ -239,13 +242,15 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct bulk_checkin_packfile *state, +static void prepare_to_stream(struct odb_transaction *transaction, unsigned flags) { + struct bulk_checkin_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; - state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name); + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); reset_pack_idx_option(&state->pack_idx_opts); /* Pretend we are going to write only one object */ @@ -272,21 +277,21 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); - the_hash_algo->init_fn(&ctx); + transaction->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); /* Note: idx is non-NULL when we are writing */ if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(state, flags); + prepare_to_stream(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -304,7 +309,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(state); + flush_bulk_checkin_packfile(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } @@ -313,7 +318,7 @@ int index_blob_bulk_checkin(struct odb_transaction *transaction, return 0; idx->crc32 = crc32_end(state->f); - if (already_written(state, result_oid)) { + if (already_written(transaction, result_oid)) { hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; free(idx); @@ -338,7 +343,7 @@ void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) if (!transaction || transaction->objdir) return; - transaction->objdir = tmp_objdir_create(the_repository, "bulk-fsync"); + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); if (transaction->objdir) tmp_objdir_replace_primary_odb(transaction->objdir, 0); } @@ -379,7 +384,7 @@ void flush_odb_transaction(struct odb_transaction *transaction) return; flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(&transaction->packfile); + flush_bulk_checkin_packfile(transaction); } void end_odb_transaction(struct odb_transaction *transaction)