From d3d9c51973bf3187d8767969a0aa33abe79d6240 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 21 Oct 2022 20:21:45 -0400 Subject: [PATCH 1/6] repack: convert "names" util bitfield to array We keep a string_list "names" containing the hashes of packs generated on our behalf by pack-objects. The util field of each item is treated as a bitfield that tells us which extensions (.pack, .idx, .rev, etc) are present for each name. Let's switch this to allocating a real array. That will give us room in a future patch to store more data than just a single bit per extension. And it makes the code a little easier to read, as we avoid casting back and forth between uintptr_t and a void pointer. Since the only thing we're storing is an array, we could just allocate it directly. But instead I've put it into a named struct here. That further increases readability around the casts, and in particular helps differentiate us from other string_lists in the same file which use their util field differently. E.g., the existing_*_packs lists still do bit-twiddling, but their bits have different meaning than the ones in "names". This makes it hard to grep around the code to see how the util fields are used; now you can look for "generated_pack_data". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/repack.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index a5bacc7797..8e71230bf7 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -247,11 +247,15 @@ static struct { {".idx"}, }; -static unsigned populate_pack_exts(char *name) +struct generated_pack_data { + char exts[ARRAY_SIZE(exts)]; +}; + +static struct generated_pack_data *populate_pack_exts(const char *name) { struct stat statbuf; struct strbuf path = STRBUF_INIT; - unsigned ret = 0; + struct generated_pack_data *data = xcalloc(1, sizeof(*data)); int i; for (i = 0; i < ARRAY_SIZE(exts); i++) { @@ -261,11 +265,11 @@ static unsigned populate_pack_exts(char *name) if (stat(path.buf, &statbuf)) continue; - ret |= (1 << i); + data->exts[i] = 1; } strbuf_release(&path); - return ret; + return data; } static void repack_promisor_objects(const struct pack_objects_args *args, @@ -320,7 +324,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args, line.buf); write_promisor_file(promisor_name, NULL, 0); - item->util = (void *)(uintptr_t)populate_pack_exts(item->string); + item->util = populate_pack_exts(item->string); free(promisor_name); } @@ -994,7 +998,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) string_list_sort(&names); for_each_string_list_item(item, &names) { - item->util = (void *)(uintptr_t)populate_pack_exts(item->string); + item->util = populate_pack_exts(item->string); } close_object_store(the_repository->objects); @@ -1003,6 +1007,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) * Ok we have prepared all new packfiles. */ for_each_string_list_item(item, &names) { + struct generated_pack_data *data = item->util; + for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname, *fname_old; @@ -1011,7 +1017,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) fname_old = mkpathdup("%s-%s%s", packtmp, item->string, exts[ext].name); - if (((uintptr_t)item->util) & ((uintptr_t)1 << ext)) { + if (data->exts[ext]) { struct stat statbuffer; if (!stat(fname_old, &statbuffer)) { statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); @@ -1115,7 +1121,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) write_midx_file(get_object_directory(), NULL, NULL, flags); } - string_list_clear(&names, 0); + string_list_clear(&names, 1); string_list_clear(&existing_nonkept_packs, 0); string_list_clear(&existing_kept_packs, 0); clear_pack_geometry(geometry); From b639606fd0e20584edd2515236fcc69ada24e430 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 21 Oct 2022 20:21:48 -0400 Subject: [PATCH 2/6] repack: populate extension bits incrementally After generating the main pack and then any additional cruft packs, we iterate over the "names" list (which contains hashes of packs generated by pack-objects), and call populate_pack_exts() for each. There's one small problem with this. In repack_promisor_objects(), we may add entries to "names" and call populate_pack_exts() for them. Calling it again is mostly just wasteful, as we'll stat() the filename with each possible extension, get the same result, and just overwrite our bits. So we could drop the call there, and leave the final loop to populate all of the bits. But instead, this patch does the reverse: drops the final loop, and teaches the other two sites to populate the bits as they add entries. This makes the code easier to reason about, as you never have to worry about when the util field is valid; it is always valid for each entry. It also serves my ulterior purpose: recording the generated filenames as soon as possible will make it easier for a future patch to use them for cleaning up from a failed operation. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/repack.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 8e71230bf7..b5bd9e5fed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -714,10 +714,14 @@ static int write_cruft_pack(const struct pack_objects_args *args, out = xfdopen(cmd.out, "r"); while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + if (line.len != the_hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only " "from pack-objects.")); - string_list_append(names, line.buf); + + item = string_list_append(names, line.buf); + item->util = populate_pack_exts(line.buf); } fclose(out); @@ -956,9 +960,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) out = xfdopen(cmd.out, "r"); while (strbuf_getline_lf(&line, out) != EOF) { + struct string_list_item *item; + if (line.len != the_hash_algo->hexsz) die(_("repack: Expecting full hex object ID lines only from pack-objects.")); - string_list_append(&names, line.buf); + item = string_list_append(&names, line.buf); + item->util = populate_pack_exts(item->string); } fclose(out); ret = finish_command(&cmd); @@ -997,10 +1004,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix) string_list_sort(&names); - for_each_string_list_item(item, &names) { - item->util = populate_pack_exts(item->string); - } - close_object_store(the_repository->objects); /* From a4880b20cc9e56518f6aef96c31b256124731ea6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 21 Oct 2022 20:21:50 -0400 Subject: [PATCH 3/6] repack: expand error message for missing pack files If pack-objects tells us it generated pack $hash, we expect to find .tmp-$$-pack-$hash.pack, .idx, .rev, and so on. Some of these files are optional, but others are not. For the required ones, we'll bail with an error if any of them is missing. The error message is just "missing required file", which is a bit vague. We should be more clear that it is not the user's fault, but rather that the sub-pgoram we called is not operating as expected. In practice, nobody should ever see this message, as it would generally only be caused by a bug in Git. It probably doesn't make sense to convert this to a BUG(), though, as there are other (unlikely) possibilities, such as somebody else racily deleting the files, filesystem errors causing stat() to fail, and so on. A nice side effect here is that we stop relying on fname_old in this code path, which will let us deal with it only in the first part of the conditional. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/repack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index b5bd9e5fed..d1929bb3db 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1030,7 +1030,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (rename(fname_old, fname)) die_errno(_("renaming '%s' failed"), fname_old); } else if (!exts[ext].optional) - die(_("missing required file: %s"), fname_old); + die(_("pack-objects did not write a '%s' file for pack %s-%s"), + exts[ext].name, packtmp, item->string); else if (unlink(fname) < 0 && errno != ENOENT) die_errno(_("could not unlink: %s"), fname); From 9cf10d8786c47c18ced6cb00e140cc18db8a7509 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 21 Oct 2022 20:21:54 -0400 Subject: [PATCH 4/6] repack: use tempfiles for signal cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When git-repack exits due to a signal, it tries to clean up by calling its remove_temporary_files() function, which walks through the packs dir looking for ".tmp-$$-pack-*" files to delete (where "$$" is the pid of the current process). The biggest problem here is that remove_temporary_files() is not safe to call in a signal handler. It uses opendir(), which isn't on the POSIX async-signal-safe list. The details will be platform-specific, but a likely issue is that it needs to allocate memory; if we receive a signal while inside malloc(), etc, we'll conflict on the allocator lock and deadlock with ourselves. We can fix this by just cleaning up the files directly, without walking the directory. We already know the complete list of .tmp-* files that were generated, because we recorded them via populate_pack_exts(). When we find files there, we can use register_tempfile() to record the filenames. If we receive a signal, then the tempfile API will clean them up for us, and it's async-safe and pretty battle-tested. Note that this is slightly racier than the existing scheme. We don't record the filenames until pack-objects tells us the hash over stdout. So during the period between it generating the file and reporting the hash, we'd fail to clean up. However, that period is very small. During most of the pack generation process pack-objects is using its own internal tempfiles. It's only at the very end that it moves them into the names git-repack expects, and then it immediately reports the name to us. Given that cleanup like this is best effort (after all, we may get SIGKILL), this level of race is acceptable. When we register the tempfiles, we'll record them locally and use the result to call rename_tempfile(), rather than renaming by hand. This isn't strictly necessary, as once we've renamed the files they're gone, and the tempfile API's cleanup unlink() would simply become a pointless noop. But managing the lifetimes of the tempfile objects is the cleanest thing to do, and the tempfile pointers naturally fill the same role as the old booleans. This patch also fixes another small problem. We only hook signals, and don't set up an atexit handler. So if we see an error that causes us to die(), we'll leave the .tmp-* files in place. But since the tempfile API handles this for us, this is now fixed for free. The new test covers this by stimulating a failure of pack-objects when generating a cruft pack. Before this patch, the .tmp-* file for the main pack would have been left, but now we correctly clean it up. Two small subtleties on the implementation: - in the renaming loop, we can stop re-constructing fname_old; we only use it when we have a tempfile to rename, so we can just ask the tempfile for its path (which, barring bugs, should be identical) - when renaming fails, our error message mentions fname_old. But since a failed rename_tempfile() invalidates the tempfile struct, we'll lose access to that string. Instead, let's mention the destination filename, which is what most other callers do. Reported-by: Jan Pokorný Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 ++++++++------------------ t/t7700-repack.sh | 8 ++++++++ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index d1929bb3db..39f03c3a1d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -122,13 +122,6 @@ static void remove_temporary_files(void) strbuf_release(&buf); } -static void remove_pack_on_signal(int signo) -{ - remove_temporary_files(); - sigchain_pop(signo); - raise(signo); -} - /* * Adds all packs hex strings to either fname_nonkept_list or * fname_kept_list based on whether each pack has a corresponding @@ -248,7 +241,7 @@ static struct { }; struct generated_pack_data { - char exts[ARRAY_SIZE(exts)]; + struct tempfile *tempfiles[ARRAY_SIZE(exts)]; }; static struct generated_pack_data *populate_pack_exts(const char *name) @@ -265,7 +258,7 @@ static struct generated_pack_data *populate_pack_exts(const char *name) if (stat(path.buf, &statbuf)) continue; - data->exts[i] = 1; + data->tempfiles[i] = register_tempfile(path.buf); } strbuf_release(&path); @@ -867,8 +860,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix) split_pack_geometry(geometry, geometric_factor); } - sigchain_push_common(remove_pack_on_signal); - prepare_pack_objects(&cmd, &po_args); show_progress = !po_args.quiet && isatty(2); @@ -1013,22 +1004,22 @@ int cmd_repack(int argc, const char **argv, const char *prefix) struct generated_pack_data *data = item->util; for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { - char *fname, *fname_old; + char *fname; fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); - fname_old = mkpathdup("%s-%s%s", - packtmp, item->string, exts[ext].name); - if (data->exts[ext]) { + if (data->tempfiles[ext]) { + const char *fname_old = get_tempfile_path(data->tempfiles[ext]); struct stat statbuffer; + if (!stat(fname_old, &statbuffer)) { statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); chmod(fname_old, statbuffer.st_mode); } - if (rename(fname_old, fname)) - die_errno(_("renaming '%s' failed"), fname_old); + if (rename_tempfile(&data->tempfiles[ext], fname)) + die_errno(_("renaming pack to '%s' failed"), fname); } else if (!exts[ext].optional) die(_("pack-objects did not write a '%s' file for pack %s-%s"), exts[ext].name, packtmp, item->string); @@ -1036,7 +1027,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix) die_errno(_("could not unlink: %s"), fname); free(fname); - free(fname_old); } } /* End of pack replacement. */ diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index ca45c4cd2c..592016f64a 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -432,6 +432,14 @@ test_expect_success TTY '--quiet disables progress' ' test_must_be_empty stderr ' +test_expect_success 'clean up .tmp-* packs on error' ' + test_must_fail git \ + -c repack.cruftwindow=bogus \ + repack -ad --cruft && + find $objdir/pack -name '.tmp-*' >tmpfiles && + test_must_be_empty tmpfiles +' + test_expect_success 'setup for update-server-info' ' git init update-server-info && test_commit -C update-server-info message From 193430717a4056579201f98873bfdd152b5fdd25 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 21 Oct 2022 20:21:58 -0400 Subject: [PATCH 5/6] repack: drop remove_temporary_files() After we've successfully finished the repack, we call remove_temporary_files(), which looks for and removes any files matching ".tmp-$$-pack-*", where $$ is the pid of the current process. But this is pointless. If we make it this far in the process, we've already renamed these tempfiles into place, and there is nothing left to delete. Nor is there a point in trying to call it to clean up when we _aren't_ successful. It's not safe for using in a signal handler, and the previous commit already handed that job over to the tempfile API. It might seem like it would be useful to clean up stray .tmp files left by other invocations of git-repack. But it won't clean those files; it only matches ones with its pid, and leaves the rest. Fortunately, those are cleaned up naturally by successive calls to git-repack; we'll consider .tmp-*.pack the same as normal packfiles, so "repack -ad", etc, will roll up their contents and eventually delete them. The one case that could matter is if pack-objects generates an extension we don't know about, like ".tmp-pack-$$-$hash.some-new-ext". The current code will quietly delete such a file, while after this patch we'd leave it in place. In practice this doesn't happen, and would be indicative of a bug. Leaving the file as cruft is arguably a better behavior, as it means somebody is more likely to eventually notice and fix the bug. If we really wanted to be paranoid, we could scan for and warn about such files, but that seems like overkill. There's nothing to test with regard to the removal of this function. It was doing nothing, so the behavior should be the same. However, we can verify (and protect) our assumption that "repack -ad" will eventually remove stray files by adding a test for that. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/repack.c | 32 -------------------------------- t/t7700-repack.sh | 8 ++++++++ 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 39f03c3a1d..cd338b161f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -91,37 +91,6 @@ static int repack_config(const char *var, const char *value, void *cb) return git_default_config(var, value, cb); } -/* - * Remove temporary $GIT_OBJECT_DIRECTORY/pack/.tmp-$$-pack-* files. - */ -static void remove_temporary_files(void) -{ - struct strbuf buf = STRBUF_INIT; - size_t dirlen, prefixlen; - DIR *dir; - struct dirent *e; - - dir = opendir(packdir); - if (!dir) - return; - - /* Point at the slash at the end of ".../objects/pack/" */ - dirlen = strlen(packdir) + 1; - strbuf_addstr(&buf, packtmp); - /* Hold the length of ".tmp-%d-pack-" */ - prefixlen = buf.len - dirlen; - - while ((e = readdir(dir))) { - if (strncmp(e->d_name, buf.buf + dirlen, prefixlen)) - continue; - strbuf_setlen(&buf, dirlen); - strbuf_addstr(&buf, e->d_name); - unlink(buf.buf); - } - closedir(dir); - strbuf_release(&buf); -} - /* * Adds all packs hex strings to either fname_nonkept_list or * fname_kept_list based on whether each pack has a corresponding @@ -1106,7 +1075,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (run_update_server_info) update_server_info(0); - remove_temporary_files(); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 592016f64a..edcda849b9 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -440,6 +440,14 @@ test_expect_success 'clean up .tmp-* packs on error' ' test_must_be_empty tmpfiles ' +test_expect_success 'repack -ad cleans up old .tmp-* packs' ' + git rev-parse HEAD >input && + git pack-objects $objdir/pack/.tmp-1234 tmpfiles && + test_must_be_empty tmpfiles +' + test_expect_success 'setup for update-server-info' ' git init update-server-info && test_commit -C update-server-info message From 9b3fadfd067cc2c9ae7d6cc1a8bfdbdd5a253cd5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 23 Oct 2022 13:00:45 -0400 Subject: [PATCH 6/6] t7700: annotate cruft-pack failure with ok=sigpipe One of our tests intentionally causes the cruft-pack generation phase of repack to fail, in order to stimulate an exit from repack at the desired moment. It does so by feeding a bogus option argument to pack-objects. This is a simple and reliable way to get pack-objects to fail, but it has one downside: pack-objects will die before reading its stdin, which means the caller repack may racily get SIGPIPE writing to it. For the purposes of this test, that's OK. We are checking whether repack cleans up already-created .tmp files, and it will do so whether it exits or dies by signal (because the tempfile API hooks both). But we have to tell test_must_fail that either outcome is OK, or it complains about the signal. Arguably this is a workaround (compared to fixing repack), as repack dying to SIGPIPE means that it loses the opportunity to give a more detailed message. But we don't actually write such a message anyway; we rely on pack-objects to have written something useful to stderr, and it does. In either case (signal or exit), that is the main thing the user will see. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t7700-repack.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index edcda849b9..9164acbe02 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -433,7 +433,7 @@ test_expect_success TTY '--quiet disables progress' ' ' test_expect_success 'clean up .tmp-* packs on error' ' - test_must_fail git \ + test_must_fail ok=sigpipe git \ -c repack.cruftwindow=bogus \ repack -ad --cruft && find $objdir/pack -name '.tmp-*' >tmpfiles &&