From 0ad3d656521aa16a6496aa855bbde97160a2b2bc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 11:32:23 +0100 Subject: [PATCH 1/4] object-file: fix race in object collision check One of the tests in t5616 asserts that git-fetch(1) with `--refetch` triggers repository maintenance with the correct set of arguments. This test is flaky and causes us to fail sometimes: ++ git -c protocol.version=0 -c gc.autoPackLimit=0 -c maintenance.incremental-repack.auto=1234 -C pc1 fetch --refetch origin error: unable to open .git/objects/pack/pack-029d08823bd8a8eab510ad6ac75c823cfd3ed31e.pack: No such file or directory fatal: unable to rename temporary file to '.git/objects/pack/pack-029d08823bd8a8eab510ad6ac75c823cfd3ed31e.pack' fatal: could not finish pack-objects to repack local links fatal: index-pack failed error: last command exited with $?=128 The error message is quite confusing as it talks about trying to rename a temporary packfile. A first hunch would thus be that this packfile gets written by git-fetch(1), but removed by git-maintenance(1) while it hasn't yet been finalized, which shouldn't ever happen. And indeed, when looking closer one notices that the file that is supposedly of temporary nature does not have the typical `tmp_pack_` prefix. As it turns out, the "unable to rename temporary file" fatal error is a red herring and the real error is "unable to open". That error is raised by `check_collision()`, which is called by `finalize_object_file()` when moving the new packfile into place. Because t5616 re-fetches objects, we end up with the exact same pack as we already have in the repository. So when the concurrent git-maintenance(1) process rewrites the preexisting pack and unlinks it exactly at the point in time where git-fetch(1) wants to check the old and new packfiles for equality we will see ENOENT and thus `check_collision()` returns an error, which gets bubbled up by `finalize_object_file()` and is then handled by `rename_tmp_packfile()`. That function does not know about the exact root cause of the error and instead just claims that the rename has failed. This race is thus caused by b1b8dfde69 (finalize_object_file(): implement collision check, 2024-09-26), where we have newly introduced the collision check. By definition, two files cannot collide with each other when one of them has been removed. We can thus trivially fix the issue by ignoring ENOENT when opening either of the files we're about to check for collision. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/object-file.c b/object-file.c index b1a3463852..0293b93bbc 100644 --- a/object-file.c +++ b/object-file.c @@ -1982,13 +1982,15 @@ static int check_collision(const char *filename_a, const char *filename_b) fd_a = open(filename_a, O_RDONLY); if (fd_a < 0) { - ret = error_errno(_("unable to open %s"), filename_a); + if (errno != ENOENT) + ret = error_errno(_("unable to open %s"), filename_a); goto out; } fd_b = open(filename_b, O_RDONLY); if (fd_b < 0) { - ret = error_errno(_("unable to open %s"), filename_b); + if (errno != ENOENT) + ret = error_errno(_("unable to open %s"), filename_b); goto out; } From c1acf1a31761d0cfddc3ea6d39c92a6528cd9c5c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:25 +0100 Subject: [PATCH 2/4] object-file: rename variables in `check_collision()` Rename variables used in `check_collision()` to clearly identify which file is the source and which is the destination. This will make the next step easier to reason about when we start to treat those files different from one another. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/object-file.c b/object-file.c index 0293b93bbc..e2fa1be303 100644 --- a/object-file.c +++ b/object-file.c @@ -1974,56 +1974,56 @@ static void write_object_file_prepare_literally(const struct git_hash_algo *algo hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); } -static int check_collision(const char *filename_a, const char *filename_b) +static int check_collision(const char *source, const char *dest) { - char buf_a[4096], buf_b[4096]; - int fd_a = -1, fd_b = -1; + char buf_source[4096], buf_dest[4096]; + int fd_source = -1, fd_dest = -1; int ret = 0; - fd_a = open(filename_a, O_RDONLY); - if (fd_a < 0) { + fd_source = open(source, O_RDONLY); + if (fd_source < 0) { if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), filename_a); + ret = error_errno(_("unable to open %s"), source); goto out; } - fd_b = open(filename_b, O_RDONLY); - if (fd_b < 0) { + fd_dest = open(dest, O_RDONLY); + if (fd_dest < 0) { if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), filename_b); + ret = error_errno(_("unable to open %s"), dest); goto out; } while (1) { ssize_t sz_a, sz_b; - sz_a = read_in_full(fd_a, buf_a, sizeof(buf_a)); + sz_a = read_in_full(fd_source, buf_source, sizeof(buf_source)); if (sz_a < 0) { - ret = error_errno(_("unable to read %s"), filename_a); + ret = error_errno(_("unable to read %s"), source); goto out; } - sz_b = read_in_full(fd_b, buf_b, sizeof(buf_b)); + sz_b = read_in_full(fd_dest, buf_dest, sizeof(buf_dest)); if (sz_b < 0) { - ret = error_errno(_("unable to read %s"), filename_b); + ret = error_errno(_("unable to read %s"), dest); goto out; } - if (sz_a != sz_b || memcmp(buf_a, buf_b, sz_a)) { + if (sz_a != sz_b || memcmp(buf_source, buf_dest, sz_a)) { ret = error(_("files '%s' and '%s' differ in contents"), - filename_a, filename_b); + source, dest); goto out; } - if (sz_a < sizeof(buf_a)) + if (sz_a < sizeof(buf_source)) break; } out: - if (fd_a > -1) - close(fd_a); - if (fd_b > -1) - close(fd_b); + if (fd_source > -1) + close(fd_source); + if (fd_dest > -1) + close(fd_dest); return ret; } From cfae50e40eb72d6116ad56c616b3322474df4a75 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:26 +0100 Subject: [PATCH 3/4] object-file: don't special-case missing source file in collision check In 0ad3d65652 (object-file: fix race in object collision check, 2024-12-30) we have started to ignore ENOENT when opening either the source or destination file of the collision check. This was done to handle races more gracefully in case either of the potentially-colliding disappears. The fix is overly broad though: while the destination file may indeed vanish racily, this shouldn't ever happen for the source file, which is a temporary object file (either loose or in packfile format) that we have just created. So if any concurrent process would have removed that temporary file it would indicate an actual issue. Stop treating ENOENT specially for the source file so that we always bubble up this error. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/object-file.c b/object-file.c index e2fa1be303..c1bd746d9e 100644 --- a/object-file.c +++ b/object-file.c @@ -1982,8 +1982,7 @@ static int check_collision(const char *source, const char *dest) fd_source = open(source, O_RDONLY); if (fd_source < 0) { - if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), source); + ret = error_errno(_("unable to open %s"), source); goto out; } From d7fcbe2c56468ac780c689b02c6a9e056ce39c12 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:27 +0100 Subject: [PATCH 4/4] object-file: retry linking file into place when occluding file vanishes Prior to 0ad3d65652 (object-file: fix race in object collision check, 2024-12-30), callers could expect that a successful return from `finalize_object_file()` means that either the file was moved into place, or the identical bytes were already present. If neither of those happens, we'd return an error. Since that commit, if the destination file disappears between our link(3p) call and the collision check, we'd return success without actually checking the contents, and without retrying the link. This solves the common case that the files were indeed the same, but it means that we may corrupt the repository if they weren't (this implies a hash collision, but the whole point of this function is protecting against hash collisions). We can't be pessimistic and assume they're different; that hurts the common case that the mentioned commit was trying to fix. But after seeing that the destination file went away, we can retry linking again. Adapt the code to do so when we see that the destination file has racily vanished. This should generally succeed as we have just observed that the destination file does not exist anymore, except in the very unlikely event that it gets recreated by another concurrent process again. Helped-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/object-file.c b/object-file.c index c1bd746d9e..008ddf59a5 100644 --- a/object-file.c +++ b/object-file.c @@ -1974,6 +1974,8 @@ static void write_object_file_prepare_literally(const struct git_hash_algo *algo hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); } +#define CHECK_COLLISION_DEST_VANISHED -2 + static int check_collision(const char *source, const char *dest) { char buf_source[4096], buf_dest[4096]; @@ -1990,6 +1992,8 @@ static int check_collision(const char *source, const char *dest) if (fd_dest < 0) { if (errno != ENOENT) ret = error_errno(_("unable to open %s"), dest); + else + ret = CHECK_COLLISION_DEST_VANISHED; goto out; } @@ -2037,8 +2041,11 @@ int finalize_object_file(const char *tmpfile, const char *filename) int finalize_object_file_flags(const char *tmpfile, const char *filename, enum finalize_object_file_flags flags) { - struct stat st; - int ret = 0; + unsigned retries = 0; + int ret; + +retry: + ret = 0; if (object_creation_mode == OBJECT_CREATION_USES_RENAMES) goto try_rename; @@ -2059,6 +2066,8 @@ int finalize_object_file_flags(const char *tmpfile, const char *filename, * left to unlink. */ if (ret && ret != EEXIST) { + struct stat st; + try_rename: if (!stat(filename, &st)) ret = EEXIST; @@ -2074,9 +2083,17 @@ int finalize_object_file_flags(const char *tmpfile, const char *filename, errno = saved_errno; return error_errno(_("unable to write file %s"), filename); } - if (!(flags & FOF_SKIP_COLLISION_CHECK) && - check_collision(tmpfile, filename)) + if (!(flags & FOF_SKIP_COLLISION_CHECK)) { + ret = check_collision(tmpfile, filename); + if (ret == CHECK_COLLISION_DEST_VANISHED) { + if (retries++ > 5) + return error(_("unable to write repeatedly vanishing file %s"), + filename); + goto retry; + } + else if (ret) return -1; + } unlink_or_warn(tmpfile); }