From 286183da99e60258934790f6706b8db67b10dcab Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 24 Mar 2025 00:51:50 +0000 Subject: [PATCH 1/2] maintenance: force progress/no-quiet to children The --no-quiet option for 'git maintenance run' is supposed to indicate that progress should happen even while ignoring the value of isatty(2). However, Git implicitly asks child processes to check isatty(2) since these arguments are not passed through. The pass through of --no-quiet will be useful in a test in the next change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/builtin/gc.c b/builtin/gc.c index 99431fd467..6672f165bd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1029,6 +1029,8 @@ static int run_write_commit_graph(struct maintenance_run_opts *opts) if (opts->quiet) strvec_push(&child.args, "--no-progress"); + else + strvec_push(&child.args, "--progress"); return !!run_command(&child); } @@ -1185,6 +1187,8 @@ static int pack_loose(struct maintenance_run_opts *opts) strvec_push(&pack_proc.args, "pack-objects"); if (opts->quiet) strvec_push(&pack_proc.args, "--quiet"); + else + strvec_push(&pack_proc.args, "--no-quiet"); strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path); pack_proc.in = -1; @@ -1263,6 +1267,8 @@ static int multi_pack_index_write(struct maintenance_run_opts *opts) if (opts->quiet) strvec_push(&child.args, "--no-progress"); + else + strvec_push(&child.args, "--progress"); if (run_command(&child)) return error(_("failed to write multi-pack-index")); @@ -1279,6 +1285,8 @@ static int multi_pack_index_expire(struct maintenance_run_opts *opts) if (opts->quiet) strvec_push(&child.args, "--no-progress"); + else + strvec_push(&child.args, "--progress"); if (run_command(&child)) return error(_("'git multi-pack-index expire' failed")); @@ -1335,6 +1343,8 @@ static int multi_pack_index_repack(struct maintenance_run_opts *opts) if (opts->quiet) strvec_push(&child.args, "--no-progress"); + else + strvec_push(&child.args, "--progress"); strvec_pushf(&child.args, "--batch-size=%"PRIuMAX, (uintmax_t)get_auto_pack_size()); From 6540560fd6c91091f6cf1eaedd034bc1827e1506 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 24 Mar 2025 00:51:51 +0000 Subject: [PATCH 2/2] maintenance: add loose-objects.batchSize config The 'loose-objects' task of 'git maintenance run' first deletes loose objects that exit within packfiles and then collects loose objects into a packfile. This second step uses an implicit limit of fifty thousand that cannot be modified by users. Add a new config option that allows this limit to be adjusted or ignored entirely. While creating tests for this option, I noticed that actually there was an off-by-one error due to the strict comparison in the limit check. I considered making the limit check turn true on equality, but instead I thought to use INT_MAX as a "no limit" barrier which should mean it's never possible to hit the limit. Thus, a new decrement to the limit is provided if the value is positive. (The restriction to positive values is to avoid underflow if INT_MIN is configured.) Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/maintenance.adoc | 5 +++++ Documentation/git-maintenance.adoc | 18 ++++++++++------- builtin/gc.c | 10 ++++++++++ t/t7900-maintenance.sh | 28 +++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 7 deletions(-) diff --git a/Documentation/config/maintenance.adoc b/Documentation/config/maintenance.adoc index 72a9d6cf81..42f9545da0 100644 --- a/Documentation/config/maintenance.adoc +++ b/Documentation/config/maintenance.adoc @@ -61,6 +61,11 @@ maintenance.loose-objects.auto:: loose objects is at least the value of `maintenance.loose-objects.auto`. The default value is 100. +maintenance.loose-objects.batchSize:: + This integer config option controls the maximum number of loose objects + written into a packfile during the `loose-objects` task. The default is + fifty thousand. Use value `0` to indicate no limit. + maintenance.incremental-repack.auto:: This integer config option controls how often the `incremental-repack` task should be run as part of `git maintenance run --auto`. If zero, diff --git a/Documentation/git-maintenance.adoc b/Documentation/git-maintenance.adoc index 0450d74aff..c90b370b1f 100644 --- a/Documentation/git-maintenance.adoc +++ b/Documentation/git-maintenance.adoc @@ -126,13 +126,17 @@ loose-objects:: objects that already exist in a pack-file; concurrent Git processes will examine the pack-file for the object data instead of the loose object. Second, it creates a new pack-file (starting with "loose-") - containing a batch of loose objects. The batch size is limited to 50 - thousand objects to prevent the job from taking too long on a - repository with many loose objects. The `gc` task writes unreachable - objects as loose objects to be cleaned up by a later step only if - they are not re-added to a pack-file; for this reason it is not - advisable to enable both the `loose-objects` and `gc` tasks at the - same time. + containing a batch of loose objects. ++ +The batch size defaults to fifty thousand objects to prevent the job from +taking too long on a repository with many loose objects. Use the +`maintenance.loose-objects.batchSize` config option to adjust this size, +including a value of `0` to remove the limit. ++ +The `gc` task writes unreachable objects as loose objects to be cleaned up +by a later step only if they are not re-added to a pack-file; for this +reason it is not advisable to enable both the `loose-objects` and `gc` +tasks at the same time. incremental-repack:: The `incremental-repack` job repacks the object directory diff --git a/builtin/gc.c b/builtin/gc.c index 6672f165bd..817081e1a5 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1163,6 +1163,7 @@ static int write_loose_object_to_stdin(const struct object_id *oid, fprintf(d->in, "%s\n", oid_to_hex(oid)); + /* If batch_size is INT_MAX, then this will return 0 always. */ return ++(d->count) > d->batch_size; } @@ -1208,6 +1209,15 @@ static int pack_loose(struct maintenance_run_opts *opts) data.count = 0; data.batch_size = 50000; + repo_config_get_int(r, "maintenance.loose-objects.batchSize", + &data.batch_size); + + /* If configured as 0, then remove limit. */ + if (!data.batch_size) + data.batch_size = INT_MAX; + else if (data.batch_size > 0) + data.batch_size--; /* Decrease for equality on limit. */ + for_each_loose_file_in_objdir(r->objects->odb->path, write_loose_object_to_stdin, NULL, diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 1909aed95e..834ddb5ad6 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -306,6 +306,34 @@ test_expect_success 'maintenance.loose-objects.auto' ' test_subcommand git prune-packed --quiet err && + grep "Enumerating objects: 50, done." err && + + GIT_PROGRESS_DELAY=0 \ + git -C loose-batch maintenance run --no-quiet --task=loose-objects 2>err && + grep "Enumerating objects: 50, done." err && + + GIT_PROGRESS_DELAY=0 \ + git -C loose-batch maintenance run --no-quiet --task=loose-objects 2>err && + grep "Enumerating objects: 2, done." err && + + GIT_PROGRESS_DELAY=0 \ + git -C loose-batch maintenance run --no-quiet --task=loose-objects 2>err && + test_must_be_empty err +' + test_expect_success 'incremental-repack task' ' packDir=.git/objects/pack && for i in $(test_seq 1 5)