From 9d505b7b49c00c5fa99a25506e63a2ef326a0062 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Tue, 11 Jan 2022 13:39:41 -0800 Subject: [PATCH 001/150] git-rev-list: add --exclude-first-parent-only flag It is useful to know when a branch first diverged in history from some integration branch in order to be able to enumerate the user's local changes. However, these local changes can include arbitrary merges, so it is necessary to ignore this merge structure when finding the divergence point. In order to do this, teach the "rev-list" family to accept "--exclude-first-parent-only", which restricts the traversal of excluded commits to only follow first parent links. -A-----E-F-G--main \ / / B-C-D--topic In this example, the goal is to return the set {B, C, D} which represents a topic branch that has been merged into main branch. `git rev-list topic ^main` will end up returning no commits since excluding main will end up traversing the commits on topic as well. `git rev-list --exclude-first-parent-only topic ^main` however will return {B, C, D} as desired. Add docs for the new flag, and clarify the doc for --first-parent to indicate that it applies to traversing the set of included commits only. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 22 +++++++++++++++------- blame.c | 2 +- revision.c | 30 ++++++++++++++++++++---------- revision.h | 3 ++- shallow.c | 2 +- t/t6012-rev-list-simplify.sh | 18 ++++++++++++------ 6 files changed, 51 insertions(+), 26 deletions(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 43a86fa562..fd4f4e26c9 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -122,19 +122,27 @@ again. Equivalent forms are `--min-parents=0` (any commit has 0 or more parents) and `--max-parents=-1` (negative numbers denote no upper limit). --first-parent:: - Follow only the first parent commit upon seeing a merge - commit. 
This option can give a better overview when - viewing the evolution of a particular topic branch, - because merges into a topic branch tend to be only about - adjusting to updated upstream from time to time, and - this option allows you to ignore the individual commits - brought in to your history by such a merge. + When finding commits to include, follow only the first + parent commit upon seeing a merge commit. This option + can give a better overview when viewing the evolution of + a particular topic branch, because merges into a topic + branch tend to be only about adjusting to updated upstream + from time to time, and this option allows you to ignore + the individual commits brought in to your history by such + a merge. ifdef::git-log[] + This option also changes default diff format for merge commits to `first-parent`, see `--diff-merges=first-parent` for details. endif::git-log[] +--exclude-first-parent-only:: + When finding commits to exclude (with a '{caret}'), follow only + the first parent commit upon seeing a merge commit. + This can be used to find the set of changes in a topic branch + from the point where it diverged from the remote branch, given + that arbitrary merges can be valid topic branch changes. + --not:: Reverses the meaning of the '{caret}' prefix (or lack thereof) for all following revision specifiers, up to the next `--not`. 
diff --git a/blame.c b/blame.c index 206c295660..083d99fdbc 100644 --- a/blame.c +++ b/blame.c @@ -2615,7 +2615,7 @@ void assign_blame(struct blame_scoreboard *sb, int opt) else { commit->object.flags |= UNINTERESTING; if (commit->object.parsed) - mark_parents_uninteresting(commit); + mark_parents_uninteresting(sb->revs, commit); } /* treat root commit as boundary */ if (!commit->parents && !sb->show_root) diff --git a/revision.c b/revision.c index ad4286fbdd..d8d326d6b0 100644 --- a/revision.c +++ b/revision.c @@ -273,7 +273,7 @@ static void commit_stack_clear(struct commit_stack *stack) stack->nr = stack->alloc = 0; } -static void mark_one_parent_uninteresting(struct commit *commit, +static void mark_one_parent_uninteresting(struct rev_info *revs, struct commit *commit, struct commit_stack *pending) { struct commit_list *l; @@ -290,20 +290,26 @@ static void mark_one_parent_uninteresting(struct commit *commit, * wasn't uninteresting), in which case we need * to mark its parents recursively too.. 
*/ - for (l = commit->parents; l; l = l->next) + for (l = commit->parents; l; l = l->next) { commit_stack_push(pending, l->item); + if (revs && revs->exclude_first_parent_only) + break; + } } -void mark_parents_uninteresting(struct commit *commit) +void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit) { struct commit_stack pending = COMMIT_STACK_INIT; struct commit_list *l; - for (l = commit->parents; l; l = l->next) - mark_one_parent_uninteresting(l->item, &pending); + for (l = commit->parents; l; l = l->next) { + mark_one_parent_uninteresting(revs, l->item, &pending); + if (revs && revs->exclude_first_parent_only) + break; + } while (pending.nr > 0) - mark_one_parent_uninteresting(commit_stack_pop(&pending), + mark_one_parent_uninteresting(revs, commit_stack_pop(&pending), &pending); commit_stack_clear(&pending); @@ -441,7 +447,7 @@ static struct commit *handle_commit(struct rev_info *revs, if (repo_parse_commit(revs->repo, commit) < 0) die("unable to parse commit %s", name); if (flags & UNINTERESTING) { - mark_parents_uninteresting(commit); + mark_parents_uninteresting(revs, commit); if (!revs->topo_order || !generation_numbers_enabled(the_repository)) revs->limited = 1; @@ -1124,7 +1130,7 @@ static int process_parents(struct rev_info *revs, struct commit *commit, if (repo_parse_commit_gently(revs->repo, p, 1) < 0) continue; if (p->parents) - mark_parents_uninteresting(p); + mark_parents_uninteresting(revs, p); if (p->object.flags & SEEN) continue; p->object.flags |= (SEEN | NOT_USER_GIVEN); @@ -1132,6 +1138,8 @@ static int process_parents(struct rev_info *revs, struct commit *commit, commit_list_insert_by_date(p, list); if (queue) prio_queue_put(queue, p); + if (revs->exclude_first_parent_only) + break; } return 0; } @@ -1422,7 +1430,7 @@ static int limit_list(struct rev_info *revs) if (process_parents(revs, commit, &original_list, NULL) < 0) return -1; if (obj->flags & UNINTERESTING) { - mark_parents_uninteresting(commit); + 
mark_parents_uninteresting(revs, commit); slop = still_interesting(original_list, date, slop, &interesting_cache); if (slop) continue; @@ -2223,6 +2231,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg return argcount; } else if (!strcmp(arg, "--first-parent")) { revs->first_parent_only = 1; + } else if (!strcmp(arg, "--exclude-first-parent-only")) { + revs->exclude_first_parent_only = 1; } else if (!strcmp(arg, "--ancestry-path")) { revs->ancestry_path = 1; revs->simplify_history = 0; @@ -3345,7 +3355,7 @@ static void explore_walk_step(struct rev_info *revs) return; if (c->object.flags & UNINTERESTING) - mark_parents_uninteresting(c); + mark_parents_uninteresting(revs, c); for (p = c->parents; p; p = p->next) test_flag_and_insert(&info->explore_queue, p->item, TOPO_WALK_EXPLORED); diff --git a/revision.h b/revision.h index 3f66147bfd..374a4ff468 100644 --- a/revision.h +++ b/revision.h @@ -158,6 +158,7 @@ struct rev_info { bisect:1, ancestry_path:1, first_parent_only:1, + exclude_first_parent_only:1, line_level_traverse:1, tree_blobs_in_commit_order:1, @@ -398,7 +399,7 @@ const char *get_revision_mark(const struct rev_info *revs, void put_revision_mark(const struct rev_info *revs, const struct commit *commit); -void mark_parents_uninteresting(struct commit *commit); +void mark_parents_uninteresting(struct rev_info *revs, struct commit *commit); void mark_tree_uninteresting(struct repository *r, struct tree *tree); void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees); diff --git a/shallow.c b/shallow.c index 9ed18eb884..71e5876f37 100644 --- a/shallow.c +++ b/shallow.c @@ -603,7 +603,7 @@ static int mark_uninteresting(const char *refname, const struct object_id *oid, if (!commit) return 0; commit->object.flags |= UNINTERESTING; - mark_parents_uninteresting(commit); + mark_parents_uninteresting(NULL, commit); return 0; } diff --git a/t/t6012-rev-list-simplify.sh b/t/t6012-rev-list-simplify.sh index 
4f7fa8b6c0..e2851fd75d 100755 --- a/t/t6012-rev-list-simplify.sh +++ b/t/t6012-rev-list-simplify.sh @@ -16,13 +16,12 @@ unnote () { } # -# Create a test repo with interesting commit graph: +# Create a test repo with an interesting commit graph: # -# A--B----------G--H--I--K--L -# \ \ / / -# \ \ / / -# C------E---F J -# \_/ +# A-----B-----G--H--I--K--L +# \ \ / / +# \ \ / / +# C--D--E--F J # # The commits are laid out from left-to-right starting with # the root commit A and terminating at the tip commit L. @@ -142,6 +141,13 @@ check_result 'I B A' --author-date-order -- file check_result 'H' --first-parent -- another-file check_result 'H' --first-parent --topo-order -- another-file +check_result 'L K I H G B A' --first-parent L +check_result 'F E D C' --exclude-first-parent-only F ^L +check_result '' F ^L +check_result 'L K I H G J' L ^F +check_result 'L K I H G B J' --exclude-first-parent-only L ^F +check_result 'L K I H G B' --exclude-first-parent-only --first-parent L ^F + check_result 'E C B A' --full-history E -- lost test_expect_success 'full history simplification without parent' ' printf "%s\n" E C B A >expect && From bb01b26dec69f5f287f0d36cbe4c765fe7f7b053 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:04:58 +0000 Subject: [PATCH 002/150] reset: fix validation in sparse index test Update t1092 test 'reset with pathspecs outside sparse definition' to verify index contents. The use of `rev-parse` verifies the contents of HEAD, not the index, providing no real validation of the reset results. Conversely, `ls-files` reports the contents of the index (OIDs, flags, filenames), which are then compared across checkouts to ensure compatible index states. Fixes 741a2c9ffa (reset: expand test coverage for sparse checkouts, 2021-09-27). 
Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t1092-sparse-checkout-compatibility.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 49f70a6569..d5167e7ed6 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -596,13 +596,11 @@ test_expect_success 'reset with pathspecs outside sparse definition' ' test_sparse_match git reset update-folder1 -- folder1 && git -C full-checkout reset update-folder1 -- folder1 && - test_sparse_match git status --porcelain=v2 && - test_all_match git rev-parse HEAD:folder1 && + test_all_match git ls-files -s -- folder1 && test_sparse_match git reset update-folder2 -- folder2/a && git -C full-checkout reset update-folder2 -- folder2/a && - test_sparse_match git status --porcelain=v2 && - test_all_match git rev-parse HEAD:folder2/a + test_all_match git ls-files -s -- folder2/a ' test_expect_success 'reset with wildcard pathspec' ' From 1624333ec1486378c44ce38e4f8ae9d02c07d15a Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:04:59 +0000 Subject: [PATCH 003/150] reset: reorder wildcard pathspec conditions Rearrange conditions in method determining whether index expansion is necessary when a pathspec is specified for `git reset`, placing less expensive condition first. Additionally, add details & examples to related code comments to help with readability. 
Helped-by: Elijah Newren Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/reset.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/builtin/reset.c b/builtin/reset.c index b1ff699b43..79b40385b9 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -204,10 +204,16 @@ static int pathspec_needs_expanded_index(const struct pathspec *pathspec) /* * Special case: if the pattern is a path inside the cone * followed by only wildcards, the pattern cannot match - * partial sparse directories, so we don't expand the index. + * partial sparse directories, so we know we don't need to + * expand the index. + * + * Examples: + * - in-cone/foo***: doesn't need expanded index + * - not-in-cone/bar*: may need expanded index + * - **.c: may need expanded index */ - if (path_in_cone_mode_sparse_checkout(item.original, &the_index) && - strspn(item.original + item.nowildcard_len, "*") == item.len - item.nowildcard_len) + if (strspn(item.original + item.nowildcard_len, "*") == item.len - item.nowildcard_len && + path_in_cone_mode_sparse_checkout(item.original, &the_index)) continue; for (pos = 0; pos < active_nr; pos++) { From 1e9e10e04891a13e5ccd52b36cfadc55dfaa5066 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:00 +0000 Subject: [PATCH 004/150] clean: integrate with sparse index Remove full index requirement for `git clean` and test to ensure the index is not expanded in `git clean`. Add to existing test for `git clean` to verify cleanup of untracked files in sparse directories is consistent between sparse index and non-sparse index checkouts. 
Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/clean.c | 3 +++ t/t1092-sparse-checkout-compatibility.sh | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/builtin/clean.c b/builtin/clean.c index 98a2860409..5628fc7103 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -983,6 +983,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) dir.flags |= DIR_KEEP_UNTRACKED_CONTENTS; } + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + if (read_cache() < 0) die(_("index file corrupt")); diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index d5167e7ed6..0558736145 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -764,23 +764,42 @@ test_expect_success 'clean' ' test_all_match git commit -m "ignore bogus files" && run_on_sparse mkdir folder1 && + run_on_all mkdir -p deep/untracked-deep && run_on_all touch folder1/bogus && + run_on_all touch folder1/untracked && + run_on_all touch deep/untracked-deep/bogus && + run_on_all touch deep/untracked-deep/untracked && test_all_match git status --porcelain=v2 && test_all_match git clean -f && test_all_match git status --porcelain=v2 && test_sparse_match ls && test_sparse_match ls folder1 && + run_on_all test_path_exists folder1/bogus && + run_on_all test_path_is_missing folder1/untracked && + run_on_all test_path_exists deep/untracked-deep/bogus && + run_on_all test_path_exists deep/untracked-deep/untracked && + + test_all_match git clean -fd && + test_all_match git status --porcelain=v2 && + test_sparse_match ls && + test_sparse_match ls folder1 && + run_on_all test_path_exists folder1/bogus && + run_on_all test_path_exists deep/untracked-deep/bogus && + run_on_all test_path_is_missing deep/untracked-deep/untracked && test_all_match git clean -xf && test_all_match git status --porcelain=v2 && 
test_sparse_match ls && test_sparse_match ls folder1 && + run_on_all test_path_is_missing folder1/bogus && + run_on_all test_path_exists deep/untracked-deep/bogus && test_all_match git clean -xdf && test_all_match git status --porcelain=v2 && test_sparse_match ls && test_sparse_match ls folder1 && + run_on_all test_path_is_missing deep/untracked-deep/bogus && test_sparse_match test_path_is_dir folder1 ' @@ -920,6 +939,8 @@ test_expect_success 'sparse-index is not expanded' ' # Wildcard identifies only full sparse directories, no index expansion ensure_not_expanded reset deepest -- folder\* && + ensure_not_expanded clean -fd && + ensure_not_expanded checkout -f update-deep && test_config -C sparse-index pull.twohead ort && ( From b553ef674965f41bfff4e0a2c330f9087b3cd6b7 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:01 +0000 Subject: [PATCH 005/150] checkout-index: expand sparse checkout compatibility tests Add tests to cover `checkout-index`, with a focus on cases interesting in a sparse checkout (e.g., files specified outside sparse checkout definition). New tests are intended to serve as a baseline for existing and/or expected behavior and performance when integrating `checkout-index` with the sparse index. Note that the test 'checkout-index --all' is marked as 'test_expect_failure', indicating that `checkout-index --all` will be modified in a subsequent patch to behave as the test expects. 
Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 1 + t/t1092-sparse-checkout-compatibility.sh | 54 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index cb777c74a2..54f8602f3c 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -117,5 +117,6 @@ test_perf_on_all git diff test_perf_on_all git diff --cached test_perf_on_all git blame $SPARSE_CONE/a test_perf_on_all git blame $SPARSE_CONE/f3/a +test_perf_on_all git checkout-index -f --all test_done diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 0558736145..db7ad41109 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -755,6 +755,60 @@ test_expect_success 'cherry-pick with conflicts' ' test_all_match test_must_fail git cherry-pick to-cherry-pick ' +test_expect_success 'checkout-index inside sparse definition' ' + init_repos && + + run_on_all rm -f deep/a && + test_all_match git checkout-index -- deep/a && + test_all_match git status --porcelain=v2 && + + echo test >>new-a && + run_on_all cp ../new-a a && + test_all_match test_must_fail git checkout-index -- a && + test_all_match git checkout-index -f -- a && + test_all_match git status --porcelain=v2 +' + +test_expect_success 'checkout-index outside sparse definition' ' + init_repos && + + # File does not exist on disk yet for sparse checkouts, so checkout-index + # succeeds without -f + test_sparse_match git checkout-index -- folder1/a && + test_cmp sparse-checkout/folder1/a sparse-index/folder1/a && + test_cmp sparse-checkout/folder1/a full-checkout/folder1/a && + + run_on_sparse rm -rf folder1 && + echo test >new-a && + run_on_sparse mkdir -p folder1 && + run_on_all cp ../new-a folder1/a && + + test_all_match test_must_fail git checkout-index -- folder1/a 
&& + test_all_match git checkout-index -f -- folder1/a && + test_cmp sparse-checkout/folder1/a sparse-index/folder1/a && + test_cmp sparse-checkout/folder1/a full-checkout/folder1/a +' + +test_expect_success 'checkout-index with folders' ' + init_repos && + + # Inside checkout definition + test_all_match test_must_fail git checkout-index -f -- deep/ && + + # Outside checkout definition + test_all_match test_must_fail git checkout-index -f -- folder1/ +' + +# NEEDSWORK: even in sparse checkouts, checkout-index --all will create all +# files (even those outside the sparse definition) on disk. However, these files +# don't appear in the percentage of tracked files in git status. +test_expect_failure 'checkout-index --all' ' + init_repos && + + test_all_match git checkout-index --all && + test_sparse_match test_path_is_missing folder1 +' + test_expect_success 'clean' ' init_repos && From 88078f543b769dc13ae9796372651178584a25a0 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:02 +0000 Subject: [PATCH 006/150] checkout-index: add --ignore-skip-worktree-bits option Update `checkout-index` to no longer refresh files that have the `skip-worktree` bit set, exiting with an error if `skip-worktree` filenames are directly provided to `checkout-index`. The newly-added `--ignore-skip-worktree-bits` option provides a mechanism to replicate the old behavior, checking out *all* files specified (even those with `skip-worktree` enabled). The ability to toggle whether files should be checked-out based on `skip-worktree` already exists in `git checkout` and `git restore` (both of which have an `--ignore-skip-worktree-bits` option). The change to, by default, ignore `skip-worktree` files is especially helpful for sparse-checkout; it prevents inadvertent creation of files outside the sparse definition on disk and eliminates the need to expand a sparse index when using the `--all` option. 
Internal usage of `checkout-index` in `git stash` and `git filter-branch` do not make explicit use of files with `skip-worktree` enabled, so `--ignore-skip-worktree-bits` is not added to them. Helped-by: Elijah Newren Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-checkout-index.txt | 10 +++++++-- builtin/checkout-index.c | 13 ++++++++++++ t/t1092-sparse-checkout-compatibility.sh | 27 +++++++++++++++--------- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/Documentation/git-checkout-index.txt b/Documentation/git-checkout-index.txt index 4d33e7be0f..01dbd5cbf5 100644 --- a/Documentation/git-checkout-index.txt +++ b/Documentation/git-checkout-index.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git checkout-index' [-u] [-q] [-a] [-f] [-n] [--prefix=] [--stage=|all] [--temp] + [--ignore-skip-worktree-bits] [-z] [--stdin] [--] [...] @@ -37,8 +38,9 @@ OPTIONS -a:: --all:: - checks out all files in the index. Cannot be used - together with explicit filenames. + checks out all files in the index except for those with the + skip-worktree bit set (see `--ignore-skip-worktree-bits`). + Cannot be used together with explicit filenames. -n:: --no-create:: @@ -59,6 +61,10 @@ OPTIONS write the content to temporary files. The temporary name associations will be written to stdout. +--ignore-skip-worktree-bits:: + Check out all files, including those with the skip-worktree bit + set. + --stdin:: Instead of taking list of paths from the command line, read list of paths from the standard input. 
Paths are diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index e21620d964..615a118e2f 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -7,6 +7,7 @@ #define USE_THE_INDEX_COMPATIBILITY_MACROS #include "builtin.h" #include "config.h" +#include "dir.h" #include "lockfile.h" #include "quote.h" #include "cache-tree.h" @@ -17,6 +18,7 @@ #define CHECKOUT_ALL 4 static int nul_term_line; static int checkout_stage; /* default to checkout stage0 */ +static int ignore_skip_worktree; /* default to 0 */ static int to_tempfile; static char topath[4][TEMPORARY_FILENAME_LENGTH + 1]; @@ -65,6 +67,7 @@ static int checkout_file(const char *name, const char *prefix) int namelen = strlen(name); int pos = cache_name_pos(name, namelen); int has_same_name = 0; + int is_skipped = 1; int did_checkout = 0; int errs = 0; @@ -78,6 +81,9 @@ static int checkout_file(const char *name, const char *prefix) break; has_same_name = 1; pos++; + if (!ignore_skip_worktree && ce_skip_worktree(ce)) + break; + is_skipped = 0; if (ce_stage(ce) != checkout_stage && (CHECKOUT_ALL != checkout_stage || !ce_stage(ce))) continue; @@ -106,6 +112,9 @@ static int checkout_file(const char *name, const char *prefix) fprintf(stderr, "git checkout-index: %s ", name); if (!has_same_name) fprintf(stderr, "is not in the cache"); + else if (is_skipped) + fprintf(stderr, "has skip-worktree enabled; " + "use '--ignore-skip-worktree-bits' to checkout"); else if (checkout_stage) fprintf(stderr, "does not exist at stage %d", checkout_stage); @@ -125,6 +134,8 @@ static int checkout_all(const char *prefix, int prefix_length) ensure_full_index(&the_index); for (i = 0; i < active_nr ; i++) { struct cache_entry *ce = active_cache[i]; + if (!ignore_skip_worktree && ce_skip_worktree(ce)) + continue; if (ce_stage(ce) != checkout_stage && (CHECKOUT_ALL != checkout_stage || !ce_stage(ce))) continue; @@ -185,6 +196,8 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix) struct 
option builtin_checkout_index_options[] = { OPT_BOOL('a', "all", &all, N_("check out all files in the index")), + OPT_BOOL(0, "ignore-skip-worktree-bits", &ignore_skip_worktree, + N_("do not skip files with skip-worktree set")), OPT__FORCE(&force, N_("force overwrite of existing files"), 0), OPT__QUIET(&quiet, N_("no warning for existing files and files not in index")), diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index db7ad41109..434ef0433c 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -772,9 +772,14 @@ test_expect_success 'checkout-index inside sparse definition' ' test_expect_success 'checkout-index outside sparse definition' ' init_repos && - # File does not exist on disk yet for sparse checkouts, so checkout-index - # succeeds without -f - test_sparse_match git checkout-index -- folder1/a && + # Without --ignore-skip-worktree-bits, outside-of-cone files will trigger + # an error + test_sparse_match test_must_fail git checkout-index -- folder1/a && + test_i18ngrep "folder1/a has skip-worktree enabled" sparse-checkout-err && + test_path_is_missing folder1/a && + + # With --ignore-skip-worktree-bits, outside-of-cone files are checked out + test_sparse_match git checkout-index --ignore-skip-worktree-bits -- folder1/a && test_cmp sparse-checkout/folder1/a sparse-index/folder1/a && test_cmp sparse-checkout/folder1/a full-checkout/folder1/a && @@ -783,8 +788,8 @@ test_expect_success 'checkout-index outside sparse definition' ' run_on_sparse mkdir -p folder1 && run_on_all cp ../new-a folder1/a && - test_all_match test_must_fail git checkout-index -- folder1/a && - test_all_match git checkout-index -f -- folder1/a && + test_all_match test_must_fail git checkout-index --ignore-skip-worktree-bits -- folder1/a && + test_all_match git checkout-index -f --ignore-skip-worktree-bits -- folder1/a && test_cmp sparse-checkout/folder1/a sparse-index/folder1/a && test_cmp 
sparse-checkout/folder1/a full-checkout/folder1/a ' @@ -799,14 +804,16 @@ test_expect_success 'checkout-index with folders' ' test_all_match test_must_fail git checkout-index -f -- folder1/ ' -# NEEDSWORK: even in sparse checkouts, checkout-index --all will create all -# files (even those outside the sparse definition) on disk. However, these files -# don't appear in the percentage of tracked files in git status. -test_expect_failure 'checkout-index --all' ' +test_expect_success 'checkout-index --all' ' init_repos && test_all_match git checkout-index --all && - test_sparse_match test_path_is_missing folder1 + test_sparse_match test_path_is_missing folder1 && + + # --ignore-skip-worktree-bits will cause `skip-worktree` files to be + # checked out, causing the outside-of-cone `folder1` to exist on-disk + test_all_match git checkout-index --ignore-skip-worktree-bits --all && + test_all_match test_path_exists folder1 ' test_expect_success 'clean' ' From 35682ada44554e136677649ac3da8c92342cdae2 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:03 +0000 Subject: [PATCH 007/150] checkout-index: integrate with sparse index Add repository settings to allow usage of the sparse index. When using the `--all` option, sparse directories are ignored by default due to the `skip-worktree` flag, so there is no need to expand the index. If `--ignore-skip-worktree-bits` is specified, the index is expanded in order to check out all files. When checking out individual files, existing behavior in a full index is to exit with an error if a directory is specified (as the directory name will not match an index entry). However, it is possible in a sparse index to match a directory name to a sparse directory index entry, but checking out that sparse directory still results in an error on checkout. To reduce some potential confusion for users, `checkout_file(...)` explicitly exits with an informative error if provided with a sparse directory name. 
The test corresponding to this scenario verifies the error message, which now differs between sparse index and non-sparse index checkouts. Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/checkout-index.c | 28 ++++++++++++++++++++++-- t/t1092-sparse-checkout-compatibility.sh | 11 +++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index 615a118e2f..97e06e8c52 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -67,6 +67,7 @@ static int checkout_file(const char *name, const char *prefix) int namelen = strlen(name); int pos = cache_name_pos(name, namelen); int has_same_name = 0; + int is_file = 0; int is_skipped = 1; int did_checkout = 0; int errs = 0; @@ -81,6 +82,9 @@ static int checkout_file(const char *name, const char *prefix) break; has_same_name = 1; pos++; + if (S_ISSPARSEDIR(ce->ce_mode)) + break; + is_file = 1; if (!ignore_skip_worktree && ce_skip_worktree(ce)) break; is_skipped = 0; @@ -112,6 +116,8 @@ static int checkout_file(const char *name, const char *prefix) fprintf(stderr, "git checkout-index: %s ", name); if (!has_same_name) fprintf(stderr, "is not in the cache"); + else if (!is_file) + fprintf(stderr, "is a sparse directory"); else if (is_skipped) fprintf(stderr, "has skip-worktree enabled; " "use '--ignore-skip-worktree-bits' to checkout"); @@ -130,10 +136,25 @@ static int checkout_all(const char *prefix, int prefix_length) int i, errs = 0; struct cache_entry *last_ce = NULL; - /* TODO: audit for interaction with sparse-index. 
*/ - ensure_full_index(&the_index); for (i = 0; i < active_nr ; i++) { struct cache_entry *ce = active_cache[i]; + + if (S_ISSPARSEDIR(ce->ce_mode)) { + if (!ce_skip_worktree(ce)) + BUG("sparse directory '%s' does not have skip-worktree set", ce->name); + + /* + * If the current entry is a sparse directory and skip-worktree + * entries are being checked out, expand the index and continue + * the loop on the current index position (now pointing to the + * first entry inside the expanded sparse directory). + */ + if (ignore_skip_worktree) { + ensure_full_index(&the_index); + ce = active_cache[i]; + } + } + if (!ignore_skip_worktree && ce_skip_worktree(ce)) continue; if (ce_stage(ce) != checkout_stage @@ -225,6 +246,9 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix) git_config(git_default_config, NULL); prefix_length = prefix ? strlen(prefix) : 0; + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + if (read_cache() < 0) { die("invalid cache"); } diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 434ef0433c..0c72c854d8 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -801,7 +801,14 @@ test_expect_success 'checkout-index with folders' ' test_all_match test_must_fail git checkout-index -f -- deep/ && # Outside checkout definition - test_all_match test_must_fail git checkout-index -f -- folder1/ + # Note: although all tests fail (as expected), the messaging differs. For + # non-sparse index checkouts, the error is that the "file" does not appear + # in the index; for sparse checkouts, the error is explicitly that the + # entry is a sparse directory. + run_on_all test_must_fail git checkout-index -f -- folder1/ && + test_cmp full-checkout-err sparse-checkout-err && + ! 
test_cmp full-checkout-err sparse-index-err && + grep "is a sparse directory" sparse-index-err ' test_expect_success 'checkout-index --all' ' @@ -972,6 +979,8 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/untracked.txt && ensure_not_expanded add . && + ensure_not_expanded checkout-index -f a && + ensure_not_expanded checkout-index -f --all && for ref in update-deep update-folder1 update-folder2 update-deep do echo >>sparse-index/README.md && From e015d4d9614f728cc454900fde52af2076d5ef63 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:04 +0000 Subject: [PATCH 008/150] update-index: add tests for sparse-checkout compatibility Introduce tests for a variety of `git update-index` use cases, including performance scenarios. Tests are intended to exercise `update-index` with options that change the command's interaction with the index (e.g., `--again`) and with files/directories inside and outside a sparse checkout cone. Of note is that these tests clearly establish the behavior of `git update-index --add` with untracked, outside-of-cone files. Unlike `git add`, which fails with an error when provided with such files, `update-index` succeeds in adding them to the index. Additionally, the `skip-worktree` flag is *not* automatically added to the new entry. Although this is pre-existing behavior, there are a couple of reasons to avoid changing it in favor of consistency with e.g. 
`git add`: * `update-index` is a low-level command for modifying the index; while it can perform operations similar to those of `add`, it traditionally has fewer "guardrails" preventing a user from doing something they may not want to do (in this case, adding an outside-of-cone, non-`skip-worktree` file to the index) * `update-index` typically only exits with an error code if it is incapable of performing an operation (e.g., if an internal function call fails); adding a new file outside the sparse checkout definition is still a valid operation, albeit an inadvisable one * `update-index` does not implicitly set flags (e.g., `skip-worktree`) when creating new index entries with `--add`; if flags need to be updated, options like `--[no-]skip-worktree` allow a user to intentionally set them All this to say that, while there are valid reasons to consider changing the treatment of outside-of-cone files in `update-index`, there are also sufficient reasons for leaving it as-is. Co-authored-by: Derrick Stolee Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/perf/p2000-sparse-operations.sh | 1 + t/t1092-sparse-checkout-compatibility.sh | 167 +++++++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index 54f8602f3c..2a7106b949 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -118,5 +118,6 @@ test_perf_on_all git diff --cached test_perf_on_all git blame $SPARSE_CONE/a test_perf_on_all git blame $SPARSE_CONE/f3/a test_perf_on_all git checkout-index -f --all +test_perf_on_all git update-index --add --remove $SPARSE_CONE/a test_done diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 0c72c854d8..91f849f541 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -630,6 +630,173 @@ test_expect_success 'reset with wildcard
pathspec' ' test_all_match git ls-files -s -- folder1 ' +test_expect_success 'update-index modify outside sparse definition' ' + init_repos && + + write_script edit-contents <<-\EOF && + echo text >>$1 + EOF + + # Create & modify folder1/a + # Note that this setup is a manual way of reaching the erroneous + # condition in which a `skip-worktree` enabled, outside-of-cone file + # exists on disk. It is used here to ensure `update-index` is stable + # and behaves predictably if such a condition occurs. + run_on_sparse mkdir -p folder1 && + run_on_sparse cp ../initial-repo/folder1/a folder1/a && + run_on_all ../edit-contents folder1/a && + + # If file has skip-worktree enabled, update-index does not modify the + # index entry + test_sparse_match git update-index folder1/a && + test_sparse_match git status --porcelain=v2 && + test_must_be_empty sparse-checkout-out && + + # When skip-worktree is disabled (even on files outside sparse cone), file + # is updated in the index + test_sparse_match git update-index --no-skip-worktree folder1/a && + test_all_match git status --porcelain=v2 && + test_all_match git update-index folder1/a && + test_all_match git status --porcelain=v2 +' + +test_expect_success 'update-index --add outside sparse definition' ' + init_repos && + + write_script edit-contents <<-\EOF && + echo text >>$1 + EOF + + # Create folder1, add new file + run_on_sparse mkdir -p folder1 && + run_on_all ../edit-contents folder1/b && + + # The *untracked* out-of-cone file is added to the index because it does + # not have a `skip-worktree` bit to signal that it should be ignored + # (unlike in `git add`, which will fail due to the file being outside + # the sparse checkout definition). + test_all_match git update-index --add folder1/b && + test_all_match git status --porcelain=v2 +' + +# NEEDSWORK: `--remove`, unlike the rest of `update-index`, does not ignore +# `skip-worktree` entries by default and will remove them from the index. 
+# The `--ignore-skip-worktree-entries` flag must be used in conjunction with +# `--remove` to ignore the `skip-worktree` entries and prevent their removal +# from the index. +test_expect_success 'update-index --remove outside sparse definition' ' + init_repos && + + # When --ignore-skip-worktree-entries is _not_ specified: + # out-of-cone, not-on-disk files are removed from the index + test_sparse_match git update-index --remove folder1/a && + cat >expect <<-EOF && + D folder1/a + EOF + test_sparse_match git diff --cached --name-status && + test_cmp expect sparse-checkout-out && + + # Reset the state + test_all_match git reset --hard && + + # When --ignore-skip-worktree-entries is specified, out-of-cone + # (skip-worktree) files are ignored + test_sparse_match git update-index --remove --ignore-skip-worktree-entries folder1/a && + test_sparse_match git diff --cached --name-status && + test_must_be_empty sparse-checkout-out && + + # Reset the state + test_all_match git reset --hard && + + # --force-remove supercedes --ignore-skip-worktree-entries, removing + # a skip-worktree file from the index (and disk) when both are specified + # with --remove + test_sparse_match git update-index --force-remove --ignore-skip-worktree-entries folder1/a && + cat >expect <<-EOF && + D folder1/a + EOF + test_sparse_match git diff --cached --name-status && + test_cmp expect sparse-checkout-out +' + +test_expect_success 'update-index with directories' ' + init_repos && + + # update-index will exit silently when provided with a directory name + # containing a trailing slash + test_all_match git update-index deep/ folder1/ && + grep "Ignoring path deep/" sparse-checkout-err && + grep "Ignoring path folder1/" sparse-checkout-err && + + # When update-index is given a directory name WITHOUT a trailing slash, it will + # behave in different ways depending on the status of the directory on disk: + # * if it exists, the command exits with an error ("add individual files instead") + # * if it 
does NOT exist (e.g., in a sparse-checkout), it is assumed to be a + # file and either triggers an error ("does not exist and --remove not passed") + # or is ignored completely (when using --remove) + test_all_match test_must_fail git update-index deep && + run_on_all test_must_fail git update-index folder1 && + test_must_fail git -C full-checkout update-index --remove folder1 && + test_sparse_match git update-index --remove folder1 && + test_all_match git status --porcelain=v2 +' + +test_expect_success 'update-index --again file outside sparse definition' ' + init_repos && + + test_all_match git checkout -b test-reupdate && + + # Update HEAD without modifying the index to introduce a difference in + # folder1/a + test_sparse_match git reset --soft update-folder1 && + + # Because folder1/a differs in the index vs HEAD, + # `git update-index --no-skip-worktree --again` will effectively perform + # `git update-index --no-skip-worktree folder1/a` and remove the skip-worktree + # flag from folder1/a + test_sparse_match git update-index --no-skip-worktree --again && + test_sparse_match git status --porcelain=v2 && + + cat >expect <<-EOF && + D folder1/a + EOF + test_sparse_match git diff --name-status && + test_cmp expect sparse-checkout-out +' + +test_expect_success 'update-index --cacheinfo' ' + init_repos && + + deep_a_oid=$(git -C full-checkout rev-parse update-deep:deep/a) && + folder2_oid=$(git -C full-checkout rev-parse update-folder2:folder2) && + folder1_a_oid=$(git -C full-checkout rev-parse update-folder1:folder1/a) && + + test_all_match git update-index --cacheinfo 100644 $deep_a_oid deep/a && + test_all_match git status --porcelain=v2 && + + # Cannot add sparse directory, even in sparse index case + test_all_match test_must_fail git update-index --add --cacheinfo 040000 $folder2_oid folder2/ && + + # Sparse match only: the new outside-of-cone entry is added *without* skip-worktree, + # so `git status` reports it as "deleted" in the worktree + 
test_sparse_match git update-index --add --cacheinfo 100644 $folder1_a_oid folder1/a && + test_sparse_match git status --porcelain=v2 && + cat >expect <<-EOF && + MD folder1/a + EOF + test_sparse_match git status --short -- folder1/a && + test_cmp expect sparse-checkout-out && + + # To return folder1/a to "normal" for a sparse checkout (ignored & + # outside-of-cone), add the skip-worktree flag. + test_sparse_match git update-index --skip-worktree folder1/a && + cat >expect <<-EOF && + S folder1/a + EOF + test_sparse_match git ls-files -t -- folder1/a && + test_cmp expect sparse-checkout-out +' + test_expect_success 'merge, cherry-pick, and rebase' ' init_repos && From c35e9f5ecd00f0c003dc9120d3c68e95e2ba3bd7 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:05 +0000 Subject: [PATCH 009/150] update-index: integrate with sparse index Enable use of the sparse index with `update-index`. Most variations of `update-index` work without explicitly expanding the index or making any other updates in or outside of `update-index.c`. The one usage requiring additional changes is `--cacheinfo`; if a file inside a sparse directory was specified, the index would not be expanded until after the cache tree is invalidated, leading to a mismatch between the index and cache tree. This scenario is handled by rearranging `add_index_entry_with_check`, allowing `index_name_stage_pos` to expand the index *before* attempting to invalidate the relevant cache tree path, avoiding cache tree/index corruption. 
Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/update-index.c | 3 +++ read-cache.c | 10 +++++++--- t/t1092-sparse-checkout-compatibility.sh | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index 187203e8bb..605cc693bb 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -1077,6 +1077,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) git_config(git_default_config, NULL); + prepare_repo_settings(r); + the_repository->settings.command_requires_full_index = 0; + /* we will diagnose later if it turns out that we need to update it */ newfd = hold_locked_index(&lock_file, 0); if (newfd < 0) diff --git a/read-cache.c b/read-cache.c index cbe73f14e5..b4600e954b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1339,9 +1339,6 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK; int new_only = option & ADD_CACHE_NEW_ONLY; - if (!(option & ADD_CACHE_KEEP_CACHE_TREE)) - cache_tree_invalidate_path(istate, ce->name); - /* * If this entry's path sorts after the last entry in the index, * we can avoid searching for it. @@ -1352,6 +1349,13 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e else pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), EXPAND_SPARSE); + /* + * Cache tree path should be invalidated only after index_name_stage_pos, + * in case it expands a sparse index. + */ + if (!(option & ADD_CACHE_KEEP_CACHE_TREE)) + cache_tree_invalidate_path(istate, ce->name); + /* existing match? Just replace it. 
*/ if (pos >= 0) { if (!new_only) diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 91f849f541..fceaba7101 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -1253,6 +1253,21 @@ test_expect_success 'sparse index is not expanded: diff' ' ensure_not_expanded diff --cached ' +test_expect_success 'sparse index is not expanded: update-index' ' + init_repos && + + deep_a_oid=$(git -C full-checkout rev-parse update-deep:deep/a) && + ensure_not_expanded update-index --cacheinfo 100644 $deep_a_oid deep/a && + + echo "test" >sparse-index/README.md && + echo "test2" >sparse-index/a && + rm -f sparse-index/deep/a && + + ensure_not_expanded update-index --add README.md && + ensure_not_expanded update-index a && + ensure_not_expanded update-index --remove deep/a +' + test_expect_success 'sparse index is not expanded: blame' ' init_repos && From b9ca5e26579ceb820103b49648c01187a4a0dddd Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 11 Jan 2022 18:05:06 +0000 Subject: [PATCH 010/150] update-index: reduce scope of index expansion in do_reupdate Replace unconditional index expansion in 'do_reupdate()' with one scoped to only where a full index is needed. A full index is only required in 'do_reupdate()' when a sparse directory in the index differs from HEAD; in that case, the index is expanded and the operation restarted. Because the index should only be expanded if a sparse directory is modified, add a test ensuring the index is not expanded when differences only exist within the sparse cone. 
Signed-off-by: Victoria Dye Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/update-index.c | 14 +++++++++++--- t/t1092-sparse-checkout-compatibility.sh | 5 ++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index 605cc693bb..52ecc714d9 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -606,7 +606,7 @@ static struct cache_entry *read_one_ent(const char *which, error("%s: not in %s branch.", path, which); return NULL; } - if (mode == S_IFDIR) { + if (!the_index.sparse_index && mode == S_IFDIR) { if (which) error("%s: not a blob in %s branch.", path, which); return NULL; @@ -743,8 +743,6 @@ static int do_reupdate(int ac, const char **av, */ has_head = 0; redo: - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) { const struct cache_entry *ce = active_cache[pos]; struct cache_entry *old = NULL; @@ -761,6 +759,16 @@ static int do_reupdate(int ac, const char **av, discard_cache_entry(old); continue; /* unchanged */ } + + /* At this point, we know the contents of the sparse directory are + * modified with respect to HEAD, so we expand the index and restart + * to process each path individually + */ + if (S_ISSPARSEDIR(ce->ce_mode)) { + ensure_full_index(&the_index); + goto redo; + } + /* Be careful. The working tree may not have the * path anymore, in which case, under 'allow_remove', * or worse yet 'allow_replace', active_nr may decrease. 
diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index fceaba7101..53f84881de 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -1265,7 +1265,10 @@ test_expect_success 'sparse index is not expanded: update-index' ' ensure_not_expanded update-index --add README.md && ensure_not_expanded update-index a && - ensure_not_expanded update-index --remove deep/a + ensure_not_expanded update-index --remove deep/a && + + ensure_not_expanded reset --soft update-deep && + ensure_not_expanded update-index --add --remove --again ' test_expect_success 'sparse index is not expanded: blame' ' From 69840cc0f7b4f3352903bd2b8f3de7077916c26b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:31 +0100 Subject: [PATCH 011/150] refs: extract packed_refs_delete_refs() to allow control of transaction When deleting loose refs, then we also have to delete the refs in the packed backend. This is done by calling `refs_delete_refs()`, which then uses the packed-backend's logic to delete refs. This doesn't allow us to exercise any control over the reference transaction which is being created in the packed backend, which is required in a subsequent commit. Extract a new function `packed_refs_delete_refs()`, which hosts most of the logic to delete refs except for creating the transaction itself. Like this, we can easily create the transaction in the files backend and thus exert more control over it. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 13 ++++++++++--- refs/packed-backend.c | 26 ++++++++++++++++++++------ refs/packed-backend.h | 7 +++++++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 90b671025a..459f18dbc1 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1249,6 +1249,7 @@ static int files_delete_refs(struct ref_store *ref_store, const char *msg, { struct files_ref_store *refs = files_downcast(ref_store, REF_STORE_WRITE, "delete_refs"); + struct ref_transaction *transaction = NULL; struct strbuf err = STRBUF_INIT; int i, result = 0; @@ -1258,10 +1259,14 @@ static int files_delete_refs(struct ref_store *ref_store, const char *msg, if (packed_refs_lock(refs->packed_ref_store, 0, &err)) goto error; - if (refs_delete_refs(refs->packed_ref_store, msg, refnames, flags)) { - packed_refs_unlock(refs->packed_ref_store); + transaction = ref_store_transaction_begin(refs->packed_ref_store, &err); + if (!transaction) + goto error; + + result = packed_refs_delete_refs(refs->packed_ref_store, + transaction, msg, refnames, flags); + if (result) goto error; - } packed_refs_unlock(refs->packed_ref_store); @@ -1272,6 +1277,7 @@ static int files_delete_refs(struct ref_store *ref_store, const char *msg, result |= error(_("could not remove reference %s"), refname); } + ref_transaction_free(transaction); strbuf_release(&err); return result; @@ -1288,6 +1294,7 @@ error: else error(_("could not delete references: %s"), err.buf); + ref_transaction_free(transaction); strbuf_release(&err); return -1; } diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 67152c664e..c964dd1617 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1522,15 +1522,10 @@ static int packed_initial_transaction_commit(struct ref_store *ref_store, static int packed_delete_refs(struct ref_store *ref_store, const char *msg, struct string_list *refnames, 
unsigned int flags) { - struct packed_ref_store *refs = - packed_downcast(ref_store, REF_STORE_WRITE, "delete_refs"); struct strbuf err = STRBUF_INIT; struct ref_transaction *transaction; - struct string_list_item *item; int ret; - (void)refs; /* We need the check above, but don't use the variable */ - if (!refnames->nr) return 0; @@ -1544,6 +1539,26 @@ static int packed_delete_refs(struct ref_store *ref_store, const char *msg, if (!transaction) return -1; + ret = packed_refs_delete_refs(ref_store, transaction, + msg, refnames, flags); + + ref_transaction_free(transaction); + return ret; +} + +int packed_refs_delete_refs(struct ref_store *ref_store, + struct ref_transaction *transaction, + const char *msg, + struct string_list *refnames, + unsigned int flags) +{ + struct strbuf err = STRBUF_INIT; + struct string_list_item *item; + int ret; + + /* Assert that the ref store refers to a packed backend. */ + packed_downcast(ref_store, REF_STORE_WRITE, "delete_refs"); + for_each_string_list_item(item, refnames) { if (ref_transaction_delete(transaction, item->string, NULL, flags, msg, &err)) { @@ -1563,7 +1578,6 @@ static int packed_delete_refs(struct ref_store *ref_store, const char *msg, error(_("could not delete references: %s"), err.buf); } - ref_transaction_free(transaction); strbuf_release(&err); return ret; } diff --git a/refs/packed-backend.h b/refs/packed-backend.h index f61a73ec25..a2cca5d9a3 100644 --- a/refs/packed-backend.h +++ b/refs/packed-backend.h @@ -3,6 +3,7 @@ struct repository; struct ref_transaction; +struct string_list; /* * Support for storing references in a `packed-refs` file. 
@@ -27,6 +28,12 @@ int packed_refs_lock(struct ref_store *ref_store, int flags, struct strbuf *err) void packed_refs_unlock(struct ref_store *ref_store); int packed_refs_is_locked(struct ref_store *ref_store); +int packed_refs_delete_refs(struct ref_store *ref_store, + struct ref_transaction *transaction, + const char *msg, + struct string_list *refnames, + unsigned int flags); + /* * Return true if `transaction` really needs to be carried out against * the specified packed_ref_store, or false if it can be skipped From fbe73f61cbc29f6c4a85478cf792c37dbe5aa26c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:35 +0100 Subject: [PATCH 012/150] refs: allow passing flags when beginning transactions We do not currently have any flags when creating reference transactions, but we'll add one to disable execution of the reference transaction hook in some cases. Allow passing flags to `ref_store_transaction_begin()` to prepare for this change. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 8 +++++--- refs.h | 3 ++- refs/files-backend.c | 10 +++++----- refs/packed-backend.c | 2 +- refs/refs-internal.h | 1 + sequencer.c | 2 +- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/refs.c b/refs.c index 4da4996c4d..7415864b62 100644 --- a/refs.c +++ b/refs.c @@ -800,7 +800,7 @@ int refs_delete_ref(struct ref_store *refs, const char *msg, struct ref_transaction *transaction; struct strbuf err = STRBUF_INIT; - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction || ref_transaction_delete(transaction, refname, old_oid, flags, msg, &err) || @@ -1005,6 +1005,7 @@ int read_ref_at(struct ref_store *refs, const char *refname, } struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, + unsigned int flags, struct strbuf *err) { struct ref_transaction *tr; @@ -1012,12 +1013,13 @@ struct ref_transaction 
*ref_store_transaction_begin(struct ref_store *refs, CALLOC_ARRAY(tr, 1); tr->ref_store = refs; + tr->flags = flags; return tr; } struct ref_transaction *ref_transaction_begin(struct strbuf *err) { - return ref_store_transaction_begin(get_main_ref_store(the_repository), err); + return ref_store_transaction_begin(get_main_ref_store(the_repository), 0, err); } void ref_transaction_free(struct ref_transaction *transaction) @@ -1156,7 +1158,7 @@ int refs_update_ref(struct ref_store *refs, const char *msg, struct strbuf err = STRBUF_INIT; int ret = 0; - t = ref_store_transaction_begin(refs, &err); + t = ref_store_transaction_begin(refs, 0, &err); if (!t || ref_transaction_update(t, refname, new_oid, old_oid, flags, msg, &err) || diff --git a/refs.h b/refs.h index 92360e55a2..31f7bf9642 100644 --- a/refs.h +++ b/refs.h @@ -231,7 +231,7 @@ char *repo_default_branch_name(struct repository *r, int quiet); * struct strbuf err = STRBUF_INIT; * int ret = 0; * - * transaction = ref_store_transaction_begin(refs, &err); + * transaction = ref_store_transaction_begin(refs, 0, &err); * if (!transaction || * ref_transaction_update(...) || * ref_transaction_create(...) || @@ -573,6 +573,7 @@ enum action_on_err { * be freed by calling ref_transaction_free(). 
*/ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs, + unsigned int flags, struct strbuf *err); struct ref_transaction *ref_transaction_begin(struct strbuf *err); diff --git a/refs/files-backend.c b/refs/files-backend.c index 459f18dbc1..4d4f0c2099 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1121,7 +1121,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r) if (check_refname_format(r->name, 0)) return; - transaction = ref_store_transaction_begin(&refs->base, &err); + transaction = ref_store_transaction_begin(&refs->base, 0, &err); if (!transaction) goto cleanup; ref_transaction_add_update( @@ -1192,7 +1192,7 @@ static int files_pack_refs(struct ref_store *ref_store, unsigned int flags) struct strbuf err = STRBUF_INIT; struct ref_transaction *transaction; - transaction = ref_store_transaction_begin(refs->packed_ref_store, &err); + transaction = ref_store_transaction_begin(refs->packed_ref_store, 0, &err); if (!transaction) return -1; @@ -1259,7 +1259,7 @@ static int files_delete_refs(struct ref_store *ref_store, const char *msg, if (packed_refs_lock(refs->packed_ref_store, 0, &err)) goto error; - transaction = ref_store_transaction_begin(refs->packed_ref_store, &err); + transaction = ref_store_transaction_begin(refs->packed_ref_store, 0, &err); if (!transaction) goto error; @@ -2774,7 +2774,7 @@ static int files_transaction_prepare(struct ref_store *ref_store, */ if (!packed_transaction) { packed_transaction = ref_store_transaction_begin( - refs->packed_ref_store, err); + refs->packed_ref_store, 0, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; @@ -3045,7 +3045,7 @@ static int files_initial_transaction_commit(struct ref_store *ref_store, &affected_refnames)) BUG("initial ref transaction called with existing refs"); - packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err); + packed_transaction = 
ref_store_transaction_begin(refs->packed_ref_store, 0, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; diff --git a/refs/packed-backend.c b/refs/packed-backend.c index c964dd1617..0b45598e18 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1535,7 +1535,7 @@ static int packed_delete_refs(struct ref_store *ref_store, const char *msg, * updates into a single transaction. */ - transaction = ref_store_transaction_begin(ref_store, &err); + transaction = ref_store_transaction_begin(ref_store, 0, &err); if (!transaction) return -1; diff --git a/refs/refs-internal.h b/refs/refs-internal.h index 46a839539e..a0af63f162 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -213,6 +213,7 @@ struct ref_transaction { size_t nr; enum ref_transaction_state state; void *backend_data; + unsigned int flags; }; /* diff --git a/sequencer.c b/sequencer.c index 6abd72160c..61edd39d7a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -3588,7 +3588,7 @@ static int do_label(struct repository *r, const char *name, int len) strbuf_addf(&ref_name, "refs/rewritten/%.*s", len, name); strbuf_addf(&msg, "rebase (label) '%.*s'", len, name); - transaction = ref_store_transaction_begin(refs, &err); + transaction = ref_store_transaction_begin(refs, 0, &err); if (!transaction) { error("%s", err.buf); ret = -1; From 958fbc74e3d0fcc88b2065190e23db556a963644 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:39 +0100 Subject: [PATCH 013/150] refs: allow skipping the reference-transaction hook The reference-transaction hook is executing whenever we prepare, commit or abort a reference transaction. While this is mostly intentional, in case of the files backend we're leaking the implementation detail that the store is in fact a composite store with one loose and one packed backend to the caller. So while we want to execute the hook for all logical updates, executing it for such implementation details is unexpected. 
Prepare for a fix by adding a new flag which allows skipping execution of the hook. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 3 +++ refs.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/refs.c b/refs.c index 7415864b62..526bf5ed97 100644 --- a/refs.c +++ b/refs.c @@ -2084,6 +2084,9 @@ static int run_transaction_hook(struct ref_transaction *transaction, const char *hook; int ret = 0, i; + if (transaction->flags & REF_TRANSACTION_SKIP_HOOK) + return 0; + hook = find_hook("reference-transaction"); if (!hook) return ret; diff --git a/refs.h b/refs.h index 31f7bf9642..d4056f9fe2 100644 --- a/refs.h +++ b/refs.h @@ -568,6 +568,11 @@ enum action_on_err { UPDATE_REFS_QUIET_ON_ERR }; +/* + * Skip executing the reference-transaction hook. + */ +#define REF_TRANSACTION_SKIP_HOOK (1 << 0) + /* * Begin a reference transaction. The reference transaction must * be freed by calling ref_transaction_free(). From 2ce825436268d6409bee8ebb5f5500b7ff172b1e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:44 +0100 Subject: [PATCH 014/150] refs: demonstrate excessive execution of the reference-transaction hook Add tests which demonstrate that we're executing the reference-transaction hook too often in some cases, which thus leaks implementation details about the reference store's implementation itself. Behaviour will be fixed in follow-up commits.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/t1416-ref-transaction-hooks.sh | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index 6c941027a8..0567fbdf0b 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -136,4 +136,68 @@ test_expect_success 'interleaving hook calls succeed' ' test_cmp expect target-repo.git/actual ' +test_expect_success 'hook does not get called on packing refs' ' + # Pack references first such that we are in a known state. + git pack-refs --all && + + write_script .git/hooks/reference-transaction <<-\EOF && + echo "$@" >>actual + cat >>actual + EOF + rm -f actual && + + git update-ref refs/heads/unpacked-ref $POST_OID && + git pack-refs --all && + + # We only expect a single hook invocation, which is the call to + # git-update-ref(1). But currently, packing refs will also trigger the + # hook. + cat >expect <<-EOF && + prepared + $ZERO_OID $POST_OID refs/heads/unpacked-ref + committed + $ZERO_OID $POST_OID refs/heads/unpacked-ref + prepared + $ZERO_OID $POST_OID refs/heads/unpacked-ref + committed + $ZERO_OID $POST_OID refs/heads/unpacked-ref + prepared + $POST_OID $ZERO_OID refs/heads/unpacked-ref + committed + $POST_OID $ZERO_OID refs/heads/unpacked-ref + EOF + + test_cmp expect actual +' + +test_expect_success 'deleting packed ref calls hook once' ' + # Create a reference and pack it. + git update-ref refs/heads/to-be-deleted $POST_OID && + git pack-refs --all && + + write_script .git/hooks/reference-transaction <<-\EOF && + echo "$@" >>actual + cat >>actual + EOF + rm -f actual && + + git update-ref -d refs/heads/to-be-deleted $POST_OID && + + # We only expect a single hook invocation, which is the logical + # deletion. But currently, we see two interleaving transactions, once + # for deleting the loose refs and once for deleting the packed ref. 
+ cat >expect <<-EOF && + prepared + $ZERO_OID $ZERO_OID refs/heads/to-be-deleted + prepared + $POST_OID $ZERO_OID refs/heads/to-be-deleted + committed + $ZERO_OID $ZERO_OID refs/heads/to-be-deleted + committed + $POST_OID $ZERO_OID refs/heads/to-be-deleted + EOF + + test_cmp expect actual +' + test_done From ffad9941383465553bf26d88050f3243726f30df Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:48 +0100 Subject: [PATCH 015/150] refs: do not execute reference-transaction hook on packing refs The reference-transaction hook is supposed to track logical changes to references, but it currently also gets executed when packing refs in a repository. This is unexpected and ultimately not all that useful: packing refs is not supposed to result in any user-visible change to the refs' state, and it ultimately is an implementation detail of how refs stores work. Fix this excessive execution of the hook when packing refs. Reported-by: Waleed Khan Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 6 ++++-- t/t1416-ref-transaction-hooks.sh | 11 +---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 4d4f0c2099..565929210a 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1121,7 +1121,8 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r) if (check_refname_format(r->name, 0)) return; - transaction = ref_store_transaction_begin(&refs->base, 0, &err); + transaction = ref_store_transaction_begin(&refs->base, + REF_TRANSACTION_SKIP_HOOK, &err); if (!transaction) goto cleanup; ref_transaction_add_update( @@ -1192,7 +1193,8 @@ static int files_pack_refs(struct ref_store *ref_store, unsigned int flags) struct strbuf err = STRBUF_INIT; struct ref_transaction *transaction; - transaction = ref_store_transaction_begin(refs->packed_ref_store, 0, &err); + transaction = 
ref_store_transaction_begin(refs->packed_ref_store, + REF_TRANSACTION_SKIP_HOOK, &err); if (!transaction) return -1; diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index 0567fbdf0b..f9d3d5213f 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -150,21 +150,12 @@ test_expect_success 'hook does not get called on packing refs' ' git pack-refs --all && # We only expect a single hook invocation, which is the call to - # git-update-ref(1). But currently, packing refs will also trigger the - # hook. + # git-update-ref(1). cat >expect <<-EOF && prepared $ZERO_OID $POST_OID refs/heads/unpacked-ref committed $ZERO_OID $POST_OID refs/heads/unpacked-ref - prepared - $ZERO_OID $POST_OID refs/heads/unpacked-ref - committed - $ZERO_OID $POST_OID refs/heads/unpacked-ref - prepared - $POST_OID $ZERO_OID refs/heads/unpacked-ref - committed - $POST_OID $ZERO_OID refs/heads/unpacked-ref EOF test_cmp expect actual From 2ed1b64ebdeefc7f9473ae159fb45ff0c6cf121a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 17 Jan 2022 09:12:53 +0100 Subject: [PATCH 016/150] refs: skip hooks when deleting uncovered packed refs When deleting refs from the loose-files refs backend, then we need to be careful to also delete the same ref from the packed refs backend, if it exists. If we don't, then deleting the loose ref would "uncover" the packed ref. We thus always have to queue up deletions of refs for both the loose and the packed refs backend. This is done in two separate transactions, where the end result is that the reference-transaction hook is executed twice for the deleted refs. This behaviour is quite misleading: it's exposing implementation details of how the files backend works to the user, in contrast to the logical updates that we'd really want to expose via the hook. 
Worse yet, whether the hook gets executed once or twice depends on how well-packed the repository is: if the ref only exists as a loose ref, then we execute it once, otherwise if it is also packed then we execute it twice. Fix this behaviour and don't execute the reference-transaction hook at all when updating refs in the packed-refs backend if the update is driven by the files backend. This works as expected even in case the refs to be deleted only exist in the packed-refs backend because the loose-backend always queues refs in its own transaction even if they don't exist such that they can be locked for concurrent creation. And it also does the right thing in case neither of the backends has the ref because that would cause the transaction to fail completely. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/files-backend.c | 9 ++++++--- t/t1416-ref-transaction-hooks.sh | 7 +------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index 565929210a..844918cbd8 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1261,7 +1261,8 @@ static int files_delete_refs(struct ref_store *ref_store, const char *msg, if (packed_refs_lock(refs->packed_ref_store, 0, &err)) goto error; - transaction = ref_store_transaction_begin(refs->packed_ref_store, 0, &err); + transaction = ref_store_transaction_begin(refs->packed_ref_store, + REF_TRANSACTION_SKIP_HOOK, &err); if (!transaction) goto error; @@ -2776,7 +2777,8 @@ static int files_transaction_prepare(struct ref_store *ref_store, */ if (!packed_transaction) { packed_transaction = ref_store_transaction_begin( - refs->packed_ref_store, 0, err); + refs->packed_ref_store, + REF_TRANSACTION_SKIP_HOOK, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; @@ -3047,7 +3049,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store, &affected_refnames)) BUG("initial ref transaction called with existing refs"); - 
packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, 0, err); + packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, + REF_TRANSACTION_SKIP_HOOK, err); if (!packed_transaction) { ret = TRANSACTION_GENERIC_ERROR; goto cleanup; diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index f9d3d5213f..4e1e84a91f 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -175,16 +175,11 @@ test_expect_success 'deleting packed ref calls hook once' ' git update-ref -d refs/heads/to-be-deleted $POST_OID && # We only expect a single hook invocation, which is the logical - # deletion. But currently, we see two interleaving transactions, once - # for deleting the loose refs and once for deleting the packed ref. + # deletion. cat >expect <<-EOF && - prepared - $ZERO_OID $ZERO_OID refs/heads/to-be-deleted prepared $POST_OID $ZERO_OID refs/heads/to-be-deleted committed - $ZERO_OID $ZERO_OID refs/heads/to-be-deleted - committed $POST_OID $ZERO_OID refs/heads/to-be-deleted EOF From ae42fa4c03ef0ffe36468f8918b3d404a277da50 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:36 +0000 Subject: [PATCH 017/150] rebase: factor out checkout for up to date branch This code is heavily indented and it will be convenient later in the series to have it in its own function. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 34b4744e5f..f5c37b7d4a 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -812,6 +812,23 @@ static int rebase_config(const char *var, const char *value, void *data) return git_default_config(var, value, data); } +static int checkout_up_to_date(struct rebase_options *options) +{ + struct strbuf buf = STRBUF_INIT; + int ret = 0; + + strbuf_addf(&buf, "%s: checkout %s", + getenv(GIT_REFLOG_ACTION_ENVIRONMENT), + options->switch_to); + if (reset_head(the_repository, &options->orig_head, "checkout", + options->head_name, RESET_HEAD_RUN_POST_CHECKOUT_HOOK, + NULL, buf.buf, DEFAULT_REFLOG_ACTION) < 0) + ret = error(_("could not switch to %s"), options->switch_to); + strbuf_release(&buf); + + return ret; +} + /* * Determines whether the commits in from..to are linear, i.e. contain * no merge commits. This function *expects* `from` to be an ancestor of @@ -1673,21 +1690,9 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) if (!(options.flags & REBASE_FORCE)) { /* Lazily switch to the target branch if needed... 
*/ if (options.switch_to) { - strbuf_reset(&buf); - strbuf_addf(&buf, "%s: checkout %s", - getenv(GIT_REFLOG_ACTION_ENVIRONMENT), - options.switch_to); - if (reset_head(the_repository, - &options.orig_head, "checkout", - options.head_name, - RESET_HEAD_RUN_POST_CHECKOUT_HOOK, - NULL, buf.buf, - DEFAULT_REFLOG_ACTION) < 0) { - ret = error(_("could not switch to " - "%s"), - options.switch_to); + ret = checkout_up_to_date(&options); + if (ret) goto cleanup; - } } if (!(options.flags & REBASE_NO_QUIET)) From bd55eee04b698af6c10c77b24f88601814771ac8 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:37 +0000 Subject: [PATCH 018/150] t5403: refactor rebase post-checkout hook tests These tests only test the default backend and do not check that the arguments passed to the hook are correct. Fix this by running the tests with both backends and adding checks for the hook arguments. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- t/t5403-post-checkout-hook.sh | 42 ++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index 1ec9e23be7..272b02687b 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -49,23 +49,33 @@ test_expect_success 'post-checkout receives the right args when not switching br test $old = $new && test $flag = 0 ' -test_expect_success 'post-checkout is triggered on rebase' ' - test_when_finished "rm -f .git/post-checkout.args" && - git checkout -b rebase-test main && - rm -f .git/post-checkout.args && - git rebase rebase-on-me && - read old new flag <.git/post-checkout.args && - test $old != $new && test $flag = 1 -' +test_rebase () { + args="$*" && + test_expect_success "post-checkout is triggered on rebase $args" ' + test_when_finished "rm -f .git/post-checkout.args" && + git checkout -B rebase-test main && + rm -f .git/post-checkout.args && + git rebase $args rebase-on-me && + read old 
new flag <.git/post-checkout.args && + test_cmp_rev main $old && + test_cmp_rev rebase-on-me $new && + test $flag = 1 + ' -test_expect_success 'post-checkout is triggered on rebase with fast-forward' ' - test_when_finished "rm -f .git/post-checkout.args" && - git checkout -b ff-rebase-test rebase-on-me^ && - rm -f .git/post-checkout.args && - git rebase rebase-on-me && - read old new flag <.git/post-checkout.args && - test $old != $new && test $flag = 1 -' + test_expect_success "post-checkout is triggered on rebase $args with fast-forward" ' + test_when_finished "rm -f .git/post-checkout.args" && + git checkout -B ff-rebase-test rebase-on-me^ && + rm -f .git/post-checkout.args && + git rebase $args rebase-on-me && + read old new flag <.git/post-checkout.args && + test_cmp_rev rebase-on-me^ $old && + test_cmp_rev rebase-on-me $new && + test $flag = 1 + ' +} + +test_rebase --apply && +test_rebase --merge test_expect_success 'post-checkout hook is triggered by clone' ' mkdir -p templates/hooks && From 69f4c23009ee30faeb61a831f4265791c945783e Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:38 +0000 Subject: [PATCH 019/150] rebase: pass correct arguments to post-checkout hook If a rebase started with "rebase [--apply|--merge] <upstream> <branch>" detects that <upstream> is an ancestor of <branch> then it fast-forwards and checks out <branch>. Unfortunately in that case it passed the null oid as the first argument to the post-checkout hook rather than the oid of HEAD. A side effect of this change is that the call to update_ref() which updates HEAD now always receives the old value of HEAD. This provides protection against another process updating HEAD during the checkout. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- reset.c | 18 +++++++++--------- t/t5403-post-checkout-hook.sh | 13 +++++++++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/reset.c b/reset.c index f214df3d96..315fef91d3 100644 --- a/reset.c +++ b/reset.c @@ -18,7 +18,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, unsigned run_hook = flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; unsigned refs_only = flags & RESET_HEAD_REFS_ONLY; unsigned update_orig_head = flags & RESET_ORIG_HEAD; - struct object_id head_oid; + struct object_id *head = NULL, head_oid; struct tree_desc desc[2] = { { NULL }, { NULL } }; struct lock_file lock = LOCK_INIT; struct unpack_trees_options unpack_tree_opts = { 0 }; @@ -26,8 +26,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, const char *reflog_action; struct strbuf msg = STRBUF_INIT; size_t prefix_len; - struct object_id *orig = NULL, oid_orig, - *old_orig = NULL, oid_old_orig; + struct object_id *old_orig = NULL, oid_old_orig; int ret = 0, nr = 0; if (switch_to_branch && !starts_with(switch_to_branch, "refs/")) @@ -38,7 +37,9 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, goto leave_reset_head; } - if ((!oid || !reset_hard) && get_oid("HEAD", &head_oid)) { + if (!get_oid("HEAD", &head_oid)) { + head = &head_oid; + } else if (!oid || !reset_hard) { ret = error(_("could not determine HEAD revision")); goto leave_reset_head; } @@ -98,13 +99,12 @@ reset_head_refs: if (update_orig_head) { if (!get_oid("ORIG_HEAD", &oid_old_orig)) old_orig = &oid_old_orig; - if (!get_oid("HEAD", &oid_orig)) { - orig = &oid_orig; + if (head) { if (!reflog_orig_head) { strbuf_addstr(&msg, "updating ORIG_HEAD"); reflog_orig_head = msg.buf; } - update_ref(reflog_orig_head, "ORIG_HEAD", orig, + update_ref(reflog_orig_head, "ORIG_HEAD", head, old_orig, 0, UPDATE_REFS_MSG_ON_ERR); } else if (old_orig) delete_ref(NULL, "ORIG_HEAD", 
old_orig, 0); @@ -116,7 +116,7 @@ reset_head_refs: reflog_head = msg.buf; } if (!switch_to_branch) - ret = update_ref(reflog_head, "HEAD", oid, orig, + ret = update_ref(reflog_head, "HEAD", oid, head, detach_head ? REF_NO_DEREF : 0, UPDATE_REFS_MSG_ON_ERR); else { @@ -128,7 +128,7 @@ reset_head_refs: } if (run_hook) run_hook_le(NULL, "post-checkout", - oid_to_hex(orig ? orig : null_oid()), + oid_to_hex(head ? head : null_oid()), oid_to_hex(oid), "1", NULL); leave_reset_head: diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index 272b02687b..17ab518f26 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -72,6 +72,19 @@ test_rebase () { test_cmp_rev rebase-on-me $new && test $flag = 1 ' + + test_expect_success "rebase $args fast-forward branch checkout runs post-checkout hook" ' + test_when_finished "test_might_fail git rebase --abort" && + test_when_finished "rm -f .git/post-checkout.args" && + git update-ref refs/heads/rebase-fast-forward three && + git checkout two && + rm -f .git/post-checkout.args && + git rebase $args HEAD rebase-fast-forward && + read old new flag <.git/post-checkout.args && + test_cmp_rev two $old && + test_cmp_rev three $new && + test $flag = 1 + ' } test_rebase --apply && From ab2fba0868860e610619f885031217d9cc63097a Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:39 +0000 Subject: [PATCH 020/150] rebase: do not remove untracked files on checkout If "git rebase [--apply|--merge] <upstream> <branch>" detects that <upstream> is an ancestor of <branch> then it will fast-forward and checkout <branch>. Normally a checkout or picking a commit during a rebase will refuse to overwrite untracked files, however rebase does overwrite untracked files when checking out <branch>. The fix is to only set reset in `unpack_tree_opts` if flags contains `RESET_HEAD_HARD`. t5403 may seem like an odd home for the new test but it will be extended in the next commit to check that the post-checkout hook is not run when the checkout fails. 
The test for `!detach_head` dates back to the original implementation of reset_head() in ac7f467fef ("builtin/rebase: support running "git rebase <upstream>"", 2018-08-07) and was correct until e65123a71d ("builtin rebase: support `git rebase <upstream> <switch-to>`", 2018-09-04) started using reset_head() to checkout <switch-to> when fast-forwarding. Note that 480d3d6bf9 ("Change unpack_trees' 'reset' flag into an enum", 2021-09-27) also fixes this bug as it changes reset_head() to never remove untracked files. I think this fix is still worthwhile as it makes it clear that the same settings are used for detached and non-detached checkouts. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- reset.c | 2 +- t/t5403-post-checkout-hook.sh | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/reset.c b/reset.c index 315fef91d3..3e7b9e2e13 100644 --- a/reset.c +++ b/reset.c @@ -59,7 +59,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, unpack_tree_opts.merge = 1; unpack_tree_opts.preserve_ignored = 0; /* FIXME: !overwrite_ignore */ init_checkout_metadata(&unpack_tree_opts.meta, switch_to_branch, oid, NULL); - if (!detach_head) + if (reset_hard) unpack_tree_opts.reset = UNPACK_RESET_PROTECT_UNTRACKED; if (repo_read_index_unmerged(r) < 0) { diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index 17ab518f26..fd2817b406 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -85,6 +85,16 @@ test_rebase () { test_cmp_rev three $new && test $flag = 1 ' + + test_expect_success "rebase $args checkout does not remove untracked files" ' + test_when_finished "test_might_fail git rebase --abort" && + git update-ref refs/heads/rebase-fast-forward three && + git checkout two && + echo untracked >three.t && + test_when_finished "rm three.t" && + test_must_fail git rebase $args HEAD rebase-fast-forward 2>err && + grep "untracked working tree files would be overwritten by checkout" err +' } test_rebase --apply 
&& From 4840002a5f44a4c256c55f70c59d3b0506d14e21 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:40 +0000 Subject: [PATCH 021/150] rebase --apply: don't run post-checkout hook if there is an error The hook should only be run if the worktree and refs were successfully updated. This primarily affects "rebase --apply" but also "rebase --merge" when it fast-forwards. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- reset.c | 2 +- t/t5403-post-checkout-hook.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/reset.c b/reset.c index 3e7b9e2e13..3537de91f6 100644 --- a/reset.c +++ b/reset.c @@ -126,7 +126,7 @@ reset_head_refs: ret = create_symref("HEAD", switch_to_branch, reflog_head); } - if (run_hook) + if (!ret && run_hook) run_hook_le(NULL, "post-checkout", oid_to_hex(head ? head : null_oid()), oid_to_hex(oid), "1", NULL); diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index fd2817b406..d118181690 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -88,12 +88,16 @@ test_rebase () { test_expect_success "rebase $args checkout does not remove untracked files" ' test_when_finished "test_might_fail git rebase --abort" && + test_when_finished "rm -f .git/post-checkout.args" && git update-ref refs/heads/rebase-fast-forward three && git checkout two && + rm -f .git/post-checkout.args && echo untracked >three.t && test_when_finished "rm three.t" && test_must_fail git rebase $args HEAD rebase-fast-forward 2>err && - grep "untracked working tree files would be overwritten by checkout" err + grep "untracked working tree files would be overwritten by checkout" err && + test_path_is_missing .git/post-checkout.args + ' } From 1946d45844c65ede4e3a514a5decf16612ad79f0 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:41 +0000 Subject: [PATCH 022/150] reset_head(): remove action parameter The only use of the action parameter is to setup the 
error messages for unpack_trees(). All but two cases pass either "checkout" or "reset". The case that passes "reset --hard" would be better passing "reset" so that the error messages match the builtin reset command like all the other callers that are doing a reset. The case that passes "Fast-forwarded" is only updating HEAD and so the parameter is unused in that case as it does not call unpack_trees(). The value to pass to setup_unpack_trees_porcelain() can be determined by checking whether flags contains RESET_HEAD_HARD without the caller having to specify it. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 14 +++++++------- reset.c | 5 +++-- reset.h | 2 +- sequencer.c | 3 +-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index f5c37b7d4a..2e5a535b54 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -583,7 +583,7 @@ static int move_to_original_branch(struct rebase_options *opts) opts->head_name, oid_to_hex(&opts->onto->object.oid)); strbuf_addf(&head_reflog, "rebase finished: returning to %s", opts->head_name); - ret = reset_head(the_repository, NULL, "", opts->head_name, + ret = reset_head(the_repository, NULL, opts->head_name, RESET_HEAD_REFS_ONLY, orig_head_reflog.buf, head_reflog.buf, DEFAULT_REFLOG_ACTION); @@ -674,7 +674,7 @@ static int run_am(struct rebase_options *opts) free(rebased_patches); strvec_clear(&am.args); - reset_head(the_repository, &opts->orig_head, "checkout", + reset_head(the_repository, &opts->orig_head, opts->head_name, 0, "HEAD", NULL, DEFAULT_REFLOG_ACTION); error(_("\ngit encountered an error while preparing the " @@ -820,7 +820,7 @@ static int checkout_up_to_date(struct rebase_options *options) strbuf_addf(&buf, "%s: checkout %s", getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options->switch_to); - if (reset_head(the_repository, &options->orig_head, "checkout", + if (reset_head(the_repository, &options->orig_head, options->head_name, 
RESET_HEAD_RUN_POST_CHECKOUT_HOOK, NULL, buf.buf, DEFAULT_REFLOG_ACTION) < 0) ret = error(_("could not switch to %s"), options->switch_to); @@ -1272,7 +1272,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) rerere_clear(the_repository, &merge_rr); string_list_clear(&merge_rr, 1); - if (reset_head(the_repository, NULL, "reset", NULL, RESET_HEAD_HARD, + if (reset_head(the_repository, NULL, NULL, RESET_HEAD_HARD, NULL, NULL, DEFAULT_REFLOG_ACTION) < 0) die(_("could not discard worktree changes")); remove_branch_state(the_repository, 0); @@ -1290,7 +1290,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) if (read_basic_state(&options)) exit(1); - if (reset_head(the_repository, &options.orig_head, "reset", + if (reset_head(the_repository, &options.orig_head, options.head_name, RESET_HEAD_HARD, NULL, NULL, DEFAULT_REFLOG_ACTION) < 0) die(_("could not move back to %s"), @@ -1759,7 +1759,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) strbuf_addf(&msg, "%s: checkout %s", getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options.onto_name); - if (reset_head(the_repository, &options.onto->object.oid, "checkout", NULL, + if (reset_head(the_repository, &options.onto->object.oid, NULL, RESET_HEAD_DETACH | RESET_ORIG_HEAD | RESET_HEAD_RUN_POST_CHECKOUT_HOOK, NULL, msg.buf, DEFAULT_REFLOG_ACTION)) @@ -1777,7 +1777,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) strbuf_addf(&msg, "rebase finished: %s onto %s", options.head_name ? 
options.head_name : "detached HEAD", oid_to_hex(&options.onto->object.oid)); - reset_head(the_repository, NULL, "Fast-forwarded", options.head_name, + reset_head(the_repository, NULL, options.head_name, RESET_HEAD_REFS_ONLY, "HEAD", msg.buf, DEFAULT_REFLOG_ACTION); strbuf_release(&msg); diff --git a/reset.c b/reset.c index 3537de91f6..7841b2b2a0 100644 --- a/reset.c +++ b/reset.c @@ -8,7 +8,7 @@ #include "tree.h" #include "unpack-trees.h" -int reset_head(struct repository *r, struct object_id *oid, const char *action, +int reset_head(struct repository *r, struct object_id *oid, const char *switch_to_branch, unsigned flags, const char *reflog_orig_head, const char *reflog_head, const char *default_reflog_action) @@ -23,7 +23,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, struct lock_file lock = LOCK_INIT; struct unpack_trees_options unpack_tree_opts = { 0 }; struct tree *tree; - const char *reflog_action; + const char *action, *reflog_action; struct strbuf msg = STRBUF_INIT; size_t prefix_len; struct object_id *old_orig = NULL, oid_old_orig; @@ -50,6 +50,7 @@ int reset_head(struct repository *r, struct object_id *oid, const char *action, if (refs_only) goto reset_head_refs; + action = reset_hard ? 
"reset" : "checkout"; setup_unpack_trees_porcelain(&unpack_tree_opts, action); unpack_tree_opts.head_idx = 1; unpack_tree_opts.src_index = r->index; diff --git a/reset.h b/reset.h index 12f83c78e2..2daec80425 100644 --- a/reset.h +++ b/reset.h @@ -12,7 +12,7 @@ #define RESET_HEAD_REFS_ONLY (1<<3) #define RESET_ORIG_HEAD (1<<4) -int reset_head(struct repository *r, struct object_id *oid, const char *action, +int reset_head(struct repository *r, struct object_id *oid, const char *switch_to_branch, unsigned flags, const char *reflog_orig_head, const char *reflog_head, const char *default_reflog_action); diff --git a/sequencer.c b/sequencer.c index b4135a78c9..e18329a399 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4137,8 +4137,7 @@ void create_autostash(struct repository *r, const char *path, path); write_file(path, "%s", oid_to_hex(&oid)); printf(_("Created autostash: %s\n"), buf.buf); - if (reset_head(r, NULL, "reset --hard", - NULL, RESET_HEAD_HARD, NULL, NULL, + if (reset_head(r, NULL, NULL, RESET_HEAD_HARD, NULL, NULL, default_reflog_action) < 0) die(_("could not reset --hard")); From d6a9f5ea8e97dd9435e5fa02cc129c1b241934f2 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:42 +0000 Subject: [PATCH 023/150] reset_head(): factor out ref updates In the next commit we will stop trying to update HEAD when we are removing uncommitted changes from the working tree. Move the code that updates the refs to its own function in preparation for that. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- reset.c | 110 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/reset.c b/reset.c index 7841b2b2a0..56d6e2a06d 100644 --- a/reset.c +++ b/reset.c @@ -8,25 +8,75 @@ #include "tree.h" #include "unpack-trees.h" +static int update_refs(const struct object_id *oid, const char *switch_to_branch, + const struct object_id *head, const char *reflog_head, + const char *reflog_orig_head, + const char *default_reflog_action, unsigned flags) +{ + unsigned detach_head = flags & RESET_HEAD_DETACH; + unsigned run_hook = flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; + unsigned update_orig_head = flags & RESET_ORIG_HEAD; + struct object_id *old_orig = NULL, oid_old_orig; + struct strbuf msg = STRBUF_INIT; + const char *reflog_action; + size_t prefix_len; + int ret; + + reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT); + strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : default_reflog_action); + prefix_len = msg.len; + + if (update_orig_head) { + if (!get_oid("ORIG_HEAD", &oid_old_orig)) + old_orig = &oid_old_orig; + if (head) { + if (!reflog_orig_head) { + strbuf_addstr(&msg, "updating ORIG_HEAD"); + reflog_orig_head = msg.buf; + } + update_ref(reflog_orig_head, "ORIG_HEAD", head, + old_orig, 0, UPDATE_REFS_MSG_ON_ERR); + } else if (old_orig) + delete_ref(NULL, "ORIG_HEAD", old_orig, 0); + } + + if (!reflog_head) { + strbuf_setlen(&msg, prefix_len); + strbuf_addstr(&msg, "updating HEAD"); + reflog_head = msg.buf; + } + if (!switch_to_branch) + ret = update_ref(reflog_head, "HEAD", oid, head, + detach_head ? REF_NO_DEREF : 0, + UPDATE_REFS_MSG_ON_ERR); + else { + ret = update_ref(reflog_head, switch_to_branch, oid, + NULL, 0, UPDATE_REFS_MSG_ON_ERR); + if (!ret) + ret = create_symref("HEAD", switch_to_branch, + reflog_head); + } + if (!ret && run_hook) + run_hook_le(NULL, "post-checkout", + oid_to_hex(head ? 
head : null_oid()), + oid_to_hex(oid), "1", NULL); + strbuf_release(&msg); + return ret; +} + int reset_head(struct repository *r, struct object_id *oid, const char *switch_to_branch, unsigned flags, const char *reflog_orig_head, const char *reflog_head, const char *default_reflog_action) { - unsigned detach_head = flags & RESET_HEAD_DETACH; unsigned reset_hard = flags & RESET_HEAD_HARD; - unsigned run_hook = flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; unsigned refs_only = flags & RESET_HEAD_REFS_ONLY; - unsigned update_orig_head = flags & RESET_ORIG_HEAD; struct object_id *head = NULL, head_oid; struct tree_desc desc[2] = { { NULL }, { NULL } }; struct lock_file lock = LOCK_INIT; struct unpack_trees_options unpack_tree_opts = { 0 }; struct tree *tree; - const char *action, *reflog_action; - struct strbuf msg = STRBUF_INIT; - size_t prefix_len; - struct object_id *old_orig = NULL, oid_old_orig; + const char *action; int ret = 0, nr = 0; if (switch_to_branch && !starts_with(switch_to_branch, "refs/")) @@ -48,7 +98,9 @@ int reset_head(struct repository *r, struct object_id *oid, oid = &head_oid; if (refs_only) - goto reset_head_refs; + return update_refs(oid, switch_to_branch, head, reflog_head, + reflog_orig_head, default_reflog_action, + flags); action = reset_hard ? "reset" : "checkout"; setup_unpack_trees_porcelain(&unpack_tree_opts, action); @@ -92,48 +144,10 @@ int reset_head(struct repository *r, struct object_id *oid, goto leave_reset_head; } -reset_head_refs: - reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT); - strbuf_addf(&msg, "%s: ", reflog_action ? 
reflog_action : default_reflog_action); - prefix_len = msg.len; - - if (update_orig_head) { - if (!get_oid("ORIG_HEAD", &oid_old_orig)) - old_orig = &oid_old_orig; - if (head) { - if (!reflog_orig_head) { - strbuf_addstr(&msg, "updating ORIG_HEAD"); - reflog_orig_head = msg.buf; - } - update_ref(reflog_orig_head, "ORIG_HEAD", head, - old_orig, 0, UPDATE_REFS_MSG_ON_ERR); - } else if (old_orig) - delete_ref(NULL, "ORIG_HEAD", old_orig, 0); - } - - if (!reflog_head) { - strbuf_setlen(&msg, prefix_len); - strbuf_addstr(&msg, "updating HEAD"); - reflog_head = msg.buf; - } - if (!switch_to_branch) - ret = update_ref(reflog_head, "HEAD", oid, head, - detach_head ? REF_NO_DEREF : 0, - UPDATE_REFS_MSG_ON_ERR); - else { - ret = update_ref(reflog_head, switch_to_branch, oid, - NULL, 0, UPDATE_REFS_MSG_ON_ERR); - if (!ret) - ret = create_symref("HEAD", switch_to_branch, - reflog_head); - } - if (!ret && run_hook) - run_hook_le(NULL, "post-checkout", - oid_to_hex(head ? head : null_oid()), - oid_to_hex(oid), "1", NULL); + ret = update_refs(oid, switch_to_branch, head, reflog_head, + reflog_orig_head, default_reflog_action, flags); leave_reset_head: - strbuf_release(&msg); rollback_lock_file(&lock); clear_unpack_trees_porcelain(&unpack_tree_opts); while (nr) From 1526d0fcfd20efca24bc96a4bc14c8d5459ec470 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:43 +0000 Subject: [PATCH 024/150] reset_head(): make default_reflog_action optional This parameter is only needed when a ref is going to be updated and the caller does not pass an explicit reflog message. Callers that are only discarding uncommitted changes in the working tree such as "rebase --skip" or create_autostash() do not update any refs so should not have to worry about passing this parameter. This change is not intended to have any user visible changes. The pointer comparison between `oid` and `&head_oid` checks that the caller did not pass an oid to be checked out. 
As no callers pass RESET_HEAD_RUN_POST_CHECKOUT_HOOK without passing an oid there are no changes to when the post-checkout hook is run. As update_ref() only updates the ref if the oid passed to it differs from the current ref there are no changes to when HEAD is updated. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 10 ++++------ reset.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 2e5a535b54..82be965915 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -585,8 +585,7 @@ static int move_to_original_branch(struct rebase_options *opts) opts->head_name); ret = reset_head(the_repository, NULL, opts->head_name, RESET_HEAD_REFS_ONLY, - orig_head_reflog.buf, head_reflog.buf, - DEFAULT_REFLOG_ACTION); + orig_head_reflog.buf, head_reflog.buf, NULL); strbuf_release(&orig_head_reflog); strbuf_release(&head_reflog); @@ -822,7 +821,7 @@ static int checkout_up_to_date(struct rebase_options *options) options->switch_to); if (reset_head(the_repository, &options->orig_head, options->head_name, RESET_HEAD_RUN_POST_CHECKOUT_HOOK, - NULL, buf.buf, DEFAULT_REFLOG_ACTION) < 0) + NULL, buf.buf, NULL) < 0) ret = error(_("could not switch to %s"), options->switch_to); strbuf_release(&buf); @@ -1273,7 +1272,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) string_list_clear(&merge_rr, 1); if (reset_head(the_repository, NULL, NULL, RESET_HEAD_HARD, - NULL, NULL, DEFAULT_REFLOG_ACTION) < 0) + NULL, NULL, NULL) < 0) die(_("could not discard worktree changes")); remove_branch_state(the_repository, 0); if (read_basic_state(&options)) @@ -1778,8 +1777,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) options.head_name ? 
options.head_name : "detached HEAD", oid_to_hex(&options.onto->object.oid)); reset_head(the_repository, NULL, options.head_name, - RESET_HEAD_REFS_ONLY, "HEAD", msg.buf, - DEFAULT_REFLOG_ACTION); + RESET_HEAD_REFS_ONLY, "HEAD", msg.buf, NULL); strbuf_release(&msg); ret = finish_rebase(&options); goto cleanup; diff --git a/reset.c b/reset.c index 56d6e2a06d..4a92e4bc30 100644 --- a/reset.c +++ b/reset.c @@ -22,8 +22,13 @@ static int update_refs(const struct object_id *oid, const char *switch_to_branch size_t prefix_len; int ret; - reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT); - strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : default_reflog_action); + if ((update_orig_head && !reflog_orig_head) || !reflog_head) { + if (!default_reflog_action) + BUG("default_reflog_action must be given when reflog messages are omitted"); + reflog_action = getenv(GIT_REFLOG_ACTION_ENVIRONMENT); + strbuf_addf(&msg, "%s: ", reflog_action ? reflog_action : + default_reflog_action); + } prefix_len = msg.len; if (update_orig_head) { @@ -71,6 +76,7 @@ int reset_head(struct repository *r, struct object_id *oid, { unsigned reset_hard = flags & RESET_HEAD_HARD; unsigned refs_only = flags & RESET_HEAD_REFS_ONLY; + unsigned update_orig_head = flags & RESET_ORIG_HEAD; struct object_id *head = NULL, head_oid; struct tree_desc desc[2] = { { NULL }, { NULL } }; struct lock_file lock = LOCK_INIT; @@ -144,8 +150,10 @@ int reset_head(struct repository *r, struct object_id *oid, goto leave_reset_head; } - ret = update_refs(oid, switch_to_branch, head, reflog_head, - reflog_orig_head, default_reflog_action, flags); + if (oid != &head_oid || update_orig_head || switch_to_branch) + ret = update_refs(oid, switch_to_branch, head, reflog_head, + reflog_orig_head, default_reflog_action, + flags); leave_reset_head: rollback_lock_file(&lock); From b7de153bd9332a992baa6f937372f0b1833f61e5 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:44 +0000 Subject: [PATCH 025/150] 
create_autostash(): remove unneeded parameter The default_reflog parameter of create_autostash() is passed to reset_head(). However as creating a stash does not involve updating any refs the parameter is not used by reset_head(). Removing the parameter from create_autostash() simplifies the callers. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/merge.c | 6 ++---- builtin/rebase.c | 8 ++++---- sequencer.c | 5 ++--- sequencer.h | 3 +-- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/builtin/merge.c b/builtin/merge.c index ea3112e0c0..cb0e4e2225 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1565,8 +1565,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) if (autostash) create_autostash(the_repository, - git_path_merge_autostash(the_repository), - "merge"); + git_path_merge_autostash(the_repository)); if (checkout_fast_forward(the_repository, &head_commit->object.oid, &commit->object.oid, @@ -1637,8 +1636,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) if (autostash) create_autostash(the_repository, - git_path_merge_autostash(the_repository), - "merge"); + git_path_merge_autostash(the_repository)); /* We are going to make a new commit. 
*/ git_committer_info(IDENT_STRICT); diff --git a/builtin/rebase.c b/builtin/rebase.c index 82be965915..3d78b5c8be 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1657,10 +1657,10 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) if (repo_read_index(the_repository) < 0) die(_("could not read index")); - if (options.autostash) { - create_autostash(the_repository, state_dir_path("autostash", &options), - DEFAULT_REFLOG_ACTION); - } + if (options.autostash) + create_autostash(the_repository, + state_dir_path("autostash", &options)); + if (require_clean_work_tree(the_repository, "rebase", _("Please commit or stash them."), 1, 1)) { diff --git a/sequencer.c b/sequencer.c index e18329a399..119564f435 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4100,8 +4100,7 @@ static enum todo_command peek_command(struct todo_list *todo_list, int offset) return -1; } -void create_autostash(struct repository *r, const char *path, - const char *default_reflog_action) +void create_autostash(struct repository *r, const char *path) { struct strbuf buf = STRBUF_INIT; struct lock_file lock_file = LOCK_INIT; @@ -4138,7 +4137,7 @@ void create_autostash(struct repository *r, const char *path, write_file(path, "%s", oid_to_hex(&oid)); printf(_("Created autostash: %s\n"), buf.buf); if (reset_head(r, NULL, NULL, RESET_HEAD_HARD, NULL, NULL, - default_reflog_action) < 0) + NULL) < 0) die(_("could not reset --hard")); if (discard_index(r->index) < 0 || diff --git a/sequencer.h b/sequencer.h index 05a7d2ba6b..da64473636 100644 --- a/sequencer.h +++ b/sequencer.h @@ -197,8 +197,7 @@ void commit_post_rewrite(struct repository *r, const struct commit *current_head, const struct object_id *new_head); -void create_autostash(struct repository *r, const char *path, - const char *default_reflog_action); +void create_autostash(struct repository *r, const char *path); int save_autostash(const char *path); int apply_autostash(const char *path); int apply_autostash_oid(const char 
*stash_oid); From ee464c4e37c7f34c4e5ba2fce35df4149083e5ea Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:45 +0000 Subject: [PATCH 026/150] rebase: cleanup reset_head() calls If ORIG_HEAD is not set by passing RESET_ORIG_HEAD then there is no need to pass anything for reflog_orig_head. In addition to the callers fixed in this commit move_to_original_branch() also passes reflog_orig_head without setting ORIG_HEAD. That caller is mistakenly passing the message it wants to put in the branch reflog which is not currently possible so we delay fixing that caller until we can pass the message as the branch reflog. A later commit will make it a BUG() to pass reflog_orig_head without RESET_ORIG_HEAD; that change cannot be done here as it needs to wait for move_to_original_branch() to be fixed first. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 3d78b5c8be..fdd822c470 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -675,7 +675,7 @@ static int run_am(struct rebase_options *opts) reset_head(the_repository, &opts->orig_head, opts->head_name, 0, - "HEAD", NULL, DEFAULT_REFLOG_ACTION); + NULL, NULL, DEFAULT_REFLOG_ACTION); error(_("\ngit encountered an error while preparing the " "patches to replay\n" "these revisions:\n" @@ -1777,7 +1777,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) options.head_name ? 
options.head_name : "detached HEAD", oid_to_hex(&options.onto->object.oid)); reset_head(the_repository, NULL, options.head_name, - RESET_HEAD_REFS_ONLY, "HEAD", msg.buf, NULL); + RESET_HEAD_REFS_ONLY, NULL, msg.buf, NULL); strbuf_release(&msg); ret = finish_rebase(&options); goto cleanup; From 6ae8086161d81a707ff36dfdc07f57e4f473e0fd Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:46 +0000 Subject: [PATCH 027/150] reset_head(): take struct reset_head_opts This function takes a confusingly large number of parameters which makes it difficult to remember which order to pass them in. The following commits will add a couple more parameters which makes the problem worse. To address this, change the function to take a struct of options. Using a struct means that it is no longer necessary to remember which order to pass the parameters in and anyone reading the code can easily see which value is passed to each parameter. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 57 ++++++++++++++++++++++++++++++------------------ reset.c | 38 +++++++++++++++----------------- reset.h | 40 +++++++++++++++++++++++++++++---- sequencer.c | 5 ++--- 4 files changed, 92 insertions(+), 48 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index fdd822c470..ecc368dd4f 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -571,6 +571,7 @@ static int finish_rebase(struct rebase_options *opts) static int move_to_original_branch(struct rebase_options *opts) { struct strbuf orig_head_reflog = STRBUF_INIT, head_reflog = STRBUF_INIT; + struct reset_head_opts ropts = { 0 }; int ret; if (!opts->head_name) @@ -583,9 +584,11 @@ static int move_to_original_branch(struct rebase_options *opts) opts->head_name, oid_to_hex(&opts->onto->object.oid)); strbuf_addf(&head_reflog, "rebase finished: returning to %s", opts->head_name); - ret = reset_head(the_repository, NULL, opts->head_name, - RESET_HEAD_REFS_ONLY, - orig_head_reflog.buf, 
head_reflog.buf, NULL); + ropts.branch = opts->head_name; + ropts.flags = RESET_HEAD_REFS_ONLY; + ropts.orig_head_msg = orig_head_reflog.buf; + ropts.head_msg = head_reflog.buf; + ret = reset_head(the_repository, &ropts); strbuf_release(&orig_head_reflog); strbuf_release(&head_reflog); @@ -669,13 +672,15 @@ static int run_am(struct rebase_options *opts) status = run_command(&format_patch); if (status) { + struct reset_head_opts ropts = { 0 }; unlink(rebased_patches); free(rebased_patches); strvec_clear(&am.args); - reset_head(the_repository, &opts->orig_head, - opts->head_name, 0, - NULL, NULL, DEFAULT_REFLOG_ACTION); + ropts.oid = &opts->orig_head; + ropts.branch = opts->head_name; + ropts.default_reflog_action = DEFAULT_REFLOG_ACTION; + reset_head(the_repository, &ropts); error(_("\ngit encountered an error while preparing the " "patches to replay\n" "these revisions:\n" @@ -814,14 +819,17 @@ static int rebase_config(const char *var, const char *value, void *data) static int checkout_up_to_date(struct rebase_options *options) { struct strbuf buf = STRBUF_INIT; + struct reset_head_opts ropts = { 0 }; int ret = 0; strbuf_addf(&buf, "%s: checkout %s", getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options->switch_to); - if (reset_head(the_repository, &options->orig_head, - options->head_name, RESET_HEAD_RUN_POST_CHECKOUT_HOOK, - NULL, buf.buf, NULL) < 0) + ropts.oid = &options->orig_head; + ropts.branch = options->head_name; + ropts.flags = RESET_HEAD_RUN_POST_CHECKOUT_HOOK; + ropts.head_msg = buf.buf; + if (reset_head(the_repository, &ropts) < 0) ret = error(_("could not switch to %s"), options->switch_to); strbuf_release(&buf); @@ -1033,6 +1041,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) int reschedule_failed_exec = -1; int allow_preemptive_ff = 1; int preserve_merges_selected = 0; + struct reset_head_opts ropts = { 0 }; struct option builtin_rebase_options[] = { OPT_STRING(0, "onto", &options.onto_name, N_("revision"), @@ -1270,9 +1279,8 @@ int 
cmd_rebase(int argc, const char **argv, const char *prefix) rerere_clear(the_repository, &merge_rr); string_list_clear(&merge_rr, 1); - - if (reset_head(the_repository, NULL, NULL, RESET_HEAD_HARD, - NULL, NULL, NULL) < 0) + ropts.flags = RESET_HEAD_HARD; + if (reset_head(the_repository, &ropts) < 0) die(_("could not discard worktree changes")); remove_branch_state(the_repository, 0); if (read_basic_state(&options)) @@ -1289,9 +1297,11 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) if (read_basic_state(&options)) exit(1); - if (reset_head(the_repository, &options.orig_head, - options.head_name, RESET_HEAD_HARD, - NULL, NULL, DEFAULT_REFLOG_ACTION) < 0) + ropts.oid = &options.orig_head; + ropts.branch = options.head_name; + ropts.flags = RESET_HEAD_HARD; + ropts.default_reflog_action = DEFAULT_REFLOG_ACTION; + if (reset_head(the_repository, &ropts) < 0) die(_("could not move back to %s"), oid_to_hex(&options.orig_head)); remove_branch_state(the_repository, 0); @@ -1758,10 +1768,12 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) strbuf_addf(&msg, "%s: checkout %s", getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options.onto_name); - if (reset_head(the_repository, &options.onto->object.oid, NULL, - RESET_HEAD_DETACH | RESET_ORIG_HEAD | - RESET_HEAD_RUN_POST_CHECKOUT_HOOK, - NULL, msg.buf, DEFAULT_REFLOG_ACTION)) + ropts.oid = &options.onto->object.oid; + ropts.flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD | + RESET_HEAD_RUN_POST_CHECKOUT_HOOK; + ropts.head_msg = msg.buf; + ropts.default_reflog_action = DEFAULT_REFLOG_ACTION; + if (reset_head(the_repository, &ropts)) die(_("Could not detach HEAD")); strbuf_release(&msg); @@ -1776,8 +1788,11 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) strbuf_addf(&msg, "rebase finished: %s onto %s", options.head_name ? 
options.head_name : "detached HEAD", oid_to_hex(&options.onto->object.oid)); - reset_head(the_repository, NULL, options.head_name, - RESET_HEAD_REFS_ONLY, NULL, msg.buf, NULL); + memset(&ropts, 0, sizeof(ropts)); + ropts.branch = options.head_name; + ropts.flags = RESET_HEAD_REFS_ONLY; + ropts.head_msg = msg.buf; + reset_head(the_repository, &ropts); strbuf_release(&msg); ret = finish_rebase(&options); goto cleanup; diff --git a/reset.c b/reset.c index 4a92e4bc30..78145d5c45 100644 --- a/reset.c +++ b/reset.c @@ -8,14 +8,17 @@ #include "tree.h" #include "unpack-trees.h" -static int update_refs(const struct object_id *oid, const char *switch_to_branch, - const struct object_id *head, const char *reflog_head, - const char *reflog_orig_head, - const char *default_reflog_action, unsigned flags) +static int update_refs(const struct reset_head_opts *opts, + const struct object_id *oid, + const struct object_id *head) { - unsigned detach_head = flags & RESET_HEAD_DETACH; - unsigned run_hook = flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; - unsigned update_orig_head = flags & RESET_ORIG_HEAD; + unsigned detach_head = opts->flags & RESET_HEAD_DETACH; + unsigned run_hook = opts->flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; + unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD; + const char *switch_to_branch = opts->branch; + const char *reflog_head = opts->head_msg; + const char *reflog_orig_head = opts->orig_head_msg; + const char *default_reflog_action = opts->default_reflog_action; struct object_id *old_orig = NULL, oid_old_orig; struct strbuf msg = STRBUF_INIT; const char *reflog_action; @@ -69,14 +72,13 @@ static int update_refs(const struct object_id *oid, const char *switch_to_branch return ret; } -int reset_head(struct repository *r, struct object_id *oid, - const char *switch_to_branch, unsigned flags, - const char *reflog_orig_head, const char *reflog_head, - const char *default_reflog_action) +int reset_head(struct repository *r, const struct reset_head_opts 
*opts) { - unsigned reset_hard = flags & RESET_HEAD_HARD; - unsigned refs_only = flags & RESET_HEAD_REFS_ONLY; - unsigned update_orig_head = flags & RESET_ORIG_HEAD; + const struct object_id *oid = opts->oid; + const char *switch_to_branch = opts->branch; + unsigned reset_hard = opts->flags & RESET_HEAD_HARD; + unsigned refs_only = opts->flags & RESET_HEAD_REFS_ONLY; + unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD; struct object_id *head = NULL, head_oid; struct tree_desc desc[2] = { { NULL }, { NULL } }; struct lock_file lock = LOCK_INIT; @@ -104,9 +106,7 @@ int reset_head(struct repository *r, struct object_id *oid, oid = &head_oid; if (refs_only) - return update_refs(oid, switch_to_branch, head, reflog_head, - reflog_orig_head, default_reflog_action, - flags); + return update_refs(opts, oid, head); action = reset_hard ? "reset" : "checkout"; setup_unpack_trees_porcelain(&unpack_tree_opts, action); @@ -151,9 +151,7 @@ int reset_head(struct repository *r, struct object_id *oid, } if (oid != &head_oid || update_orig_head || switch_to_branch) - ret = update_refs(oid, switch_to_branch, head, reflog_head, - reflog_orig_head, default_reflog_action, - flags); + ret = update_refs(opts, oid, head); leave_reset_head: rollback_lock_file(&lock); diff --git a/reset.h b/reset.h index 2daec80425..a205be2fb8 100644 --- a/reset.h +++ b/reset.h @@ -6,15 +6,47 @@ #define GIT_REFLOG_ACTION_ENVIRONMENT "GIT_REFLOG_ACTION" +/* Request a detached checkout */ #define RESET_HEAD_DETACH (1<<0) +/* Request a reset rather than a checkout */ #define RESET_HEAD_HARD (1<<1) +/* Run the post-checkout hook */ #define RESET_HEAD_RUN_POST_CHECKOUT_HOOK (1<<2) +/* Only update refs, do not touch the worktree */ #define RESET_HEAD_REFS_ONLY (1<<3) +/* Update ORIG_HEAD as well as HEAD */ #define RESET_ORIG_HEAD (1<<4) -int reset_head(struct repository *r, struct object_id *oid, - const char *switch_to_branch, unsigned flags, - const char *reflog_orig_head, const char *reflog_head, - const 
char *default_reflog_action); +struct reset_head_opts { + /* + * The commit to checkout/reset to. Defaults to HEAD. + */ + const struct object_id *oid; + /* + * Optional branch to switch to. + */ + const char *branch; + /* + * Flags defined above. + */ + unsigned flags; + /* + * Optional reflog message for HEAD, if this omitted but oid or branch + * are given then default_reflog_action must be given. + */ + const char *head_msg; + /* + * Optional reflog message for ORIG_HEAD, if this omitted and flags + * contains RESET_ORIG_HEAD then default_reflog_action must be given. + */ + const char *orig_head_msg; + /* + * Action to use in default reflog messages, only required if a ref is + * being updated and the reflog messages above are omitted. + */ + const char *default_reflog_action; +}; + +int reset_head(struct repository *r, const struct reset_head_opts *opts); #endif diff --git a/sequencer.c b/sequencer.c index 119564f435..55ed074ae7 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4115,6 +4115,7 @@ void create_autostash(struct repository *r, const char *path) if (has_unstaged_changes(r, 1) || has_uncommitted_changes(r, 1)) { struct child_process stash = CHILD_PROCESS_INIT; + struct reset_head_opts ropts = { .flags = RESET_HEAD_HARD }; struct object_id oid; strvec_pushl(&stash.args, @@ -4136,10 +4137,8 @@ void create_autostash(struct repository *r, const char *path) path); write_file(path, "%s", oid_to_hex(&oid)); printf(_("Created autostash: %s\n"), buf.buf); - if (reset_head(r, NULL, NULL, RESET_HEAD_HARD, NULL, NULL, - NULL) < 0) + if (reset_head(r, &ropts) < 0) die(_("could not reset --hard")); - if (discard_index(r->index) < 0 || repo_read_index(r) < 0) die(_("could not read index")); From 7700ab087b82f71d19134141045b95063e407344 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:47 +0000 Subject: [PATCH 028/150] rebase --apply: fix reflog move_to_original_branch() passes the message intended for the branch reflog as `orig_head_msg`. 
Fix this by adding a `branch_msg` member to struct reset_head_opts and add a regression test. Note that these reflog messages do not respect GIT_REFLOG_ACTION. They are not alone in that and will be fixed in a future series. The "merge" backend already has tests that check both the branch and HEAD reflogs. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 8 ++++---- reset.c | 12 ++++++++++-- reset.h | 4 ++++ t/t3406-rebase-message.sh | 23 +++++++++++++++++++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index ecc368dd4f..b55a9cff05 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -570,7 +570,7 @@ static int finish_rebase(struct rebase_options *opts) static int move_to_original_branch(struct rebase_options *opts) { - struct strbuf orig_head_reflog = STRBUF_INIT, head_reflog = STRBUF_INIT; + struct strbuf branch_reflog = STRBUF_INIT, head_reflog = STRBUF_INIT; struct reset_head_opts ropts = { 0 }; int ret; @@ -580,17 +580,17 @@ static int move_to_original_branch(struct rebase_options *opts) if (!opts->onto) BUG("move_to_original_branch without onto"); - strbuf_addf(&orig_head_reflog, "rebase finished: %s onto %s", + strbuf_addf(&branch_reflog, "rebase finished: %s onto %s", opts->head_name, oid_to_hex(&opts->onto->object.oid)); strbuf_addf(&head_reflog, "rebase finished: returning to %s", opts->head_name); ropts.branch = opts->head_name; ropts.flags = RESET_HEAD_REFS_ONLY; - ropts.orig_head_msg = orig_head_reflog.buf; + ropts.branch_msg = branch_reflog.buf; ropts.head_msg = head_reflog.buf; ret = reset_head(the_repository, &ropts); - strbuf_release(&orig_head_reflog); + strbuf_release(&branch_reflog); strbuf_release(&head_reflog); return ret; } diff --git a/reset.c b/reset.c index 78145d5c45..e02915c0f6 100644 --- a/reset.c +++ b/reset.c @@ -16,6 +16,7 @@ static int update_refs(const struct reset_head_opts *opts, unsigned run_hook = opts->flags & 
RESET_HEAD_RUN_POST_CHECKOUT_HOOK; unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD; const char *switch_to_branch = opts->branch; + const char *reflog_branch = opts->branch_msg; const char *reflog_head = opts->head_msg; const char *reflog_orig_head = opts->orig_head_msg; const char *default_reflog_action = opts->default_reflog_action; @@ -58,8 +59,9 @@ static int update_refs(const struct reset_head_opts *opts, detach_head ? REF_NO_DEREF : 0, UPDATE_REFS_MSG_ON_ERR); else { - ret = update_ref(reflog_head, switch_to_branch, oid, - NULL, 0, UPDATE_REFS_MSG_ON_ERR); + ret = update_ref(reflog_branch ? reflog_branch : reflog_head, + switch_to_branch, oid, NULL, 0, + UPDATE_REFS_MSG_ON_ERR); if (!ret) ret = create_symref("HEAD", switch_to_branch, reflog_head); @@ -90,6 +92,12 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts) if (switch_to_branch && !starts_with(switch_to_branch, "refs/")) BUG("Not a fully qualified branch: '%s'", switch_to_branch); + if (opts->orig_head_msg && !update_orig_head) + BUG("ORIG_HEAD reflog message given without updating ORIG_HEAD"); + + if (opts->branch_msg && !opts->branch) + BUG("branch reflog message given without a branch"); + if (!refs_only && repo_hold_locked_index(r, &lock, LOCK_REPORT_ON_ERROR) < 0) { ret = -1; goto leave_reset_head; diff --git a/reset.h b/reset.h index a205be2fb8..7ef7e43ea8 100644 --- a/reset.h +++ b/reset.h @@ -30,6 +30,10 @@ struct reset_head_opts { * Flags defined above. */ unsigned flags; + /* + * Optional reflog message for branch, defaults to head_msg. + */ + const char *branch_msg; /* * Optional reflog message for HEAD, if this omitted but oid or branch * are given then default_reflog_action must be given. 
diff --git a/t/t3406-rebase-message.sh b/t/t3406-rebase-message.sh index 77a313f62e..d17b450e81 100755 --- a/t/t3406-rebase-message.sh +++ b/t/t3406-rebase-message.sh @@ -105,6 +105,29 @@ test_expect_success 'GIT_REFLOG_ACTION' ' test_cmp expect actual ' +test_expect_success 'rebase --apply reflog' ' + git checkout -b reflog-apply start && + old_head_reflog="$(git log -g --format=%gs -1 HEAD)" && + + git rebase --apply Y && + + git log -g --format=%gs -4 HEAD >actual && + cat >expect <<-EOF && + rebase finished: returning to refs/heads/reflog-apply + rebase: Z + rebase: checkout Y + $old_head_reflog + EOF + test_cmp expect actual && + + git log -g --format=%gs -2 reflog-apply >actual && + cat >expect <<-EOF && + rebase finished: refs/heads/reflog-apply onto $(git rev-parse Y) + branch: Created from start + EOF + test_cmp expect actual +' + test_expect_success 'rebase -i onto unrelated history' ' git init unrelated && test_commit -C unrelated 1 && From cd1528ef8ef9847fc27cff4016bf073f04729504 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:48 +0000 Subject: [PATCH 029/150] rebase --apply: set ORIG_HEAD correctly At the start of a rebase, ORIG_HEAD is updated to the tip of the branch being rebased. Unfortunately reset_head() always uses the current value of HEAD for this which is incorrect if the rebase is started with "git rebase <upstream> <branch>" as in that case ORIG_HEAD should be updated to <branch>. This only affects the "apply" backend as the "merge" backend does not yet use reset_head() for the initial checkout. Fix this by passing in orig_head when calling reset_head() and add some regression tests. 
Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/rebase.c | 1 + reset.c | 4 +++- reset.h | 4 ++++ t/t3418-rebase-continue.sh | 26 ++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index b55a9cff05..e942c300f8 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1769,6 +1769,7 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) strbuf_addf(&msg, "%s: checkout %s", getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options.onto_name); ropts.oid = &options.onto->object.oid; + ropts.orig_head = &options.orig_head, ropts.flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD | RESET_HEAD_RUN_POST_CHECKOUT_HOOK; ropts.head_msg = msg.buf; diff --git a/reset.c b/reset.c index e02915c0f6..448cb3fd78 100644 --- a/reset.c +++ b/reset.c @@ -15,6 +15,7 @@ static int update_refs(const struct reset_head_opts *opts, unsigned detach_head = opts->flags & RESET_HEAD_DETACH; unsigned run_hook = opts->flags & RESET_HEAD_RUN_POST_CHECKOUT_HOOK; unsigned update_orig_head = opts->flags & RESET_ORIG_HEAD; + const struct object_id *orig_head = opts->orig_head; const char *switch_to_branch = opts->branch; const char *reflog_branch = opts->branch_msg; const char *reflog_head = opts->head_msg; @@ -43,7 +44,8 @@ static int update_refs(const struct reset_head_opts *opts, strbuf_addstr(&msg, "updating ORIG_HEAD"); reflog_orig_head = msg.buf; } - update_ref(reflog_orig_head, "ORIG_HEAD", head, + update_ref(reflog_orig_head, "ORIG_HEAD", + orig_head ? orig_head : head, old_orig, 0, UPDATE_REFS_MSG_ON_ERR); } else if (old_orig) delete_ref(NULL, "ORIG_HEAD", old_orig, 0); diff --git a/reset.h b/reset.h index 7ef7e43ea8..a28f81829d 100644 --- a/reset.h +++ b/reset.h @@ -22,6 +22,10 @@ struct reset_head_opts { * The commit to checkout/reset to. Defaults to HEAD. */ const struct object_id *oid; + /* + * Optional value to set ORIG_HEAD. Defaults to HEAD. 
+ */ + const struct object_id *orig_head; /* * Optional branch to switch to. */ diff --git a/t/t3418-rebase-continue.sh b/t/t3418-rebase-continue.sh index 22eca73aa3..130e2f9b55 100755 --- a/t/t3418-rebase-continue.sh +++ b/t/t3418-rebase-continue.sh @@ -308,4 +308,30 @@ test_expect_success 'there is no --no-reschedule-failed-exec in an ongoing rebas test_expect_code 129 git rebase --edit-todo --no-reschedule-failed-exec ' +test_orig_head_helper () { + test_when_finished 'git rebase --abort && + git checkout topic && + git reset --hard commit-new-file-F2-on-topic-branch' && + git update-ref -d ORIG_HEAD && + test_must_fail git rebase "$@" && + test_cmp_rev ORIG_HEAD commit-new-file-F2-on-topic-branch +} + +test_orig_head () { + type=$1 + test_expect_success "rebase $type sets ORIG_HEAD correctly" ' + git checkout topic && + git reset --hard commit-new-file-F2-on-topic-branch && + test_orig_head_helper $type main + ' + + test_expect_success "rebase $type sets ORIG_HEAD correctly" ' + git checkout main && + test_orig_head_helper $type main topic + ' +} + +test_orig_head --apply +test_orig_head --merge + test_done From 38c541ce94048cf72aa4f465be9314423a57f445 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 26 Jan 2022 13:05:49 +0000 Subject: [PATCH 030/150] rebase -m: don't fork git checkout Now that reset_head() can handle the initial checkout of onto correctly use it in the "merge" backend instead of forking "git checkout". This opens the way for us to stop calling the post-checkout hook in the future. Not running "git checkout" means that "rebase -i/m" no longer recurses submodules when checking out "onto" (thanks to Philippe Blain for pointing this out). As the rest of rebase does not know what to do with submodules this is probably a good thing. When using merge-ort rebase ought to be able to handle submodules correctly if it parsed the submodule config; such a change is left for a future patch series. 
The "apply" based rebase has avoided forking git checkout since ac7f467fef ("builtin/rebase: support running "git rebase <upstream>"", 2018-08-07). The code that handles the checkout was moved into libgit by b309a97108 ("reset: extract reset_head() from rebase", 2020-04-07). Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- sequencer.c | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/sequencer.c b/sequencer.c index 55ed074ae7..bdd66b4b67 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4223,42 +4223,26 @@ int apply_autostash_oid(const char *stash_oid) return apply_save_autostash_oid(stash_oid, 1); } -static int run_git_checkout(struct repository *r, struct replay_opts *opts, - const char *commit, const char *action) -{ - struct child_process cmd = CHILD_PROCESS_INIT; - int ret; - - cmd.git_cmd = 1; - - strvec_push(&cmd.args, "checkout"); - strvec_push(&cmd.args, commit); - strvec_pushf(&cmd.env_array, GIT_REFLOG_ACTION "=%s", action); - - if (opts->verbose) - ret = run_command(&cmd); - else - ret = run_command_silent_on_success(&cmd); - - if (!ret) - discard_index(r->index); - - return ret; -} - static int checkout_onto(struct repository *r, struct replay_opts *opts, const char *onto_name, const struct object_id *onto, const struct object_id *orig_head) { - const char *action = reflog_message(opts, "start", "checkout %s", onto_name); - - if (run_git_checkout(r, opts, oid_to_hex(onto), action)) { + struct reset_head_opts ropts = { + .oid = onto, + .orig_head = orig_head, + .flags = RESET_HEAD_DETACH | RESET_ORIG_HEAD | + RESET_HEAD_RUN_POST_CHECKOUT_HOOK, + .head_msg = reflog_message(opts, "start", "checkout %s", + onto_name), + .default_reflog_action = "rebase" + }; + if (reset_head(r, &ropts)) { apply_autostash(rebase_path_autostash()); sequencer_remove_state(opts); return error(_("could not detach HEAD")); } - return update_ref(NULL, "ORIG_HEAD", orig_head, NULL, 0, UPDATE_REFS_MSG_ON_ERR); + 
return 0; } 
static int stopped_at_head(struct repository *r) From 8d56136d038a611ee69cfd6531a5b86028ef147b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:43 +0100 Subject: [PATCH 031/150] object-name tests: add tests for ambiguous object blind spots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the tests for ambiguous objects to check how we handle objects where we return OBJ_BAD when trying to parse them. As noted in [1] we have a blindspot when it comes to this behavior. Since we need to add new test data here let's extend these tests to be tested under SHA-256, in d7a2fc82491 (t1512: skip test if not using SHA-1, 2018-05-13) all of the existing tests were skipped, as they rely on specific SHA-1 object IDs. For these tests it only matters that the first 4 characters of the OID prefix are the same for both SHA-1 and SHA-256. This uses strings that I mined, and have the same prefix when hashed with both. We "test_cmp" the full output to guard against any future regressions, and because a subsequent commit will tweak it. Showing a diff of how the output changes is helpful to explain those subsequent commits. The "sed" invocation in test_cmp_failed_rev_parse() doesn't need a "/g" because under both SHA-1 and SHA-256 we'll wildcard match any trailing part of the OID after our known starting prefix. We'd like to convert all of that to just "..." for the "test_cmp" which follows. 1. 
https://lore.kernel.org/git/YZwbphPpfGk78w2f@coredump.intra.peff.net/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1512-rev-parse-disambiguation.sh | 82 +++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 7891a6becf..010d9c1b8a 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -25,6 +25,88 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +test_cmp_failed_rev_parse () { + dir=$1 + rev=$2 + + cat >expect && + test_must_fail git -C "$dir" rev-parse "$rev" 2>actual.raw && + sed "s/\($rev\)[0-9a-f]*/\1.../" actual && + test_cmp expect actual +} + +test_expect_success 'ambiguous blob output' ' + git init --bare blob.prefix && + ( + cd blob.prefix && + + # Both start with "dead..", under both SHA-1 and SHA-256 + echo brocdnra | git hash-object -w --stdin && + echo brigddsv | git hash-object -w --stdin && + + # Both start with "beef.." + echo 1agllotbh | git hash-object -w --stdin && + echo 1bbfctrkc | git hash-object -w --stdin + ) && + + test_must_fail git -C blob.prefix rev-parse dead && + test_cmp_failed_rev_parse blob.prefix beef <<-\EOF + error: short object ID beef... is ambiguous + hint: The candidates are: + hint: beef... blob + hint: beef... blob + fatal: ambiguous argument '\''beef...'\'': unknown revision or path not in the working tree. + Use '\''--'\'' to separate paths from revisions, like this: + '\''git [...] -- [...]'\'' + EOF +' + +test_expect_success 'ambiguous loose bad object parsed as OBJ_BAD' ' + git init --bare blob.bad && + ( + cd blob.bad && + + # Both have the prefix "bad0" + echo xyzfaowcoh | git hash-object -t bad -w --stdin --literally && + echo xyzhjpyvwl | git hash-object -t bad -w --stdin --literally + ) && + + test_cmp_failed_rev_parse blob.bad bad0 <<-\EOF + error: short object ID bad0... 
is ambiguous + hint: The candidates are: + fatal: invalid object type + EOF +' + +test_expect_success POSIXPERM 'ambigous zlib corrupt loose blob' ' + git init --bare blob.corrupt && + ( + cd blob.corrupt && + + # Both have the prefix "cafe" + echo bnkxmdwz | git hash-object -w --stdin && + oid=$(echo bmwsjxzi | git hash-object -w --stdin) && + + oidf=objects/$(test_oid_to_path "$oid") && + chmod 755 $oidf && + echo broken >$oidf + ) && + + test_cmp_failed_rev_parse blob.corrupt cafe <<-\EOF + error: short object ID cafe... is ambiguous + hint: The candidates are: + error: inflate: data stream error (incorrect header check) + error: unable to unpack cafe... header + error: inflate: data stream error (incorrect header check) + error: unable to unpack cafe... header + hint: cafe... unknown type + hint: cafe... blob + fatal: ambiguous argument '\''cafe...'\'': unknown revision or path not in the working tree. + Use '\''--'\'' to separate paths from revisions, like this: + '\''git [...] -- [...]'\'' + EOF +' + if ! test_have_prereq SHA1 then skip_all='not using SHA-1 for objects' From 6780e6804087a89bddfb0333171d005b309941a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:44 +0100 Subject: [PATCH 032/150] object-name: explicitly handle OBJ_BAD in show_ambiguous_object() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amend the "unknown type" handling in the code that displays the ambiguous object list to assert() that we're either going to get the "real" object types we can pass to type_name(), or a -1 (OBJ_BAD) return value from oid_object_info(). See [1] for the current output, and [1] for the commit that added the "unknown type" handling. We are never going to get an "unknown type" in the sense of custom types crafted with "hash-object --literally", since we're not using the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. 
If we manage to otherwise unpack such an object without errors we'll die() in parse_loose_header_extended() called by sort_ambiguous() before we get to show_ambiguous_object(), as is asserted by the test added in the preceding commit. So saying "unknown type" here was always misleading, we really meant to say that we had a failure parsing the object at all, i.e. that we had repository corruption. If the problem is only that its type is unknown we won't reach this code. So let's emit a generic "[bad object]" instead. As our tests added in the preceding commit show, we'll have emitted various "error" output already in those cases. We should do better in the truly "unknown type" cases, which we'd need to handle if we were passing down the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag. But let's leave that for some future improvement. In a subsequent commit I'll improve the output we do show, and not having to handle the "unknown type" (as in OBJECT_INFO_ALLOW_UNKNOWN_TYPE) simplifies that change. 1. 5cc044e0257 (get_short_oid: sort ambiguous objects by type, then SHA-1, 2018-05-10) 2. 
1ffa26c461 (get_short_sha1: list ambiguous objects on error, 2016-09-26) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-name.c | 14 ++++++++++++-- t/t1512-rev-parse-disambiguation.sh | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/object-name.c b/object-name.c index fdff4601b2..9750634ee7 100644 --- a/object-name.c +++ b/object-name.c @@ -361,6 +361,16 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) return 0; type = oid_object_info(ds->repo, oid, NULL); + + if (type < 0) { + strbuf_addstr(&desc, "[bad object]"); + goto out; + } + + assert(type == OBJ_TREE || type == OBJ_COMMIT || + type == OBJ_BLOB || type == OBJ_TAG); + strbuf_addstr(&desc, type_name(type)); + if (type == OBJ_COMMIT) { struct commit *commit = lookup_commit(ds->repo, oid); if (commit) { @@ -374,9 +384,9 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) strbuf_addf(&desc, " %s", tag->tag); } - advise(" %s %s%s", +out: + advise(" %s %s", repo_find_unique_abbrev(ds->repo, oid, DEFAULT_ABBREV), - type_name(type) ? type_name(type) : "unknown type", desc.buf); strbuf_release(&desc); diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 010d9c1b8a..0282ada133 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -99,7 +99,7 @@ test_expect_success POSIXPERM 'ambigous zlib corrupt loose blob' ' error: unable to unpack cafe... header error: inflate: data stream error (incorrect header check) error: unable to unpack cafe... header - hint: cafe... unknown type + hint: cafe... [bad object] hint: cafe... blob fatal: ambiguous argument '\''cafe...'\'': unknown revision or path not in the working tree. 
Use '\''--'\'' to separate paths from revisions, like this: From 667a560be7ae977e61d81ca4bcb95ded1cdacc9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:45 +0100 Subject: [PATCH 033/150] object-name: explicitly handle bad tags in show_ambiguous_object() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up the handling of OBJ_BAD in the preceding commit and explicitly handle those cases where parse_tag() fails, or we don't end up with a non-NULL pointer in tag->tag. If we run into such a tag we'd previously be silent about it. We really should also be handling these better in parse_tag_buffer() by being more eager to emit an error(), instead of silently aborting with "return -1;". One example of such a tag is the one that's tested for in "t3800-mktag.sh", where the code takes the "size < the_hash_algo->hexsz + 24" branch. But in lieu of earlier missing "error" output let's show the user something to indicate why we're not showing a tag message in these cases, now instead of showing: hint: deadbeef tag We'll instead display: hint: deadbeef tag [tag could not be parsed] Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-name.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/object-name.c b/object-name.c index 9750634ee7..298b742bac 100644 --- a/object-name.c +++ b/object-name.c @@ -382,6 +382,8 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) struct tag *tag = lookup_tag(ds->repo, oid); if (!parse_tag(tag) && tag->tag) strbuf_addf(&desc, " %s", tag->tag); + else + strbuf_addstr(&desc, " [tag could not be parsed]"); } out: From ba5e8a0eb8011f7ffb246804e91c9ba0db3befe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:46 +0100 Subject: [PATCH 034/150] object-name: make ambiguous object output translatable MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the output of show_ambiguous_object() added in [1] and last tweaked in [2] and the preceding commit to be more friendly to translators. By being able to customize the "%s\n" format we're even ready for RTL languages, who'd presumably like to change that to "%s\n". In the case of the existing "tag [tag could not be parsed]" output we'll now instead emit "[bad tag, could not parse it]". This is consistent with the "[bad object]" output. Rephrasing the message like this is possible because we're not unconditionally adding the type_name() at the beginning. 1. 1ffa26c461 (get_short_sha1: list ambiguous objects on error, 2016-09-26) 2. 5cc044e0257 (get_short_oid: sort ambiguous objects by type, then SHA-1, 2018-05-10) Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Josh Steadmon Signed-off-by: Junio C Hamano --- object-name.c | 78 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/object-name.c b/object-name.c index 298b742bac..f31b50bc31 100644 --- a/object-name.c +++ b/object-name.c @@ -356,40 +356,98 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) const struct disambiguate_state *ds = data; struct strbuf desc = STRBUF_INIT; int type; + const char *hash; if (ds->fn && !ds->fn(ds->repo, oid, ds->cb_data)) return 0; + hash = repo_find_unique_abbrev(ds->repo, oid, DEFAULT_ABBREV); type = oid_object_info(ds->repo, oid, NULL); if (type < 0) { - strbuf_addstr(&desc, "[bad object]"); + /* + * TRANSLATORS: This is a line of ambiguous object + * output shown when we cannot look up or parse the + * object in question. E.g. "deadbeef [bad object]". 
+ */ + strbuf_addf(&desc, _("%s [bad object]"), hash); goto out; } assert(type == OBJ_TREE || type == OBJ_COMMIT || type == OBJ_BLOB || type == OBJ_TAG); - strbuf_addstr(&desc, type_name(type)); if (type == OBJ_COMMIT) { + struct strbuf date = STRBUF_INIT; + struct strbuf msg = STRBUF_INIT; struct commit *commit = lookup_commit(ds->repo, oid); + if (commit) { struct pretty_print_context pp = {0}; pp.date_mode.type = DATE_SHORT; - format_commit_message(commit, " %ad - %s", &desc, &pp); + format_commit_message(commit, "%ad", &date, &pp); + format_commit_message(commit, "%s", &msg, &pp); } + + /* + * TRANSLATORS: This is a line of ambiguous commit + * object output. E.g.: + * + * "deadbeef commit 2021-01-01 - Some Commit Message" + */ + strbuf_addf(&desc, _("%s commit %s - %s"), + hash, date.buf, msg.buf); + + strbuf_release(&date); + strbuf_release(&msg); } else if (type == OBJ_TAG) { struct tag *tag = lookup_tag(ds->repo, oid); - if (!parse_tag(tag) && tag->tag) - strbuf_addf(&desc, " %s", tag->tag); - else - strbuf_addstr(&desc, " [tag could not be parsed]"); + + if (!parse_tag(tag) && tag->tag) { + /* + * TRANSLATORS: This is a line of ambiguous + * tag object output. E.g.: + * + * "deadbeef tag Some Tag Message" + * + * The second argument is the "tag" string + * from object.c. + */ + strbuf_addf(&desc, _("%s tag %s"), hash, tag->tag); + } else { + /* + * TRANSLATORS: This is a line of ambiguous + * tag object output where we couldn't parse + * the tag itself. E.g.: + * + * "deadbeef tag [bad tag, could not parse it]" + */ + strbuf_addf(&desc, _("%s [bad tag, could not parse it]"), + hash); + } + } else if (type == OBJ_TREE) { + /* + * TRANSLATORS: This is a line of ambiguous + * object output. E.g. "deadbeef tree". + */ + strbuf_addf(&desc, _("%s tree"), hash); + } else if (type == OBJ_BLOB) { + /* + * TRANSLATORS: This is a line of ambiguous + * object output. E.g. "deadbeef blob". 
+ */ + strbuf_addf(&desc, _("%s blob"), hash); } + out: - advise(" %s %s", - repo_find_unique_abbrev(ds->repo, oid, DEFAULT_ABBREV), - desc.buf); + /* + * TRANSLATORS: This is line item of ambiguous object output + * from describe_ambiguous_object() above. For RTL languages + * you'll probably want to swap the "%s" and leading " " space + * around. + */ + advise(_(" %s"), desc.buf); strbuf_release(&desc); return 0; From 851b3d7671778b74e40bf155c49f6b045f5eb4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:47 +0100 Subject: [PATCH 035/150] object-name: show date for ambiguous tag objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the ambiguous tag object output nicer in the case of tag objects such as ebf3c04b262 (Git 2.32, 2021-06-06) by including the date in the "tagger" header. I.e.: $ git rev-parse b7e68 error: short object ID b7e68 is ambiguous hint: The candidates are: hint: b7e68c41d92 tag 2021-06-06 - v2.32.0 hint: b7e68ae18e0 commit 2019-12-23 - bisect: use the standard 'if (!var)' way to check for 0 hint: b7e68f6b413 tree hint: b7e68490b97 blob b7e68 [...] Before this we'd emit a "tag" line without a date, e.g.: hint: b7e68c41d92 tag v2.32.0 Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-name.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/object-name.c b/object-name.c index f31b50bc31..cbf459f566 100644 --- a/object-name.c +++ b/object-name.c @@ -408,19 +408,24 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * TRANSLATORS: This is a line of ambiguous * tag object output. E.g.: * - * "deadbeef tag Some Tag Message" + * "deadbeef tag 2022-01-01 - Some Tag Message" * - * The second argument is the "tag" string + * The second argument is the YYYY-MM-DD found + * in the tag. + * + * The third argument is the "tag" string * from object.c. 
*/ - strbuf_addf(&desc, _("%s tag %s"), hash, tag->tag); + strbuf_addf(&desc, _("%s tag %s - %s"), hash, + show_date(tag->date, 0, DATE_MODE(SHORT)), + tag->tag); } else { /* * TRANSLATORS: This is a line of ambiguous * tag object output where we couldn't parse * the tag itself. E.g.: * - * "deadbeef tag [bad tag, could not parse it]" + * "deadbeef [bad tag, could not parse it]" */ strbuf_addf(&desc, _("%s [bad tag, could not parse it]"), hash); From d2ef3cb7e29c35362125a61f1d96576c200076f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:48 +0100 Subject: [PATCH 036/150] object-name: iterate ambiguous objects before showing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the "The candidates are" header that's shown for ambiguous objects to be shown after we've iterated over all of the objects. If we get any errors while doing so we don't want to split up the header and the list as a result. The two will now be printed together, as shown in the updated testcase. As we're accumulating the lines into a "struct strbuf" before emitting them we need to add a trailing newline to the call in show_ambiguous_object(). This and the change from "The candidates are:" to "The candidates are:\n%s" helps to give translators more context. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-name.c | 27 +++++++++++++++++++++++---- t/t1512-rev-parse-disambiguation.sh | 9 ++++----- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/object-name.c b/object-name.c index cbf459f566..6154e1ec6f 100644 --- a/object-name.c +++ b/object-name.c @@ -351,9 +351,16 @@ static int init_object_disambiguation(struct repository *r, return 0; } +struct ambiguous_output { + const struct disambiguate_state *ds; + struct strbuf advice; +}; + static int show_ambiguous_object(const struct object_id *oid, void *data) { - const struct disambiguate_state *ds = data; + struct ambiguous_output *state = data; + const struct disambiguate_state *ds = state->ds; + struct strbuf *advice = &state->advice; struct strbuf desc = STRBUF_INIT; int type; const char *hash; @@ -452,7 +459,7 @@ out: * you'll probably want to swap the "%s" and leading " " space * around. */ - advise(_(" %s"), desc.buf); + strbuf_addf(advice, _(" %s\n"), desc.buf); strbuf_release(&desc); return 0; @@ -551,6 +558,10 @@ static enum get_oid_result get_short_oid(struct repository *r, if (!quietly && (status == SHORT_NAME_AMBIGUOUS)) { struct oid_array collect = OID_ARRAY_INIT; + struct ambiguous_output out = { + .ds = &ds, + .advice = STRBUF_INIT, + }; error(_("short object ID %s is ambiguous"), ds.hex_pfx); @@ -563,13 +574,21 @@ static enum get_oid_result get_short_oid(struct repository *r, if (!ds.ambiguous) ds.fn = NULL; - advise(_("The candidates are:")); repo_for_each_abbrev(r, ds.hex_pfx, collect_ambiguous, &collect); sort_ambiguous_oid_array(r, &collect); - if (oid_array_for_each(&collect, show_ambiguous_object, &ds)) + if (oid_array_for_each(&collect, show_ambiguous_object, &out)) BUG("show_ambiguous_object shouldn't return non-zero"); + + /* + * TRANSLATORS: The argument is the list of ambiguous + * objects composed in show_ambiguous_object(). See + * its "TRANSLATORS" comments for details. 
+ */ + advise(_("The candidates are:\n%s"), out.advice.buf); + oid_array_clear(&collect); + strbuf_release(&out.advice); } return status; diff --git a/t/t1512-rev-parse-disambiguation.sh b/t/t1512-rev-parse-disambiguation.sh index 0282ada133..be687aebb2 100755 --- a/t/t1512-rev-parse-disambiguation.sh +++ b/t/t1512-rev-parse-disambiguation.sh @@ -73,7 +73,6 @@ test_expect_success 'ambiguous loose bad object parsed as OBJ_BAD' ' test_cmp_failed_rev_parse blob.bad bad0 <<-\EOF error: short object ID bad0... is ambiguous - hint: The candidates are: fatal: invalid object type EOF ' @@ -94,11 +93,11 @@ test_expect_success POSIXPERM 'ambigous zlib corrupt loose blob' ' test_cmp_failed_rev_parse blob.corrupt cafe <<-\EOF error: short object ID cafe... is ambiguous + error: inflate: data stream error (incorrect header check) + error: unable to unpack cafe... header + error: inflate: data stream error (incorrect header check) + error: unable to unpack cafe... header hint: The candidates are: - error: inflate: data stream error (incorrect header check) - error: unable to unpack cafe... header - error: inflate: data stream error (incorrect header check) - error: unable to unpack cafe... header hint: cafe... [bad object] hint: cafe... blob fatal: ambiguous argument '\''cafe...'\'': unknown revision or path not in the working tree. From 3a73c1dfafcc53831a252fc3aededeb59be476f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 27 Jan 2022 06:26:49 +0100 Subject: [PATCH 037/150] object-name: re-use "struct strbuf" in show_ambiguous_object() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce the allocations done by show_ambiguous_object() by moving the "desc" strbuf into the "struct ambiguous_output" introduced in the preceding commit. 
This doesn't matter for optimization purposes, but since we're accumulating a "struct strbuf advice" anyway let's follow that pattern and add a "struct strbuf sb", we can then strbuf_reset() it rather than calling strbuf_release() for each call to show_ambiguous_object(). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- object-name.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/object-name.c b/object-name.c index 6154e1ec6f..61b58a2f29 100644 --- a/object-name.c +++ b/object-name.c @@ -354,6 +354,7 @@ static int init_object_disambiguation(struct repository *r, struct ambiguous_output { const struct disambiguate_state *ds; struct strbuf advice; + struct strbuf sb; }; static int show_ambiguous_object(const struct object_id *oid, void *data) @@ -361,7 +362,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) struct ambiguous_output *state = data; const struct disambiguate_state *ds = state->ds; struct strbuf *advice = &state->advice; - struct strbuf desc = STRBUF_INIT; + struct strbuf *sb = &state->sb; int type; const char *hash; @@ -377,7 +378,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * output shown when we cannot look up or parse the * object in question. E.g. "deadbeef [bad object]". */ - strbuf_addf(&desc, _("%s [bad object]"), hash); + strbuf_addf(sb, _("%s [bad object]"), hash); goto out; } @@ -402,8 +403,8 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * * "deadbeef commit 2021-01-01 - Some Commit Message" */ - strbuf_addf(&desc, _("%s commit %s - %s"), - hash, date.buf, msg.buf); + strbuf_addf(sb, _("%s commit %s - %s"), hash, date.buf, + msg.buf); strbuf_release(&date); strbuf_release(&msg); @@ -423,7 +424,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * The third argument is the "tag" string * from object.c. 
*/ - strbuf_addf(&desc, _("%s tag %s - %s"), hash, + strbuf_addf(sb, _("%s tag %s - %s"), hash, show_date(tag->date, 0, DATE_MODE(SHORT)), tag->tag); } else { @@ -434,7 +435,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * * "deadbeef [bad tag, could not parse it]" */ - strbuf_addf(&desc, _("%s [bad tag, could not parse it]"), + strbuf_addf(sb, _("%s [bad tag, could not parse it]"), hash); } } else if (type == OBJ_TREE) { @@ -442,13 +443,13 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) * TRANSLATORS: This is a line of ambiguous * object output. E.g. "deadbeef tree". */ - strbuf_addf(&desc, _("%s tree"), hash); + strbuf_addf(sb, _("%s tree"), hash); } else if (type == OBJ_BLOB) { /* * TRANSLATORS: This is a line of ambiguous * object output. E.g. "deadbeef blob". */ - strbuf_addf(&desc, _("%s blob"), hash); + strbuf_addf(sb, _("%s blob"), hash); } @@ -459,9 +460,9 @@ out: * you'll probably want to swap the "%s" and leading " " space * around. 
*/ - strbuf_addf(advice, _(" %s\n"), desc.buf); + strbuf_addf(advice, _(" %s\n"), sb->buf); - strbuf_release(&desc); + strbuf_reset(sb); return 0; } @@ -560,6 +561,7 @@ static enum get_oid_result get_short_oid(struct repository *r, struct oid_array collect = OID_ARRAY_INIT; struct ambiguous_output out = { .ds = &ds, + .sb = STRBUF_INIT, .advice = STRBUF_INIT, }; @@ -589,6 +591,7 @@ static enum get_oid_result get_short_oid(struct repository *r, oid_array_clear(&collect); strbuf_release(&out.advice); + strbuf_release(&out.sb); } return status; From 2ae8eb5d71028f0289b4b38663d07b6eefee23a6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 28 Jan 2022 14:31:57 +0000 Subject: [PATCH 038/150] scalar: accept -C and -c options before the subcommand The `git` executable has these two very useful options: -C : switch to the specified directory before performing any actions -c =: temporarily configure this setting for the duration of the specified scalar subcommand With this commit, we teach the `scalar` executable the same trick. Note: It might look like a good idea to try to reuse the `handle_options()` function in `git.c` instead of replicating only the `-c`/`-C` part. However, that function is not only not in `libgit.a`, it is also intricately entangled with the rest of the code in `git.c` that is necessary e.g. to handle `--paginate`. Besides, no other option handled by that `handle_options()` function is relevant to Scalar, therefore the cost of refactoring vastly would outweigh the benefit. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.c | 22 +++++++++++++++++++++- contrib/scalar/scalar.txt | 10 ++++++++++ contrib/scalar/t/t9099-scalar.sh | 8 ++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index 1ce9c2b00e..7db2a97416 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -808,6 +808,25 @@ int cmd_main(int argc, const char **argv) struct strbuf scalar_usage = STRBUF_INIT; int i; + while (argc > 1 && *argv[1] == '-') { + if (!strcmp(argv[1], "-C")) { + if (argc < 3) + die(_("-C requires a ")); + if (chdir(argv[2]) < 0) + die_errno(_("could not change to '%s'"), + argv[2]); + argc -= 2; + argv += 2; + } else if (!strcmp(argv[1], "-c")) { + if (argc < 3) + die(_("-c requires a = argument")); + git_config_push_parameter(argv[2]); + argc -= 2; + argv += 2; + } else + break; + } + if (argc > 1) { argv++; argc--; @@ -818,7 +837,8 @@ int cmd_main(int argc, const char **argv) } strbuf_addstr(&scalar_usage, - N_("scalar []\n\nCommands:\n")); + N_("scalar [-C ] [-c =] " + " []\n\nCommands:\n")); for (i = 0; builtins[i].name; i++) strbuf_addf(&scalar_usage, "\t%s\n", builtins[i].name); diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt index f416d63728..cf4e5b889c 100644 --- a/contrib/scalar/scalar.txt +++ b/contrib/scalar/scalar.txt @@ -36,6 +36,16 @@ The `scalar` command implements various subcommands, and different options depending on the subcommand. With the exception of `clone`, `list` and `reconfigure --all`, all subcommands expect to be run in an enlistment. +The following options can be specified _before_ the subcommand: + +-C :: + Before running the subcommand, change the working directory. This + option imitates the same option of linkgit:git[1]. + +-c =:: + For the duration of running the specified subcommand, configure this + setting. This option imitates the same option of linkgit:git[1]. 
+ COMMANDS -------- diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index 2e1502ad45..89781568f4 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -85,4 +85,12 @@ test_expect_success 'scalar delete with enlistment' ' test_path_is_missing cloned ' +test_expect_success 'scalar supports -c/-C' ' + test_when_finished "scalar delete sub" && + git init sub && + scalar -C sub -c status.aheadBehind=bogus register && + test -z "$(git -C sub config --local status.aheadBehind)" && + test true = "$(git -C sub config core.preloadIndex)" +' + test_done From 5e00514745bc9cba21fde44c9bb2b6aa162be653 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 31 Jan 2022 17:50:18 +0000 Subject: [PATCH 039/150] t1405: explicitly delete reflogs for reftable Deleting a ref in reftable just records a (ObjectID => ZeroID) transaction in the reflog. To ensure 'for_each_reflog()' test below works, explicitly delete reflogs for deleted refs. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- t/t1405-main-ref-store.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/t1405-main-ref-store.sh b/t/t1405-main-ref-store.sh index 1a3ee8845d..62e5e9d1b0 100755 --- a/t/t1405-main-ref-store.sh +++ b/t/t1405-main-ref-store.sh @@ -40,6 +40,12 @@ test_expect_success 'delete_refs(FOO, refs/tags/new-tag)' ' test_must_fail git rev-parse refs/tags/new-tag -- ' +# In reftable, we keep the reflogs around for deleted refs. 
+test_expect_success !REFFILES 'delete-reflog(FOO, refs/tags/new-tag)' ' + $RUN delete-reflog FOO && + $RUN delete-reflog refs/tags/new-tag +' + test_expect_success 'rename_refs(main, new-main)' ' git rev-parse main >expected && $RUN rename-ref refs/heads/main refs/heads/new-main && From 53af25e47c5f1261c7da47ca4e01ed9d9f3ffcab Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 31 Jan 2022 17:50:19 +0000 Subject: [PATCH 040/150] t1405: mark test that checks existence as REFFILES The reftable backend doesn't support mere existence of reflogs. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- t/t1405-main-ref-store.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1405-main-ref-store.sh b/t/t1405-main-ref-store.sh index 62e5e9d1b0..51f8291628 100755 --- a/t/t1405-main-ref-store.sh +++ b/t/t1405-main-ref-store.sh @@ -111,7 +111,7 @@ test_expect_success 'delete_reflog(HEAD)' ' test_must_fail git reflog exists HEAD ' -test_expect_success 'create-reflog(HEAD)' ' +test_expect_success REFFILES 'create-reflog(HEAD)' ' $RUN create-reflog HEAD && git reflog exists HEAD ' From eaf0e83009f37263849be5d4bf6c394b73546bd3 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 31 Jan 2022 17:50:20 +0000 Subject: [PATCH 041/150] t5312: prepare for reftable Mark some tests as REFFILES if they rely on packed refs. Use ref-store helper to create bogus refs. 
Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- t/t5312-prune-corruption.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/t/t5312-prune-corruption.sh b/t/t5312-prune-corruption.sh index ea889c088a..9d8e249ae8 100755 --- a/t/t5312-prune-corruption.sh +++ b/t/t5312-prune-corruption.sh @@ -22,8 +22,8 @@ test_expect_success 'disable reflogs' ' ' create_bogus_ref () { - test_when_finished 'rm -f .git/refs/heads/bogus..name' && - echo $bogus >.git/refs/heads/bogus..name + test-tool ref-store main update-ref msg "refs/heads/bogus..name" $bogus $ZERO_OID REF_SKIP_REFNAME_VERIFICATION && + test_when_finished "test-tool ref-store main delete-refs REF_NO_DEREF msg refs/heads/bogus..name" } test_expect_success 'create history reachable only from a bogus-named ref' ' @@ -113,7 +113,7 @@ test_expect_success 'pack-refs does not silently delete broken loose ref' ' # we do not want to count on running pack-refs to # actually pack it, as it is perfectly reasonable to # skip processing a broken ref -test_expect_success 'create packed-refs file with broken ref' ' +test_expect_success REFFILES 'create packed-refs file with broken ref' ' rm -f .git/refs/heads/main && cat >.git/packed-refs <<-EOF && $missing refs/heads/main @@ -124,13 +124,13 @@ test_expect_success 'create packed-refs file with broken ref' ' test_cmp expect actual ' -test_expect_success 'pack-refs does not silently delete broken packed ref' ' +test_expect_success REFFILES 'pack-refs does not silently delete broken packed ref' ' git pack-refs --all --prune && git rev-parse refs/heads/main >actual && test_cmp expect actual ' -test_expect_success 'pack-refs does not drop broken refs during deletion' ' +test_expect_success REFFILES 'pack-refs does not drop broken refs during deletion' ' git update-ref -d refs/heads/other && git rev-parse refs/heads/main >actual && test_cmp expect actual From 9158a3564a970def3375a79e8f3f90927cd8e793 Mon Sep 17 00:00:00 2001 From: Thomas 
Koutcher Date: Tue, 1 Feb 2022 18:26:04 +0100 Subject: [PATCH 042/150] subtree: force merge commit When `merge.ff` is set to `only` in .gitconfig, `git subtree pull` will fail with error `fatal: Not possible to fast-forward, aborting.`, but the command does want to make merges in these places. Add `--no-ff` argument to `git merge` to enforce this behaviour. Signed-off-by: Thomas Koutcher Reviewed-by: Johannes Altmanninger Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 71f1fd94bd..1af1d9653e 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -975,10 +975,10 @@ cmd_merge () { if test -n "$arg_addmerge_message" then - git merge -Xsubtree="$arg_prefix" \ + git merge --no-ff -Xsubtree="$arg_prefix" \ --message="$arg_addmerge_message" "$rev" else - git merge -Xsubtree="$arg_prefix" $rev + git merge --no-ff -Xsubtree="$arg_prefix" $rev fi } From e89f151db13684924feb0cd0a0ca3a13c1d71516 Mon Sep 17 00:00:00 2001 From: Glen Choo Date: Fri, 28 Jan 2022 16:04:41 -0800 Subject: [PATCH 043/150] branch: move --set-upstream-to behavior to dwim_and_setup_tracking() This commit is preparation for a future commit that will simplify create_branch() so that it always creates a branch. This will allow create_branch() to accept a dry_run parameter (which is needed for "git branch --recurse-submodules"). create_branch() used to always create a branch, but 4fc5006676 (Add branch --set-upstream, 2010-01-18) changed it to also be able to set tracking information without creating a branch. Refactor the code that sets tracking information into its own functions dwim_branch_start() and dwim_and_setup_tracking(). 
Also change an invocation of create_branch() in cmd_branch() in builtin/branch.c to use dwim_and_setup_tracking(), since that invocation is only for setting tracking information (in "git branch --set-upstream-to"). As of this commit, create_branch() is no longer invoked in a way that does not create branches. Helped-by: Jonathan Tan Signed-off-by: Glen Choo Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- branch.c | 87 +++++++++++++++++++++++++++++++++++++----------- branch.h | 22 ++++++++++++ builtin/branch.c | 9 ++--- 3 files changed, 92 insertions(+), 26 deletions(-) diff --git a/branch.c b/branch.c index a4e4631ef1..f3a31930fb 100644 --- a/branch.c +++ b/branch.c @@ -218,9 +218,11 @@ static int inherit_tracking(struct tracking *tracking, const char *orig_ref) } /* - * This is called when new_ref is branched off of orig_ref, and tries - * to infer the settings for branch..{remote,merge} from the - * config. + * Used internally to set the branch..{remote,merge} config + * settings so that branch 'new_ref' tracks 'orig_ref'. Unlike + * dwim_and_setup_tracking(), this does not do DWIM, i.e. "origin/main" + * will not be expanded to "refs/remotes/origin/main", so it is not safe + * for 'orig_ref' to be raw user input. */ static void setup_tracking(const char *new_ref, const char *orig_ref, enum branch_track track, int quiet) @@ -341,31 +343,37 @@ N_("\n" "will track its remote counterpart, you may want to use\n" "\"git push -u\" to set the upstream config as you push."); -void create_branch(struct repository *r, - const char *name, const char *start_name, - int force, int clobber_head_ok, int reflog, - int quiet, enum branch_track track) +/** + * DWIMs a user-provided ref to determine the starting point for a + * branch and validates it, where: + * + * - r is the repository to validate the branch for + * + * - start_name is the ref that we would like to test. This is + * expanded with DWIM and assigned to out_real_ref. 
+ * + * - track is the tracking mode of the new branch. If tracking is + * explicitly requested, start_name must be a branch (because + * otherwise start_name cannot be tracked) + * + * - out_oid is an out parameter containing the object_id of start_name + * + * - out_real_ref is an out parameter containing the full, 'real' form + * of start_name e.g. refs/heads/main instead of main + * + */ +static void dwim_branch_start(struct repository *r, const char *start_name, + enum branch_track track, char **out_real_ref, + struct object_id *out_oid) { struct commit *commit; struct object_id oid; char *real_ref; - struct strbuf ref = STRBUF_INIT; - int forcing = 0; - int dont_change_ref = 0; int explicit_tracking = 0; if (track == BRANCH_TRACK_EXPLICIT || track == BRANCH_TRACK_OVERRIDE) explicit_tracking = 1; - if ((track == BRANCH_TRACK_OVERRIDE || clobber_head_ok) - ? validate_branchname(name, &ref) - : validate_new_branchname(name, &ref, force)) { - if (!force) - dont_change_ref = 1; - else - forcing = 1; - } - real_ref = NULL; if (get_oid_mb(start_name, &oid)) { if (explicit_tracking) { @@ -402,7 +410,37 @@ void create_branch(struct repository *r, if ((commit = lookup_commit_reference(r, &oid)) == NULL) die(_("Not a valid branch point: '%s'."), start_name); - oidcpy(&oid, &commit->object.oid); + if (out_real_ref) { + *out_real_ref = real_ref; + real_ref = NULL; + } + if (out_oid) + oidcpy(out_oid, &commit->object.oid); + + FREE_AND_NULL(real_ref); +} + +void create_branch(struct repository *r, + const char *name, const char *start_name, + int force, int clobber_head_ok, int reflog, + int quiet, enum branch_track track) +{ + struct object_id oid; + char *real_ref; + struct strbuf ref = STRBUF_INIT; + int forcing = 0; + int dont_change_ref = 0; + + if ((track == BRANCH_TRACK_OVERRIDE || clobber_head_ok) + ? 
validate_branchname(name, &ref) + : validate_new_branchname(name, &ref, force)) { + if (!force) + dont_change_ref = 1; + else + forcing = 1; + } + + dwim_branch_start(r, start_name, track, &real_ref, &oid); if (reflog) log_all_ref_updates = LOG_REFS_NORMAL; @@ -436,6 +474,15 @@ void create_branch(struct repository *r, free(real_ref); } +void dwim_and_setup_tracking(struct repository *r, const char *new_ref, + const char *orig_ref, enum branch_track track, + int quiet) +{ + char *real_orig_ref; + dwim_branch_start(r, orig_ref, track, &real_orig_ref, NULL); + setup_tracking(new_ref, real_orig_ref, track, quiet); +} + void remove_merge_branch_state(struct repository *r) { unlink(git_path_merge_head(r)); diff --git a/branch.h b/branch.h index 815dcd40c0..ab2315c611 100644 --- a/branch.h +++ b/branch.h @@ -18,6 +18,28 @@ extern enum branch_track git_branch_track; /* Functions for acting on the information about branches. */ +/** + * Sets branch.<name>.{remote,merge} config settings such that + * new_ref tracks orig_ref according to the specified tracking mode. + * + * - new_ref is the name of the branch that we are setting tracking + * for. + * + * - orig_ref is the name of the ref that is 'upstream' of new_ref. + * orig_ref will be expanded with DWIM so that the config settings + * are in the correct format e.g. "refs/remotes/origin/main" instead + * of "origin/main". + * + * - track is the tracking mode e.g. BRANCH_TRACK_REMOTE causes + * new_ref to track orig_ref directly, whereas BRANCH_TRACK_INHERIT + * causes new_ref to track whatever orig_ref tracks. + * + * - quiet suppresses tracking information.
+ */ +void dwim_and_setup_tracking(struct repository *r, const char *new_ref, + const char *orig_ref, enum branch_track track, + int quiet); + /* * Creates a new branch, where: * diff --git a/builtin/branch.c b/builtin/branch.c index a77c4ad7ba..676825242a 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -823,12 +823,9 @@ int cmd_branch(int argc, const char **argv, const char *prefix) if (!ref_exists(branch->refname)) die(_("branch '%s' does not exist"), branch->name); - /* - * create_branch takes care of setting up the tracking - * info and making sure new_upstream is correct - */ - create_branch(the_repository, branch->name, new_upstream, - 0, 0, 0, quiet, BRANCH_TRACK_OVERRIDE); + dwim_and_setup_tracking(the_repository, branch->name, + new_upstream, BRANCH_TRACK_OVERRIDE, + quiet); } else if (unset_upstream) { struct branch *branch = branch_get(argv[0]); struct strbuf buf = STRBUF_INIT; From bc0893cf3b0ee376ef5b6ed293b1525480a9d720 Mon Sep 17 00:00:00 2001 From: Glen Choo Date: Fri, 28 Jan 2022 16:04:42 -0800 Subject: [PATCH 044/150] branch: make create_branch() always create a branch With the previous commit, there are no more invocations of create_branch() that do not create a branch because: * BRANCH_TRACK_OVERRIDE is no longer passed * clobber_head_ok = true and force = false is never passed Assert these situations, delete dead code and ensure that we're handling clobber_head_ok and force correctly by introducing tests for `git branch --force`. As a result, create_branch() now always creates a branch. 
Helped-by: Jonathan Tan Signed-off-by: Glen Choo Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- branch.c | 55 ++++++++++++++++++++++------------------------- branch.h | 4 ++-- t/t3200-branch.sh | 17 +++++++++++++++ 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/branch.c b/branch.c index f3a31930fb..df24021f27 100644 --- a/branch.c +++ b/branch.c @@ -429,15 +429,19 @@ void create_branch(struct repository *r, char *real_ref; struct strbuf ref = STRBUF_INIT; int forcing = 0; - int dont_change_ref = 0; + struct ref_transaction *transaction; + struct strbuf err = STRBUF_INIT; + char *msg; - if ((track == BRANCH_TRACK_OVERRIDE || clobber_head_ok) - ? validate_branchname(name, &ref) - : validate_new_branchname(name, &ref, force)) { - if (!force) - dont_change_ref = 1; - else - forcing = 1; + if (track == BRANCH_TRACK_OVERRIDE) + BUG("'track' cannot be BRANCH_TRACK_OVERRIDE. Did you mean to call dwim_and_setup_tracking()?"); + if (clobber_head_ok && !force) + BUG("'clobber_head_ok' can only be used with 'force'"); + + if (clobber_head_ok ? + validate_branchname(name, &ref) : + validate_new_branchname(name, &ref, force)) { + forcing = 1; } dwim_branch_start(r, start_name, track, &real_ref, &oid); @@ -445,27 +449,20 @@ void create_branch(struct repository *r, if (reflog) log_all_ref_updates = LOG_REFS_NORMAL; - if (!dont_change_ref) { - struct ref_transaction *transaction; - struct strbuf err = STRBUF_INIT; - char *msg; - - if (forcing) - msg = xstrfmt("branch: Reset to %s", start_name); - else - msg = xstrfmt("branch: Created from %s", start_name); - - transaction = ref_transaction_begin(&err); - if (!transaction || - ref_transaction_update(transaction, ref.buf, - &oid, forcing ? 
NULL : null_oid(), - 0, msg, &err) || - ref_transaction_commit(transaction, &err)) - die("%s", err.buf); - ref_transaction_free(transaction); - strbuf_release(&err); - free(msg); - } + if (forcing) + msg = xstrfmt("branch: Reset to %s", start_name); + else + msg = xstrfmt("branch: Created from %s", start_name); + transaction = ref_transaction_begin(&err); + if (!transaction || + ref_transaction_update(transaction, ref.buf, + &oid, forcing ? NULL : null_oid(), + 0, msg, &err) || + ref_transaction_commit(transaction, &err)) + die("%s", err.buf); + ref_transaction_free(transaction); + strbuf_release(&err); + free(msg); if (real_ref && track) setup_tracking(ref.buf + 11, real_ref, track, quiet); diff --git a/branch.h b/branch.h index ab2315c611..cf3a4d3ff3 100644 --- a/branch.h +++ b/branch.h @@ -52,8 +52,8 @@ void dwim_and_setup_tracking(struct repository *r, const char *new_ref, * * - force enables overwriting an existing (non-head) branch * - * - clobber_head_ok allows the currently checked out (hence existing) - * branch to be overwritten; without 'force', it has no effect. 
+ * - clobber_head_ok, when enabled with 'force', allows the currently + * checked out (head) branch to be overwritten * * - reflog creates a reflog for the branch * diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh index 09ab132377..71a72efcb2 100755 --- a/t/t3200-branch.sh +++ b/t/t3200-branch.sh @@ -42,6 +42,23 @@ test_expect_success 'git branch abc should create a branch' ' git branch abc && test_path_is_file .git/refs/heads/abc ' +test_expect_success 'git branch abc should fail when abc exists' ' + test_must_fail git branch abc +' + +test_expect_success 'git branch --force abc should fail when abc is checked out' ' + test_when_finished git switch main && + git switch abc && + test_must_fail git branch --force abc HEAD~1 +' + +test_expect_success 'git branch --force abc should succeed when abc exists' ' + git rev-parse HEAD~1 >expect && + git branch --force abc HEAD~1 && + git rev-parse abc >actual && + test_cmp expect actual +' + test_expect_success 'git branch a/b/c should create a branch' ' git branch a/b/c && test_path_is_file .git/refs/heads/a/b/c ' From 3f3e76082bc29ff647dff16de9f0145a4d582825 Mon Sep 17 00:00:00 2001 From: Glen Choo Date: Fri, 28 Jan 2022 16:04:43 -0800 Subject: [PATCH 045/150] branch: add a dry_run parameter to create_branch() Add a dry_run parameter to create_branch() such that dry_run = 1 will validate a new branch without trying to create it. This will be used in `git branch --recurse-submodules` to ensure that the new branch can be created in all submodules. 
Signed-off-by: Glen Choo Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- branch.c | 5 ++++- branch.h | 5 ++++- builtin/branch.c | 2 +- builtin/checkout.c | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/branch.c b/branch.c index df24021f27..02d46a69b8 100644 --- a/branch.c +++ b/branch.c @@ -423,7 +423,7 @@ static void dwim_branch_start(struct repository *r, const char *start_name, void create_branch(struct repository *r, const char *name, const char *start_name, int force, int clobber_head_ok, int reflog, - int quiet, enum branch_track track) + int quiet, enum branch_track track, int dry_run) { struct object_id oid; char *real_ref; @@ -445,6 +445,8 @@ void create_branch(struct repository *r, } dwim_branch_start(r, start_name, track, &real_ref, &oid); + if (dry_run) + goto cleanup; if (reflog) log_all_ref_updates = LOG_REFS_NORMAL; @@ -467,6 +469,7 @@ void create_branch(struct repository *r, if (real_ref && track) setup_tracking(ref.buf + 11, real_ref, track, quiet); +cleanup: strbuf_release(&ref); free(real_ref); } diff --git a/branch.h b/branch.h index cf3a4d3ff3..509cfcc34e 100644 --- a/branch.h +++ b/branch.h @@ -62,11 +62,14 @@ void dwim_and_setup_tracking(struct repository *r, const char *new_ref, * - track causes the new branch to be configured to merge the remote branch * that start_name is a tracking branch for (if any). * + * - dry_run causes the branch to be validated but not created. + * */ void create_branch(struct repository *r, const char *name, const char *start_name, int force, int clobber_head_ok, - int reflog, int quiet, enum branch_track track); + int reflog, int quiet, enum branch_track track, + int dry_run); /* * Check if 'name' can be a valid name for a branch; die otherwise. 
diff --git a/builtin/branch.c b/builtin/branch.c index 676825242a..0a49de0281 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -859,7 +859,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) create_branch(the_repository, argv[0], (argc == 2) ? argv[1] : head, - force, 0, reflog, quiet, track); + force, 0, reflog, quiet, track, 0); } else usage_with_options(builtin_branch_usage, options); diff --git a/builtin/checkout.c b/builtin/checkout.c index 1809ac12df..8600860629 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -893,7 +893,8 @@ static void update_refs_for_switch(const struct checkout_opts *opts, opts->new_branch_force ? 1 : 0, opts->new_branch_log, opts->quiet, - opts->track); + opts->track, + 0); new_branch_info->name = opts->new_branch; setup_branch_path(new_branch_info); } From 6e0a2ca0277e6010cd403c70d8f15b66af345d33 Mon Sep 17 00:00:00 2001 From: Glen Choo Date: Fri, 28 Jan 2022 16:04:44 -0800 Subject: [PATCH 046/150] builtin/branch: consolidate action-picking logic in cmd_branch() Consolidate the logic for deciding when to create a new branch in cmd_branch(), and save the result for reuse. Besides making the function more explicit, this allows us to validate options that can only be used when creating a branch. Such an option does not exist yet, but one will be introduced in a subsequent commit. 
Helped-by: Jonathan Tan Signed-off-by: Glen Choo Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/branch.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/builtin/branch.c b/builtin/branch.c index 0a49de0281..209b1cc442 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -616,14 +616,15 @@ static int edit_branch_description(const char *branch_name) int cmd_branch(int argc, const char **argv, const char *prefix) { - int delete = 0, rename = 0, copy = 0, force = 0, list = 0; - int show_current = 0; - int reflog = 0, edit_description = 0; - int quiet = 0, unset_upstream = 0; + /* possible actions */ + int delete = 0, rename = 0, copy = 0, list = 0, + unset_upstream = 0, show_current = 0, edit_description = 0; const char *new_upstream = NULL; + int noncreate_actions = 0; + /* possible options */ + int reflog = 0, quiet = 0, icase = 0, force = 0; enum branch_track track; struct ref_filter filter; - int icase = 0; static struct ref_sorting *sorting; struct string_list sorting_options = STRING_LIST_INIT_DUP; struct ref_format format = REF_FORMAT_INIT; @@ -708,8 +709,10 @@ int cmd_branch(int argc, const char **argv, const char *prefix) filter.reachable_from || filter.unreachable_from || filter.points_at.nr) list = 1; - if (!!delete + !!rename + !!copy + !!new_upstream + !!show_current + - list + edit_description + unset_upstream > 1) + noncreate_actions = !!delete + !!rename + !!copy + !!new_upstream + + !!show_current + !!list + !!edit_description + + !!unset_upstream; + if (noncreate_actions > 1) usage_with_options(builtin_branch_usage, options); if (filter.abbrev == -1) @@ -849,7 +852,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) strbuf_addf(&buf, "branch.%s.merge", branch->name); git_config_set_multivar(buf.buf, NULL, NULL, CONFIG_FLAGS_MULTI_REPLACE); strbuf_release(&buf); - } else if (argc > 0 && argc <= 2) { + } else if (!noncreate_actions && argc > 0 && argc <= 2) { if (filter.kind 
!= FILTER_REFS_BRANCHES) die(_("The -a, and -r, options to 'git branch' do not take a branch name.\n" "Did you mean to use: -a|-r --list ?")); From 540776406974dfcacb77e94a42f4bdfd4b15b4fe Mon Sep 17 00:00:00 2001 From: Chen Bojun Date: Sat, 29 Jan 2022 14:35:38 +0800 Subject: [PATCH 047/150] receive-pack: purge temporary data if no command is ready to run When pushing a hidden ref, e.g.: $ git push origin HEAD:refs/hidden/foo "receive-pack" will reject our request with an error message like this: ! [remote rejected] HEAD -> refs/hidden/foo (deny updating a hidden ref) The remote side ("git-receive-pack") will not create the hidden ref as expected, but the pack file sent by "git-send-pack" is left inside the remote repository. I.e. the quarantine directory is not purged as it should be. Add a checkpoint before calling "tmp_objdir_migrate()" and after calling the "pre-receive" hook to purge that temporary data in the quarantine area when there is no command ready to run. The reason we do not add the checkpoint before the "pre-receive" hook, but after it, is that the "pre-receive" hook is called with a switch-off "skip_broken" flag, and all commands, even broken ones, should be fed by calling "feed_receive_hook()". Add a new test case in t5516 as well. Helped-by: Jiang Xin Helped-by: Teng Long Signed-off-by: Chen Bojun Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 9 +++++++++ t/t5516-fetch-push.sh | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 9f4a0b816c..a0b193ab3f 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1971,6 +1971,15 @@ static void execute_commands(struct command *commands, return; } + /* + * If there is no command ready to run, should return directly to destroy + * temporary data in the quarantine area. 
+ */ + for (cmd = commands; cmd && cmd->error_string; cmd = cmd->next) + ; /* nothing */ + if (!cmd) + return; + /* * Now we'll start writing out refs, which means the objects need * to be in their final positions so that other processes can see them. diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 2f04cf9a1c..da70c45857 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1809,4 +1809,12 @@ test_expect_success 'refuse fetch to current branch of bare repository worktree' git -C bare.git fetch -u .. HEAD:wt ' +test_expect_success 'refuse to push a hidden ref, and make sure do not pollute the repository' ' + mk_empty testrepo && + git -C testrepo config receive.hiderefs refs/hidden && + git -C testrepo config receive.unpackLimit 1 && + test_must_fail git push testrepo HEAD:refs/hidden/foo && + test_dir_is_empty testrepo/.git/objects/pack +' + test_done From 56fa5ac39a8601d965127abebbc533f29eaef751 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Tue, 1 Feb 2022 20:20:15 -0800 Subject: [PATCH 048/150] patch-id: fix antipatterns in tests Clean up the tests for patch-id by moving file preparation tasks inside the test body and redirecting files directly into stdin instead of using 'cat'. 
Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- t/t4204-patch-id.sh | 64 ++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index 80f4a65b28..2bc940a07e 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -166,40 +166,38 @@ test_expect_success 'patch-id respects config from subdir' ' ) ' -cat >nonl <<\EOF -diff --git i/a w/a -index e69de29..2e65efe 100644 ---- i/a -+++ w/a -@@ -0,0 +1 @@ -+a -\ No newline at end of file -diff --git i/b w/b -index e69de29..6178079 100644 ---- i/b -+++ w/b -@@ -0,0 +1 @@ -+b -EOF - -cat >withnl <<\EOF -diff --git i/a w/a -index e69de29..7898192 100644 ---- i/a -+++ w/a -@@ -0,0 +1 @@ -+a -diff --git i/b w/b -index e69de29..6178079 100644 ---- i/b -+++ w/b -@@ -0,0 +1 @@ -+b -EOF - test_expect_success 'patch-id handles no-nl-at-eof markers' ' - cat nonl | calc_patch_id nonl && - cat withnl | calc_patch_id withnl && + cat >nonl <<-\EOF && + diff --git i/a w/a + index e69de29..2e65efe 100644 + --- i/a + +++ w/a + @@ -0,0 +1 @@ + +a + \ No newline at end of file + diff --git i/b w/b + index e69de29..6178079 100644 + --- i/b + +++ w/b + @@ -0,0 +1 @@ + +b + EOF + cat >withnl <<-\EOF && + diff --git i/a w/a + index e69de29..7898192 100644 + --- i/a + +++ w/a + @@ -0,0 +1 @@ + +a + diff --git i/b w/b + index e69de29..6178079 100644 + --- i/b + +++ w/b + @@ -0,0 +1 @@ + +b + EOF + calc_patch_id nonl Date: Tue, 1 Feb 2022 20:19:45 -0800 Subject: [PATCH 049/150] patch-id: fix scan_hunk_header on diffs with 1 line of before/after Normally diffs will contain a hunk header of the format "@@ -2,2 +2,15 @@ code". However when there is only 1 line of change, the unified diff format allows for the second comma separated value to be omitted in either before or after line counts. This can produce hunk headers that look like "@@ -2 +2,18 @@ code" or "@@ -2,2 +2 @@ code". 
As a result, scan_hunk_header mistakenly returns the line number as line count, which then results in unpredictable parsing errors with the rest of the patch, including giving multiple lines of output for a single commit. Fix by explicitly setting line count to 1 when there is no comma, and add a test. apply.c contains this same logic except it is correct. A worthwhile future project might be to unify these two diff parsers so they both benefit from fixes. Signed-off-by: Jerry Zhang Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 9 +++++++-- t/t4204-patch-id.sh | 31 ++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 822ffff51f..881fcf3273 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -32,8 +32,12 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) n = strspn(q, digits); if (q[n] == ',') { q += n + 1; + *p_before = atoi(q); n = strspn(q, digits); + } else { + *p_before = 1; } + if (n == 0 || q[n] != ' ' || q[n+1] != '+') return 0; @@ -41,13 +45,14 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) n = strspn(r, digits); if (r[n] == ',') { r += n + 1; + *p_after = atoi(r); n = strspn(r, digits); + } else { + *p_after = 1; } if (n == 0) return 0; - *p_before = atoi(q); - *p_after = atoi(r); return 1; } diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index 2bc940a07e..a730c0db98 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -38,7 +38,7 @@ calc_patch_id () { shift git patch-id "$@" >patch-id.output && sed "s/ .*//" patch-id.output >patch-id_"$patch_name" && - test_line_count -gt 0 patch-id_"$patch_name" + test_line_count -eq 1 patch-id_"$patch_name" } get_top_diff () { @@ -200,4 +200,33 @@ test_expect_success 'patch-id handles no-nl-at-eof markers' ' calc_patch_id withnl diffu1 <<-\EOF && + diff --git a/bar b/bar + index bdaf90f..31051f6 100644 + --- a/bar + +++ b/bar + @@ -2 +2,2 @@ + b + +c 
+ diff --git a/car b/car + index 00750ed..2ae5e34 100644 + --- a/car + +++ b/car + @@ -1 +1,2 @@ + 3 + +d + diff --git a/foo b/foo + index e439850..7146eb8 100644 + --- a/foo + +++ b/foo + @@ -2 +2,2 @@ + a + +e + EOF + calc_patch_id diffu1 Date: Wed, 2 Feb 2022 14:42:59 +0800 Subject: [PATCH 050/150] t/lib-read-tree-m-3way: modernize style Many invocations of the test_expect_success command in this file are written in old style where the command, an optional prerequisite, and the test title are written on separate lines, and the executable script string begins on its own line, and these lines are pasted together with backslashes as necessary. An invocation of the test_expect_success command in modern test scripts however writes the prerequisite and the title on the same line as the test_expect_success command itself, and ends the line with a single quote that begins the executable script string. Update the style for uniformity. Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- t/lib-read-tree-m-3way.sh | 150 +++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/t/lib-read-tree-m-3way.sh b/t/lib-read-tree-m-3way.sh index 168329adbc..2bf43850be 100644 --- a/t/lib-read-tree-m-3way.sh +++ b/t/lib-read-tree-m-3way.sh @@ -8,16 +8,16 @@ do p=$a$b echo This is $p from the original tree. >$p echo This is Z/$p from the original tree. >Z/$p - test_expect_success \ - "adding test file $p and Z/$p" \ - 'git update-index --add $p && - git update-index --add Z/$p' + test_expect_success "adding test file $p and Z/$p" ' + git update-index --add $p && + git update-index --add Z/$p + ' done done echo This is SS from the original tree. >SS -test_expect_success \ - 'adding test file SS' \ - 'git update-index --add SS' +test_expect_success 'adding test file SS' ' + git update-index --add SS +' cat >TT <<\EOF This is a trivial merge sample text. Branch A is expected to upcase this word, here. 
@@ -30,12 +30,12 @@ At the very end, here comes another line, that is the word, expected to be upcased by Branch B. This concludes the trivial merge sample file. EOF -test_expect_success \ - 'adding test file TT' \ - 'git update-index --add TT' -test_expect_success \ - 'prepare initial tree' \ - 'tree_O=$(git write-tree)' +test_expect_success 'adding test file TT' ' + git update-index --add TT +' +test_expect_success 'prepare initial tree' ' + tree_O=$(git write-tree) +' ################################################################ # Branch A and B makes the changes according to the above matrix. @@ -45,48 +45,48 @@ test_expect_success \ to_remove=$(echo D? Z/D?) rm -f $to_remove -test_expect_success \ - 'change in branch A (removal)' \ - 'git update-index --remove $to_remove' +test_expect_success 'change in branch A (removal)' ' + git update-index --remove $to_remove +' for p in M? Z/M? do echo This is modified $p in the branch A. >$p - test_expect_success \ - 'change in branch A (modification)' \ - "git update-index $p" + test_expect_success 'change in branch A (modification)' ' + git update-index $p + ' done for p in AN AA Z/AN Z/AA do echo This is added $p in the branch A. >$p - test_expect_success \ - 'change in branch A (addition)' \ - "git update-index --add $p" + test_expect_success 'change in branch A (addition)' ' + git update-index --add $p + ' done echo This is SS from the modified tree. >SS echo This is LL from the modified tree. >LL -test_expect_success \ - 'change in branch A (addition)' \ - 'git update-index --add LL && - git update-index SS' +test_expect_success 'change in branch A (addition)' ' + git update-index --add LL && + git update-index SS +' mv TT TT- sed -e '/Branch A/s/word/WORD/g' TT rm -f TT- -test_expect_success \ - 'change in branch A (edit)' \ - 'git update-index TT' +test_expect_success 'change in branch A (edit)' ' + git update-index TT +' mkdir DF echo Branch A makes a file at DF/DF, creating a directory DF. 
>DF/DF -test_expect_success \ - 'change in branch A (change file to directory)' \ - 'git update-index --add DF/DF' +test_expect_success 'change in branch A (change file to directory)' ' + git update-index --add DF/DF +' -test_expect_success \ - 'recording branch A tree' \ - 'tree_A=$(git write-tree)' +test_expect_success 'recording branch A tree' ' + tree_A=$(git write-tree) +' ################################################################ # Branch B @@ -94,65 +94,65 @@ test_expect_success \ rm -rf [NDMASLT][NDMASLT] Z DF mkdir Z -test_expect_success \ - 'reading original tree and checking out' \ - 'git read-tree $tree_O && - git checkout-index -a' +test_expect_success 'reading original tree and checking out' ' + git read-tree $tree_O && + git checkout-index -a +' to_remove=$(echo ?D Z/?D) rm -f $to_remove -test_expect_success \ - 'change in branch B (removal)' \ - "git update-index --remove $to_remove" +test_expect_success 'change in branch B (removal)' ' + git update-index --remove $to_remove +' for p in ?M Z/?M do echo This is modified $p in the branch B. >$p - test_expect_success \ - 'change in branch B (modification)' \ - "git update-index $p" + test_expect_success 'change in branch B (modification)' ' + git update-index $p + ' done for p in NA AA Z/NA Z/AA do echo This is added $p in the branch B. >$p - test_expect_success \ - 'change in branch B (addition)' \ - "git update-index --add $p" + test_expect_success 'change in branch B (addition)' ' + git update-index --add $p + ' done echo This is SS from the modified tree. >SS echo This is LL from the modified tree. 
>LL -test_expect_success \ - 'change in branch B (addition and modification)' \ - 'git update-index --add LL && - git update-index SS' +test_expect_success 'change in branch B (addition and modification)' ' + git update-index --add LL && + git update-index SS +' mv TT TT- sed -e '/Branch B/s/word/WORD/g' TT rm -f TT- -test_expect_success \ - 'change in branch B (modification)' \ - 'git update-index TT' +test_expect_success 'change in branch B (modification)' ' + git update-index TT +' echo Branch B makes a file at DF. >DF -test_expect_success \ - 'change in branch B (addition of a file to conflict with directory)' \ - 'git update-index --add DF' +test_expect_success 'change in branch B (addition of a file to conflict with directory)' ' + git update-index --add DF +' -test_expect_success \ - 'recording branch B tree' \ - 'tree_B=$(git write-tree)' +test_expect_success 'recording branch B tree' ' + tree_B=$(git write-tree) +' -test_expect_success \ - 'keep contents of 3 trees for easy access' \ - 'rm -f .git/index && - git read-tree $tree_O && - mkdir .orig-O && - git checkout-index --prefix=.orig-O/ -f -q -a && - rm -f .git/index && - git read-tree $tree_A && - mkdir .orig-A && - git checkout-index --prefix=.orig-A/ -f -q -a && - rm -f .git/index && - git read-tree $tree_B && - mkdir .orig-B && - git checkout-index --prefix=.orig-B/ -f -q -a' +test_expect_success 'keep contents of 3 trees for easy access' ' + rm -f .git/index && + git read-tree $tree_O && + mkdir .orig-O && + git checkout-index --prefix=.orig-O/ -f -q -a && + rm -f .git/index && + git read-tree $tree_A && + mkdir .orig-A && + git checkout-index --prefix=.orig-A/ -f -q -a && + rm -f .git/index && + git read-tree $tree_B && + mkdir .orig-B && + git checkout-index --prefix=.orig-B/ -f -q -a +' From cd26cd6c7c0f6c26a5328c95a667844c27ea3fd0 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Wed, 2 Feb 2022 14:43:00 +0800 Subject: [PATCH 051/150] t/lib-read-tree-m-3way: indent with tabs As 
Documentation/CodingGuidelines says, our shell scripts (including tests) are to use HT for indentation, but this script uses 4-column indent with SP. Fix this. Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- t/lib-read-tree-m-3way.sh | 96 +++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/t/lib-read-tree-m-3way.sh b/t/lib-read-tree-m-3way.sh index 2bf43850be..2da25b3144 100644 --- a/t/lib-read-tree-m-3way.sh +++ b/t/lib-read-tree-m-3way.sh @@ -3,9 +3,9 @@ mkdir Z for a in N D M do - for b in N D M - do - p=$a$b + for b in N D M + do + p=$a$b echo This is $p from the original tree. >$p echo This is Z/$p from the original tree. >Z/$p test_expect_success "adding test file $p and Z/$p" ' @@ -16,7 +16,7 @@ do done echo This is SS from the original tree. >SS test_expect_success 'adding test file SS' ' - git update-index --add SS + git update-index --add SS ' cat >TT <<\EOF This is a trivial merge sample text. @@ -31,10 +31,10 @@ the word, expected to be upcased by Branch B. This concludes the trivial merge sample file. EOF test_expect_success 'adding test file TT' ' - git update-index --add TT + git update-index --add TT ' test_expect_success 'prepare initial tree' ' - tree_O=$(git write-tree) + tree_O=$(git write-tree) ' ################################################################ @@ -46,46 +46,46 @@ test_expect_success 'prepare initial tree' ' to_remove=$(echo D? Z/D?) rm -f $to_remove test_expect_success 'change in branch A (removal)' ' - git update-index --remove $to_remove + git update-index --remove $to_remove ' for p in M? Z/M? do - echo This is modified $p in the branch A. >$p - test_expect_success 'change in branch A (modification)' ' - git update-index $p - ' + echo This is modified $p in the branch A. >$p + test_expect_success 'change in branch A (modification)' ' + git update-index $p + ' done for p in AN AA Z/AN Z/AA do - echo This is added $p in the branch A. 
>$p - test_expect_success 'change in branch A (addition)' ' - git update-index --add $p - ' + echo This is added $p in the branch A. >$p + test_expect_success 'change in branch A (addition)' ' + git update-index --add $p + ' done echo This is SS from the modified tree. >SS echo This is LL from the modified tree. >LL test_expect_success 'change in branch A (addition)' ' - git update-index --add LL && - git update-index SS + git update-index --add LL && + git update-index SS ' mv TT TT- sed -e '/Branch A/s/word/WORD/g' TT rm -f TT- test_expect_success 'change in branch A (edit)' ' - git update-index TT + git update-index TT ' mkdir DF echo Branch A makes a file at DF/DF, creating a directory DF. >DF/DF test_expect_success 'change in branch A (change file to directory)' ' - git update-index --add DF/DF + git update-index --add DF/DF ' test_expect_success 'recording branch A tree' ' - tree_A=$(git write-tree) + tree_A=$(git write-tree) ' ################################################################ @@ -95,64 +95,64 @@ test_expect_success 'recording branch A tree' ' rm -rf [NDMASLT][NDMASLT] Z DF mkdir Z test_expect_success 'reading original tree and checking out' ' - git read-tree $tree_O && - git checkout-index -a + git read-tree $tree_O && + git checkout-index -a ' to_remove=$(echo ?D Z/?D) rm -f $to_remove test_expect_success 'change in branch B (removal)' ' - git update-index --remove $to_remove + git update-index --remove $to_remove ' for p in ?M Z/?M do - echo This is modified $p in the branch B. >$p - test_expect_success 'change in branch B (modification)' ' - git update-index $p - ' + echo This is modified $p in the branch B. >$p + test_expect_success 'change in branch B (modification)' ' + git update-index $p + ' done for p in NA AA Z/NA Z/AA do - echo This is added $p in the branch B. >$p - test_expect_success 'change in branch B (addition)' ' - git update-index --add $p - ' + echo This is added $p in the branch B. 
>$p + test_expect_success 'change in branch B (addition)' ' + git update-index --add $p + ' done echo This is SS from the modified tree. >SS echo This is LL from the modified tree. >LL test_expect_success 'change in branch B (addition and modification)' ' - git update-index --add LL && - git update-index SS + git update-index --add LL && + git update-index SS ' mv TT TT- sed -e '/Branch B/s/word/WORD/g' TT rm -f TT- test_expect_success 'change in branch B (modification)' ' - git update-index TT + git update-index TT ' echo Branch B makes a file at DF. >DF test_expect_success 'change in branch B (addition of a file to conflict with directory)' ' - git update-index --add DF + git update-index --add DF ' test_expect_success 'recording branch B tree' ' - tree_B=$(git write-tree) + tree_B=$(git write-tree) ' test_expect_success 'keep contents of 3 trees for easy access' ' - rm -f .git/index && - git read-tree $tree_O && - mkdir .orig-O && - git checkout-index --prefix=.orig-O/ -f -q -a && - rm -f .git/index && - git read-tree $tree_A && - mkdir .orig-A && - git checkout-index --prefix=.orig-A/ -f -q -a && - rm -f .git/index && - git read-tree $tree_B && - mkdir .orig-B && - git checkout-index --prefix=.orig-B/ -f -q -a + rm -f .git/index && + git read-tree $tree_O && + mkdir .orig-O && + git checkout-index --prefix=.orig-O/ -f -q -a && + rm -f .git/index && + git read-tree $tree_A && + mkdir .orig-A && + git checkout-index --prefix=.orig-A/ -f -q -a && + rm -f .git/index && + git read-tree $tree_B && + mkdir .orig-B && + git checkout-index --prefix=.orig-B/ -f -q -a ' From 59d9442f28ca8874db93aca961225489328444ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 2 Feb 2022 12:15:09 +0100 Subject: [PATCH 052/150] completion tests: re-source git-completion.bash in a subshell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change tests of git-completion.bash that re-source it to do so 
inside a subshell. Re-sourcing it will clobber variables it sets, and in the case of the "GIT_COMPLETION_SHOW_ALL=1" test added in ca2d62b7879 (parse-options: don't complete option aliases by default, 2021-07-16) change the behavior of the completion persistently. Aside from the addition of "(" and ")" on new lines this is an indentation-only change, only the "(" and ")" lines are changed under "git diff -w". So let's change that test, and for good measure do the same for the three tests that precede it, which were added in 8b0eaa41f23 (completion: clear cached --options when sourcing the completion script, 2018-03-22). They may not be wrong, but doing this establishes a more reliable pattern for future tests, which might use these as a template to copy. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 50 +++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 98c6280632..c9805f2147 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -2396,27 +2396,33 @@ test_expect_success 'options with value' ' ' test_expect_success 'sourcing the completion script clears cached commands' ' - __git_compute_all_commands && - verbose test -n "$__git_all_commands" && - . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && - verbose test -z "$__git_all_commands" + ( + __git_compute_all_commands && + verbose test -n "$__git_all_commands" && + . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + verbose test -z "$__git_all_commands" + ) ' test_expect_success 'sourcing the completion script clears cached merge strategies' ' - __git_compute_merge_strategies && - verbose test -n "$__git_merge_strategies" && - . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && - verbose test -z "$__git_merge_strategies" + ( + __git_compute_merge_strategies && + verbose test -n "$__git_merge_strategies" && + . 
"$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + verbose test -z "$__git_merge_strategies" + ) ' test_expect_success 'sourcing the completion script clears cached --options' ' - __gitcomp_builtin checkout && - verbose test -n "$__gitcomp_builtin_checkout" && - __gitcomp_builtin notes_edit && - verbose test -n "$__gitcomp_builtin_notes_edit" && - . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && - verbose test -z "$__gitcomp_builtin_checkout" && - verbose test -z "$__gitcomp_builtin_notes_edit" + ( + __gitcomp_builtin checkout && + verbose test -n "$__gitcomp_builtin_checkout" && + __gitcomp_builtin notes_edit && + verbose test -n "$__gitcomp_builtin_notes_edit" && + . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + verbose test -z "$__gitcomp_builtin_checkout" && + verbose test -z "$__gitcomp_builtin_notes_edit" + ) ' test_expect_success 'option aliases are not shown by default' ' @@ -2424,12 +2430,14 @@ test_expect_success 'option aliases are not shown by default' ' ' test_expect_success 'option aliases are shown with GIT_COMPLETION_SHOW_ALL' ' - . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && - GIT_COMPLETION_SHOW_ALL=1 && export GIT_COMPLETION_SHOW_ALL && - test_completion "git clone --recurs" <<-\EOF - --recurse-submodules Z - --recursive Z - EOF + ( + . 
"$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + GIT_COMPLETION_SHOW_ALL=1 && export GIT_COMPLETION_SHOW_ALL && + test_completion "git clone --recurs" <<-\EOF + --recurse-submodules Z + --recursive Z + EOF + ) ' test_expect_success '__git_complete' ' From d9f88dd8bbf5302256ece5e3c50a1d3d59d2cd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 2 Feb 2022 12:15:10 +0100 Subject: [PATCH 053/150] completion: add a GIT_COMPLETION_SHOW_ALL_COMMANDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a GIT_COMPLETION_SHOW_ALL_COMMANDS=1 configuration setting to go with the existing GIT_COMPLETION_SHOW_ALL=1 added in c099f579b98 (completion: add GIT_COMPLETION_SHOW_ALL env var, 2020-08-19). This will include plumbing commands such as "cat-file" in "git <TAB>" and "git c<TAB>" completion. Without/with this I have 134 and 243 completions with git <TAB>, respectively. It was already possible to do this by tweaking GIT_TESTING_PORCELAIN_COMMAND_LIST= from the outside; that testing variable was added in 84a97131065 (completion: let git provide the completable command list, 2018-05-20). Doing this before loading git-completion.bash worked: export GIT_TESTING_PORCELAIN_COMMAND_LIST="$(git --list-cmds=builtins,main,list-mainporcelain,others,nohelpers,alias,list-complete,config)" But such testing variables are not meant to be used from the outside, and we make no guarantees that those internals won't change. So let's expose this as a dedicated configuration knob. It would be better to teach --list-cmds=* a new category which would include all of these groups, but that's a larger change that we can leave for some other time. 1. 
https://lore.kernel.org/git/CAGP6POJ9gwp+t-eP3TPkivBLLbNb2+qj=61Mehcj=1BgrVOSLA@mail.gmail.com/ Reported-by: Hongyi Zhao Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 13 ++++++++++- t/t9902-completion.sh | 31 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 377d6c5494..2436b8eb6b 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -49,6 +49,11 @@ # and git-switch completion (e.g., completing "foo" when "origin/foo" # exists). # +# GIT_COMPLETION_SHOW_ALL_COMMANDS +# +# When set to "1" suggest all commands, including plumbing commands +# which are hidden by default (e.g. "cat-file" on "git ca"). +# # GIT_COMPLETION_SHOW_ALL # # When set to "1" suggest all options, including options which are @@ -3455,7 +3460,13 @@ __git_main () then __gitcomp "$GIT_TESTING_PORCELAIN_COMMAND_LIST" else - __gitcomp "$(__git --list-cmds=list-mainporcelain,others,nohelpers,alias,list-complete,config)" + local list_cmds=list-mainporcelain,others,nohelpers,alias,list-complete,config + + if test "${GIT_COMPLETION_SHOW_ALL_COMMANDS-}" = "1" + then + list_cmds=builtins,$list_cmds + fi + __gitcomp "$(__git --list-cmds=$list_cmds)" fi ;; esac diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index c9805f2147..c6d6d6ef89 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -2440,6 +2440,37 @@ test_expect_success 'option aliases are shown with GIT_COMPLETION_SHOW_ALL' ' ) ' +test_expect_success 'plumbing commands are excluded without GIT_COMPLETION_SHOW_ALL_COMMANDS' ' + ( + . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + sane_unset GIT_TESTING_PORCELAIN_COMMAND_LIST && + + # Just mainporcelain, not plumbing commands + run_completion "git c" && + grep checkout out && + ! 
grep cat-file out + ) +' + +test_expect_success 'all commands are shown with GIT_COMPLETION_SHOW_ALL_COMMANDS (also main non-builtin)' ' + ( + . "$GIT_BUILD_DIR/contrib/completion/git-completion.bash" && + GIT_COMPLETION_SHOW_ALL_COMMANDS=1 && + export GIT_COMPLETION_SHOW_ALL_COMMANDS && + sane_unset GIT_TESTING_PORCELAIN_COMMAND_LIST && + + # Both mainporcelain and plumbing commands + run_completion "git c" && + grep checkout out && + grep cat-file out && + + # Check "gitk", a "main" command, but not a built-in + more plumbing + run_completion "git g" && + grep gitk out && + grep get-tar-commit-id out + ) +' + test_expect_success '__git_complete' ' unset -f __git_wrap__git_main && From 9325285df4988a35ab376947cc3599aeec0a24b6 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Thu, 3 Feb 2022 10:16:43 +0000 Subject: [PATCH 054/150] doc: check-ignore: code-quote an exclamation mark The plain quoted exclamation mark renders as italics in the Windows pdf help manual. Fix this with back-tick quoting and surrounding double quotes as exemplified by the gitignore.txt guide. While at it, fix the surrounding double quotes for the other special characters usages. Signed-off-by: Philip Oakley Signed-off-by: Junio C Hamano --- Documentation/git-check-ignore.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/git-check-ignore.txt b/Documentation/git-check-ignore.txt index 0c3924a63d..2892799e32 100644 --- a/Documentation/git-check-ignore.txt +++ b/Documentation/git-check-ignore.txt @@ -33,7 +33,7 @@ OPTIONS Instead of printing the paths that are excluded, for each path that matches an exclude pattern, print the exclude pattern together with the path. (Matching an exclude pattern usually - means the path is excluded, but if the pattern begins with '!' + means the path is excluded, but if the pattern begins with "`!`" then it is a negated pattern and matching it means the path is NOT excluded.) 
+ @@ -77,7 +77,7 @@ If `--verbose` is specified, the output is a series of lines of the form: <pathname> is the path of a file being queried, <pattern> is the matching pattern, <source> is the pattern's source file, and <linenum> is the line number of the pattern within that source. If the pattern -contained a `!` prefix or `/` suffix, it will be preserved in the +contained a "`!`" prefix or "`/`" suffix, it will be preserved in the output. <source> will be an absolute path when referring to the file configured by `core.excludesFile`, or relative to the repository root when referring to `.git/info/exclude` or a per-directory exclude file. From 8266e0c02973618f68b45957a2dcb29660737486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 3 Feb 2022 22:40:11 +0100 Subject: [PATCH 055/150] leak tests: fix a memory leak in "test-progress" helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory leak in the test-progress helper, and mark the corresponding "t0500-progress-display.sh" test as being leak-free under SANITIZE=leak. This fixes a leak added in 2bb74b53a4 (Test the progress display, 2019-09-16). My 48f68715b14 (tr2: stop leaking "thread_name" memory, 2021-08-27) had fixed another memory leak in this test (as it did some trace2 testing). 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-progress.c | 1 + t/t0500-progress-display.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c index 5d05cbe789..9265e6ab7c 100644 --- a/t/helper/test-progress.c +++ b/t/helper/test-progress.c @@ -69,6 +69,7 @@ int cmd__progress(int argc, const char **argv) die("invalid input: '%s'\n", line.buf); } stop_progress(&progress); + strbuf_release(&line); return 0; } diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index 22058b503a..f37cf2eb9c 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -2,6 +2,7 @@ test_description='progress display' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh show_cr () { From 587c3d0da67aecc7c9defb576614962f8ca3faf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 3 Feb 2022 22:40:12 +0100 Subject: [PATCH 056/150] progress.c test helper: add missing braces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have braces on one arm of an if/else all of them should have it, per the CodingGuidelines's "When there are multiple arms to a conditional[...]" advice. This formatting change makes a subsequent commit smaller. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-progress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c index 9265e6ab7c..50fd3be3da 100644 --- a/t/helper/test-progress.c +++ b/t/helper/test-progress.c @@ -63,10 +63,11 @@ int cmd__progress(int argc, const char **argv) die("invalid input: '%s'\n", line.buf); progress_test_ns = test_ms * 1000 * 1000; display_throughput(progress, byte_count); - } else if (!strcmp(line.buf, "update")) + } else if (!strcmp(line.buf, "update")) { progress_test_force_update(); - else + } else { die("invalid input: '%s'\n", line.buf); + } } stop_progress(&progress); strbuf_release(&line); From 791afae2924d032186bfad557c462c92025ac901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 3 Feb 2022 22:40:13 +0100 Subject: [PATCH 057/150] progress.c tests: make start/stop commands on stdin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the usage of the "test-tool progress" introduced in 2bb74b53a49 (Test the progress display, 2019-09-16) to take commands like "start" and "stop" on stdin, instead of running them implicitly. This makes for tests that are easier to read, since the recipe will mirror the API usage, and allows for easily testing invalid usage that would yield (or should yield) a BUG(), e.g. providing two "start" calls in a row. A subsequent commit will add such tests. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/helper/test-progress.c | 44 +++++++++++++++++++++------- t/t0500-progress-display.sh | 58 +++++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c index 50fd3be3da..6cc9735b60 100644 --- a/t/helper/test-progress.c +++ b/t/helper/test-progress.c @@ -3,6 +3,9 @@ * * Reads instructions from standard input, one instruction per line: * + * "start <total>[ <title>]" - Call start_progress(title, total), + * Uses the default title of "Working hard" + * if the " <title>" is omitted. * "progress <items>" - Call display_progress() with the given item count * as parameter. * "throughput <bytes> <millis> - Call display_throughput() with the given @@ -10,6 +13,7 @@ * specify the time elapsed since the * start_progress() call. * "update" - Set the 'progress_update' flag. + * "stop" - Call stop_progress(). * * See 't0500-progress-display.sh' for examples. 
*/ @@ -19,34 +23,50 @@ #include "parse-options.h" #include "progress.h" #include "strbuf.h" +#include "string-list.h" int cmd__progress(int argc, const char **argv) { - int total = 0; - const char *title; + const char *const default_title = "Working hard"; + struct string_list titles = STRING_LIST_INIT_DUP; struct strbuf line = STRBUF_INIT; - struct progress *progress; + struct progress *progress = NULL; const char *usage[] = { - "test-tool progress [--total=<n>] <progress-title>", + "test-tool progress <stdin", NULL }; struct option options[] = { - OPT_INTEGER(0, "total", &total, "total number of items"), OPT_END(), }; argc = parse_options(argc, argv, NULL, options, usage, 0); - if (argc != 1) - die("need a title for the progress output"); - title = argv[0]; + if (argc) + usage_with_options(usage, options); progress_testing = 1; - progress = start_progress(title, total); while (strbuf_getline(&line, stdin) != EOF) { char *end; - if (skip_prefix(line.buf, "progress ", (const char **) &end)) { + if (skip_prefix(line.buf, "start ", (const char **) &end)) { + uint64_t total = strtoull(end, &end, 10); + const char *title; + + /* + * We can't use "end + 1" as an argument to + * start_progress(), it doesn't xstrdup() its + * "title" argument. We need to hold onto a + * valid "char *" for it until the end. 
+ */ + if (!*end) + title = default_title; + else if (*end == ' ') + title = string_list_insert(&titles, end + 1)->string; + else + die("invalid input: '%s'\n", line.buf); + + progress = start_progress(title, total); + } else if (skip_prefix(line.buf, "progress ", (const char **) &end)) { uint64_t item_count = strtoull(end, &end, 10); if (*end != '\0') die("invalid input: '%s'\n", line.buf); @@ -65,12 +85,14 @@ int cmd__progress(int argc, const char **argv) display_throughput(progress, byte_count); } else if (!strcmp(line.buf, "update")) { progress_test_force_update(); + } else if (!strcmp(line.buf, "stop")) { + stop_progress(&progress); } else { die("invalid input: '%s'\n", line.buf); } } - stop_progress(&progress); strbuf_release(&line); + string_list_clear(&titles, 0); return 0; } diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index f37cf2eb9c..27ab4218b0 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -18,6 +18,7 @@ test_expect_success 'simple progress display' ' EOF cat >in <<-\EOF && + start 0 update progress 1 update @@ -26,8 +27,9 @@ test_expect_success 'simple progress display' ' progress 4 update progress 5 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -42,11 +44,13 @@ test_expect_success 'progress display with total' ' EOF cat >in <<-\EOF && + start 3 progress 1 progress 2 progress 3 + stop EOF - test-tool progress --total=3 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -63,14 +67,14 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6 progress 100 progress 1000 progress 10000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr && + 
test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -89,16 +93,16 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6 update progress 1 update progress 2 progress 10000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -117,14 +121,14 @@ Working hard.......2.........3.........4.........5.........6: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6 progress 25000 progress 50000 progress 75000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6" \ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -141,14 +145,14 @@ Working hard.......2.........3.........4.........5.........6.........7.........: EOF cat >in <<-\EOF && + start 100000 Working hard.......2.........3.........4.........5.........6.........7......... progress 25000 progress 50000 progress 75000 progress 100000 + stop EOF - test-tool progress --total=100000 \ - "Working hard.......2.........3.........4.........5.........6.........7........." 
\ - <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -165,12 +169,14 @@ test_expect_success 'progress shortens - crazy caller' ' EOF cat >in <<-\EOF && + start 1000 progress 100 progress 200 progress 1 progress 1000 + stop EOF - test-tool progress --total=1000 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -186,6 +192,7 @@ test_expect_success 'progress display with throughput' ' EOF cat >in <<-\EOF && + start 0 throughput 102400 1000 update progress 10 @@ -198,8 +205,9 @@ test_expect_success 'progress display with throughput' ' throughput 409600 4000 update progress 40 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -215,6 +223,7 @@ test_expect_success 'progress display with throughput and total' ' EOF cat >in <<-\EOF && + start 40 throughput 102400 1000 progress 10 throughput 204800 2000 @@ -223,8 +232,9 @@ test_expect_success 'progress display with throughput and total' ' progress 30 throughput 409600 4000 progress 40 + stop EOF - test-tool progress --total=40 "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -240,6 +250,7 @@ test_expect_success 'cover up after throughput shortens' ' EOF cat >in <<-\EOF && + start 0 throughput 409600 1000 update progress 1 @@ -252,8 +263,9 @@ test_expect_success 'cover up after throughput shortens' ' throughput 1638400 4000 update progress 4 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -268,6 +280,7 @@ test_expect_success 'cover up after throughput shortens a lot' ' EOF cat >in <<-\EOF && + start 0 throughput 1 1000 update progress 1 @@ -277,8 +290,9 @@ test_expect_success 'cover up after throughput shortens a lot' ' throughput 3145728 3000 update 
progress 3 + stop EOF - test-tool progress "Working hard" <in 2>stderr && + test-tool progress <in 2>stderr && show_cr <stderr >out && test_cmp expect out @@ -286,6 +300,7 @@ test_expect_success 'cover up after throughput shortens a lot' ' test_expect_success 'progress generates traces' ' cat >in <<-\EOF && + start 40 throughput 102400 1000 update progress 10 @@ -298,10 +313,11 @@ test_expect_success 'progress generates traces' ' throughput 409600 4000 update progress 40 + stop EOF - GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress --total=40 \ - "Working hard" <in 2>stderr && + GIT_TRACE2_EVENT="$(pwd)/trace.event" test-tool progress \ + <in 2>stderr && # t0212/parse_events.perl intentionally omits regions and data. test_region progress "Working hard" trace.event && From bbfb1c243d5da5dc0427346315d915ba02e8d0dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:14 +0100 Subject: [PATCH 058/150] progress.c tests: test some invalid usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test what happens when we "stop" without a "start", omit the "stop" after a "start", or start two concurrent progress bars. This extends the trace2 tests added in 98a13647408 (trace2: log progress time and throughput, 2020-05-12). These tests are not merely testing the helper, but invalid API usage that can happen if the progress.c API is misused. The "without stop" test will leak under SANITIZE=leak, since this buggy use of the API will leak memory. But let's not skip it entirely, or use the "!SANITIZE_LEAK" prerequisite check as we'd do with tests that we're skipping due to leaks we haven't fixed yet. Instead annotate the specific command that should skip leak checking with custom $LSAN_OPTIONS[1]. 1. 
https://github.com/google/sanitizers/wiki/AddressSanitizerLeakSanitizer Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0500-progress-display.sh | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/t/t0500-progress-display.sh b/t/t0500-progress-display.sh index 27ab4218b0..1eb3a8306b 100755 --- a/t/t0500-progress-display.sh +++ b/t/t0500-progress-display.sh @@ -325,4 +325,54 @@ test_expect_success 'progress generates traces' ' grep "\"key\":\"total_bytes\",\"value\":\"409600\"" trace.event ' +test_expect_success 'progress generates traces: stop / start' ' + cat >in <<-\EOF && + start 0 + stop + EOF + + GIT_TRACE2_EVENT="$PWD/trace-startstop.event" test-tool progress \ + <in 2>stderr && + test_region progress "Working hard" trace-startstop.event +' + +test_expect_success 'progress generates traces: start without stop' ' + cat >in <<-\EOF && + start 0 + EOF + + GIT_TRACE2_EVENT="$PWD/trace-start.event" \ + LSAN_OPTIONS=detect_leaks=0 \ + test-tool progress \ + <in 2>stderr && + grep region_enter.*progress trace-start.event && + ! grep region_leave.*progress trace-start.event +' + +test_expect_success 'progress generates traces: stop without start' ' + cat >in <<-\EOF && + stop + EOF + + GIT_TRACE2_EVENT="$PWD/trace-stop.event" test-tool progress \ + <in 2>stderr && + ! grep region_enter.*progress trace-stop.event && + ! grep region_leave.*progress trace-stop.event +' + +test_expect_success 'progress generates traces: start with active progress bar (no stops)' ' + cat >in <<-\EOF && + start 0 One + start 0 Two + EOF + + GIT_TRACE2_EVENT="$PWD/trace-2start.event" \ + LSAN_OPTIONS=detect_leaks=0 \ + test-tool progress \ + <in 2>stderr && + grep region_enter.*progress.*One trace-2start.event && + grep region_enter.*progress.*Two trace-2start.event && + ! 
grep region_leave trace-2start.event +' + test_done From a02014bb4c711db69e029e21f6ea776c4cc7f385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:15 +0100 Subject: [PATCH 059/150] progress.h: format and be consistent with progress.c naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an inconsistency introduced in dc6a0757c4f (make struct progress an opaque type, 2007-10-30) and rename the "progress" parameters to stop_progress{,_msg}() to "p_progress". Now these match the corresponding parameters in the *.c code. While we're at it let's move the definition of the former below the latter, a subsequent change will start defining stop_progress() in terms of stop_progress_msg(). Let's also remove the excess whitespace at the end of the file. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/progress.h b/progress.h index f1913acf73..4f6806904a 100644 --- a/progress.h +++ b/progress.h @@ -18,7 +18,6 @@ struct progress *start_sparse_progress(const char *title, uint64_t total); struct progress *start_delayed_progress(const char *title, uint64_t total); struct progress *start_delayed_sparse_progress(const char *title, uint64_t total); -void stop_progress(struct progress **progress); -void stop_progress_msg(struct progress **progress, const char *msg); - +void stop_progress_msg(struct progress **p_progress, const char *msg); +void stop_progress(struct progress **p_progress); #endif From 1ccad6a1f175080c3896a70501dcd6c9e0a0af0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:16 +0100 Subject: [PATCH 060/150] progress.c: use dereferenced "progress" variable, not "(*p_progress)" MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 98a13647408 (trace2: log progress time and throughput, 2020-05-12) stop_progress() dereferences a "struct progress **" parameter in several places. Extract a dereferenced variable to reduce clutter and make it clearer who needs to write to this parameter. Now instead of using "*p_progress" several times in stop_progress() we check it once for NULL and then use a dereferenced "progress" variable thereafter. This uses the same pattern as the adjacent stop_progress_msg() function, see ac900fddb7f (progress: don't dereference before checking for NULL, 2020-08-10). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/progress.c b/progress.c index 680c6a8bf9..6e7daa3f8a 100644 --- a/progress.c +++ b/progress.c @@ -319,21 +319,24 @@ static void finish_if_sparse(struct progress *progress) void stop_progress(struct progress **p_progress) { + struct progress *progress; + if (!p_progress) BUG("don't provide NULL to stop_progress"); + progress = *p_progress; - finish_if_sparse(*p_progress); + finish_if_sparse(progress); - if (*p_progress) { + if (progress) { trace2_data_intmax("progress", the_repository, "total_objects", - (*p_progress)->total); + progress->total); - if ((*p_progress)->throughput) + if (progress->throughput) trace2_data_intmax("progress", the_repository, "total_bytes", - (*p_progress)->throughput->curr_total); + progress->throughput->curr_total); - trace2_region_leave("progress", (*p_progress)->title, the_repository); + trace2_region_leave("progress", progress->title, the_repository); } stop_progress_msg(p_progress, _("done")); From accf1eb1d0f102c6b8f099fa6063216818e45c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:17 +0100 Subject: [PATCH 061/150] progress.c: 
refactor stop_progress{,_msg}() to use helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create two new static helpers for the stop_progress() and stop_progress_msg() functions. As we'll see in the subsequent commit having those two split up doesn't make much sense, and results in a bug in how we log to trace2. This narrow preparatory change makes the diff for that subsequent change smaller. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 62 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/progress.c b/progress.c index 6e7daa3f8a..6cc7f902f5 100644 --- a/progress.c +++ b/progress.c @@ -317,6 +317,36 @@ static void finish_if_sparse(struct progress *progress) display_progress(progress, progress->total); } +static void force_last_update(struct progress *progress, const char *msg) +{ + char *buf; + struct throughput *tp = progress->throughput; + + if (tp) { + uint64_t now_ns = progress_getnanotime(progress); + unsigned int misecs, rate; + misecs = ((now_ns - progress->start_ns) * 4398) >> 32; + rate = tp->curr_total / (misecs ? 
misecs : 1); + throughput_string(&tp->display, tp->curr_total, rate); + } + progress_update = 1; + buf = xstrfmt(", %s.\n", msg); + display(progress, progress->last_value, buf); + free(buf); +} + +static void log_trace2(struct progress *progress) +{ + trace2_data_intmax("progress", the_repository, "total_objects", + progress->total); + + if (progress->throughput) + trace2_data_intmax("progress", the_repository, "total_bytes", + progress->throughput->curr_total); + + trace2_region_leave("progress", progress->title, the_repository); +} + void stop_progress(struct progress **p_progress) { struct progress *progress; @@ -327,17 +357,8 @@ void stop_progress(struct progress **p_progress) finish_if_sparse(progress); - if (progress) { - trace2_data_intmax("progress", the_repository, "total_objects", - progress->total); - - if (progress->throughput) - trace2_data_intmax("progress", the_repository, - "total_bytes", - progress->throughput->curr_total); - - trace2_region_leave("progress", progress->title, the_repository); - } + if (progress) + log_trace2(*p_progress); stop_progress_msg(p_progress, _("done")); } @@ -353,23 +374,10 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) if (!progress) return; *p_progress = NULL; - if (progress->last_value != -1) { - /* Force the last update */ - char *buf; - struct throughput *tp = progress->throughput; - if (tp) { - uint64_t now_ns = progress_getnanotime(progress); - unsigned int misecs, rate; - misecs = ((now_ns - progress->start_ns) * 4398) >> 32; - rate = tp->curr_total / (misecs ? 
misecs : 1); - throughput_string(&tp->display, tp->curr_total, rate); - } - progress_update = 1; - buf = xstrfmt(", %s.\n", msg); - display(progress, progress->last_value, buf); - free(buf); - } + if (progress->last_value != -1) + force_last_update(progress, msg); + clear_progress_signal(); strbuf_release(&progress->counters_sb); if (progress->throughput) From 74900a6b3513e0908b1d16df7855e9d478b20b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:18 +0100 Subject: [PATCH 062/150] progress API: unify stop_progress{,_msg}(), fix trace2 bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug that's been with us ever since 98a13647408 (trace2: log progress time and throughput, 2020-05-12), when the stop_progress_msg() API was used we didn't log a "region_leave" for the "region_enter" we start in "start_progress_delay()". The only user of the "stop_progress_msg()" function is "index-pack". Let's add a previously failing test to check that we have the same number of "region_enter" and "region_leave" events, with "-v" we'll log progress even in the test environment. In addition to that we've had a submarine bug here introduced with 9d81ecb52b5 (progress: add sparse mode to force 100% complete message, 2019-03-21). The "start_sparse_progress()" API would only do the right thing if the progress was ended with "stop_progress()", not "stop_progress_msg()". The only user of that API uses "stop_progress()", but let's still fix that along with the trace2 issue by making "stop_progress()" a trivial wrapper for "stop_progress_msg()". We can also drop the "if (progress)" test from "finish_if_sparse()". It's now a helper for the small "stop_progress_msg()" function. We'll already have returned from it if "progress" is "NULL". 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- progress.c | 21 +++------------------ progress.h | 6 +++++- t/t5316-pack-delta-depth.sh | 6 +++++- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/progress.c b/progress.c index 6cc7f902f5..0cdd875d37 100644 --- a/progress.c +++ b/progress.c @@ -311,8 +311,7 @@ struct progress *start_delayed_sparse_progress(const char *title, static void finish_if_sparse(struct progress *progress) { - if (progress && - progress->sparse && + if (progress->sparse && progress->last_value != progress->total) display_progress(progress, progress->total); } @@ -347,22 +346,6 @@ static void log_trace2(struct progress *progress) trace2_region_leave("progress", progress->title, the_repository); } -void stop_progress(struct progress **p_progress) -{ - struct progress *progress; - - if (!p_progress) - BUG("don't provide NULL to stop_progress"); - progress = *p_progress; - - finish_if_sparse(progress); - - if (progress) - log_trace2(*p_progress); - - stop_progress_msg(p_progress, _("done")); -} - void stop_progress_msg(struct progress **p_progress, const char *msg) { struct progress *progress; @@ -375,8 +358,10 @@ void stop_progress_msg(struct progress **p_progress, const char *msg) return; *p_progress = NULL; + finish_if_sparse(progress); if (progress->last_value != -1) force_last_update(progress, msg); + log_trace2(progress); clear_progress_signal(); strbuf_release(&progress->counters_sb); diff --git a/progress.h b/progress.h index 4f6806904a..3a945637c8 100644 --- a/progress.h +++ b/progress.h @@ -1,5 +1,6 @@ #ifndef PROGRESS_H #define PROGRESS_H +#include "gettext.h" struct progress; @@ -19,5 +20,8 @@ struct progress *start_delayed_progress(const char *title, uint64_t total); struct progress *start_delayed_sparse_progress(const char *title, uint64_t total); void stop_progress_msg(struct progress **p_progress, const char *msg); -void stop_progress(struct progress 
**p_progress); +static inline void stop_progress(struct progress **p_progress) +{ + stop_progress_msg(p_progress, _("done")); +} #endif diff --git a/t/t5316-pack-delta-depth.sh b/t/t5316-pack-delta-depth.sh index 759169d074..bbe2e69c75 100755 --- a/t/t5316-pack-delta-depth.sh +++ b/t/t5316-pack-delta-depth.sh @@ -61,7 +61,11 @@ test_expect_success 'create series of packs' ' echo $cur echo "$(git rev-parse :file) file" } | git pack-objects --stdout >tmp && - git index-pack --stdin --fix-thin <tmp || return 1 + GIT_TRACE2_EVENT=$PWD/trace \ + git index-pack -v --stdin --fix-thin <tmp || return 1 && + grep -c region_enter.*progress trace >enter && + grep -c region_leave.*progress trace >leave && + test_cmp enter leave && prev=$cur done ' From b3118a56f99ca0ec872b84dc760a0dc778f3890e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Thu, 3 Feb 2022 22:40:19 +0100 Subject: [PATCH 063/150] pack-bitmap-write.c: don't return without stop_progress() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug that's been here since 7cc8f971085 (pack-objects: implement bitmap writing, 2013-12-21), we did not call stop_progress() if we reached the early exit in this function. We could call stop_progress() before we return, but better yet is to defer calling start_progress() until we need it. For now this only matters in practice because we'd previously omit the "region_leave" for the progress trace2 event. 
Suggested-by: SZEDER Gábor <szeder.dev@gmail.com> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- pack-bitmap-write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 88d9e696a5..f0b4044e2b 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -544,15 +544,15 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, QSORT(indexed_commits, indexed_commits_nr, date_compare); - if (writer.show_progress) - writer.progress = start_progress("Selecting bitmap commits", 0); - if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) push_bitmapped_commit(indexed_commits[i]); return; } + if (writer.show_progress) + writer.progress = start_progress("Selecting bitmap commits", 0); + for (;;) { struct commit *chosen = NULL; From 961b130d20c9aea322b94a639a63ec8cca9f14fc Mon Sep 17 00:00:00 2001 From: Glen Choo <chooglen@google.com> Date: Fri, 28 Jan 2022 16:04:45 -0800 Subject: [PATCH 064/150] branch: add --recurse-submodules option for branch creation To improve the submodules UX, we would like to teach Git to handle branches in submodules. Start this process by teaching "git branch" the --recurse-submodules option so that "git branch --recurse-submodules topic" will create the `topic` branch in the superproject and its submodules. Although this commit does not introduce breaking changes, it does not work well with existing --recurse-submodules commands because "git branch --recurse-submodules" writes to the submodule ref store, but most commands only consider the superproject gitlink and ignore the submodule ref store. For example, "git checkout --recurse-submodules" will check out the commits in the superproject gitlinks (and put the submodules in detached HEAD) instead of checking out the submodule branches. Because of this, this commit introduces a new configuration value, `submodule.propagateBranches`. 
The plan is for Git commands to prioritize submodule ref store information over superproject gitlinks if this value is true. Because "git branch --recurse-submodules" writes to submodule ref stores, for the sake of clarity, it will not function unless this configuration value is set. This commit also includes changes that support working with submodules from a superproject commit because "branch --recurse-submodules" (and future commands) need to read .gitmodules and gitlinks from the superproject commit, but submodules are typically read from the filesystem's .gitmodules and the index's gitlinks. These changes are: * add a submodules_of_tree() helper that gives the relevant information of an in-tree submodule (e.g. path and oid) and initializes the repository * add is_tree_submodule_active() by adding a treeish_name parameter to is_submodule_active() * add the "submodulesNotUpdated" advice to advise users to update the submodules in their trees Incidentally, fix an incorrect usage string that combined the 'list' usage of git branch (-l) with the 'create' usage; this string has been incorrect since its inception, a8dfd5eac4 (Make builtin-branch.c use parse_options., 2007-10-07).
Helped-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Glen Choo <chooglen@google.com> Reviewed-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/advice.txt | 3 + Documentation/config/submodule.txt | 37 ++-- Documentation/git-branch.txt | 19 +- advice.c | 1 + advice.h | 1 + branch.c | 141 ++++++++++++++ branch.h | 29 +++ builtin/branch.c | 44 ++++- builtin/submodule--helper.c | 38 ++++ submodule-config.c | 61 ++++++ submodule-config.h | 34 ++++ submodule.c | 11 +- submodule.h | 3 + t/t3207-branch-submodule.sh | 292 +++++++++++++++++++++++++++++ 14 files changed, 694 insertions(+), 20 deletions(-) create mode 100755 t/t3207-branch-submodule.sh diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index 063eec2511..adee26fbbb 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -116,6 +116,9 @@ advice.*:: submoduleAlternateErrorStrategyDie:: Advice shown when a submodule.alternateErrorStrategy option configured to "die" causes a fatal error. + submodulesNotUpdated:: + Advice shown when a user runs a submodule command that fails + because `git submodule update --init` was not run. addIgnoredFile:: Advice shown if a user attempts to add an ignored file to the index. diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt index ee454f8126..6490527b45 100644 --- a/Documentation/config/submodule.txt +++ b/Documentation/config/submodule.txt @@ -59,18 +59,33 @@ submodule.active:: submodule.recurse:: A boolean indicating if commands should enable the `--recurse-submodules` - option by default. - Applies to all commands that support this option - (`checkout`, `fetch`, `grep`, `pull`, `push`, `read-tree`, `reset`, - `restore` and `switch`) except `clone` and `ls-files`. + option by default. Defaults to false. ++ +When set to true, it can be deactivated via the +`--no-recurse-submodules` option. 
Note that some Git commands +lacking this option may call some of the above commands affected by +`submodule.recurse`; for instance `git remote update` will call +`git fetch` but does not have a `--no-recurse-submodules` option. +For these commands a workaround is to temporarily change the +configuration value by using `git -c submodule.recurse=0`. ++ +The following list shows the commands that accept +`--recurse-submodules` and whether they are supported by this +setting. + +* `checkout`, `fetch`, `grep`, `pull`, `push`, `read-tree`, +`reset`, `restore` and `switch` are always supported. +* `clone` and `ls-files` are not supported. +* `branch` is supported only if `submodule.propagateBranches` is +enabled + +submodule.propagateBranches:: + [EXPERIMENTAL] A boolean that enables branching support when + using `--recurse-submodules` or `submodule.recurse=true`. + Enabling this will allow certain commands to accept + `--recurse-submodules` and certain commands that already accept + `--recurse-submodules` will now consider branches. Defaults to false. - When set to true, it can be deactivated via the - `--no-recurse-submodules` option. Note that some Git commands - lacking this option may call some of the above commands affected by - `submodule.recurse`; for instance `git remote update` will call - `git fetch` but does not have a `--no-recurse-submodules` option. - For these commands a workaround is to temporarily change the - configuration value by using `git -c submodule.recurse=0`. submodule.fetchJobs:: Specifies how many submodules are fetched/cloned at the same time. diff --git a/Documentation/git-branch.txt b/Documentation/git-branch.txt index 731e340cbc..c8b4f9ce3c 100644 --- a/Documentation/git-branch.txt +++ b/Documentation/git-branch.txt @@ -16,7 +16,8 @@ SYNOPSIS [--points-at <object>] [--format=<format>] [(-r | --remotes) | (-a | --all)] [--list] [<pattern>...] 
-'git branch' [--track[=(direct|inherit)] | --no-track] [-f] <branchname> [<start-point>] +'git branch' [--track[=(direct|inherit)] | --no-track] [-f] + [--recurse-submodules] <branchname> [<start-point>] 'git branch' (--set-upstream-to=<upstream> | -u <upstream>) [<branchname>] 'git branch' --unset-upstream [<branchname>] 'git branch' (-m | -M) [<oldbranch>] <newbranch> @@ -235,6 +236,22 @@ how the `branch.<name>.remote` and `branch.<name>.merge` options are used. Do not set up "upstream" configuration, even if the branch.autoSetupMerge configuration variable is set. +--recurse-submodules:: + THIS OPTION IS EXPERIMENTAL! Causes the current command to + recurse into submodules if `submodule.propagateBranches` is + enabled. See `submodule.propagateBranches` in + linkgit:git-config[1]. Currently, only branch creation is + supported. ++ +When used in branch creation, a new branch <branchname> will be created +in the superproject and all of the submodules in the superproject's +<start-point>. In submodules, the branch will point to the submodule +commit in the superproject's <start-point> but the branch's tracking +information will be set up based on the submodule's branches and remotes +e.g. `git branch --recurse-submodules topic origin/main` will create the +submodule branch "topic" that points to the submodule commit in the +superproject's "origin/main", but tracks the submodule's "origin/main". + --set-upstream:: As this option had confusing syntax, it is no longer supported. Please use `--track` or `--set-upstream-to` instead. 
diff --git a/advice.c b/advice.c index 1dfc91d176..e00d30254c 100644 --- a/advice.c +++ b/advice.c @@ -70,6 +70,7 @@ static struct { [ADVICE_STATUS_HINTS] = { "statusHints", 1 }, [ADVICE_STATUS_U_OPTION] = { "statusUoption", 1 }, [ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE] = { "submoduleAlternateErrorStrategyDie", 1 }, + [ADVICE_SUBMODULES_NOT_UPDATED] = { "submodulesNotUpdated", 1 }, [ADVICE_UPDATE_SPARSE_PATH] = { "updateSparsePath", 1 }, [ADVICE_WAITING_FOR_EDITOR] = { "waitingForEditor", 1 }, }; diff --git a/advice.h b/advice.h index 601265fd10..a7521d6087 100644 --- a/advice.h +++ b/advice.h @@ -44,6 +44,7 @@ struct string_list; ADVICE_STATUS_HINTS, ADVICE_STATUS_U_OPTION, ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE, + ADVICE_SUBMODULES_NOT_UPDATED, ADVICE_UPDATE_SPARSE_PATH, ADVICE_WAITING_FOR_EDITOR, ADVICE_SKIPPED_CHERRY_PICKS, diff --git a/branch.c b/branch.c index 02d46a69b8..70026b3c79 100644 --- a/branch.c +++ b/branch.c @@ -8,6 +8,8 @@ #include "sequencer.h" #include "commit.h" #include "worktree.h" +#include "submodule-config.h" +#include "run-command.h" struct tracking { struct refspec_item spec; @@ -483,6 +485,145 @@ void dwim_and_setup_tracking(struct repository *r, const char *new_ref, setup_tracking(new_ref, real_orig_ref, track, quiet); } +/** + * Creates a branch in a submodule by calling + * create_branches_recursively() in a child process. The child process + * is necessary because install_branch_config_multiple_remotes() (which + * is called by setup_tracking()) does not support writing configs to + * submodules. 
+ */ +static int submodule_create_branch(struct repository *r, + const struct submodule *submodule, + const char *name, const char *start_oid, + const char *tracking_name, int force, + int reflog, int quiet, + enum branch_track track, int dry_run) +{ + int ret = 0; + struct child_process child = CHILD_PROCESS_INIT; + struct strbuf child_err = STRBUF_INIT; + struct strbuf out_buf = STRBUF_INIT; + char *out_prefix = xstrfmt("submodule '%s': ", submodule->name); + child.git_cmd = 1; + child.err = -1; + child.stdout_to_stderr = 1; + + prepare_other_repo_env(&child.env_array, r->gitdir); + /* + * submodule_create_branch() is indirectly invoked by "git + * branch", but we cannot invoke "git branch" in the child + * process. "git branch" accepts a branch name and start point, + * where the start point is assumed to provide both the OID + * (start_oid) and the branch to use for tracking + * (tracking_name). But when recursing through submodules, + * start_oid and tracking name need to be specified separately + * (see create_branches_recursively()). 
+ */ + strvec_pushl(&child.args, "submodule--helper", "create-branch", NULL); + if (dry_run) + strvec_push(&child.args, "--dry-run"); + if (force) + strvec_push(&child.args, "--force"); + if (quiet) + strvec_push(&child.args, "--quiet"); + if (reflog) + strvec_push(&child.args, "--create-reflog"); + if (track == BRANCH_TRACK_ALWAYS || track == BRANCH_TRACK_EXPLICIT) + strvec_push(&child.args, "--track"); + + strvec_pushl(&child.args, name, start_oid, tracking_name, NULL); + + if ((ret = start_command(&child))) + return ret; + ret = finish_command(&child); + strbuf_read(&child_err, child.err, 0); + strbuf_add_lines(&out_buf, out_prefix, child_err.buf, child_err.len); + + if (ret) + fprintf(stderr, "%s", out_buf.buf); + else + printf("%s", out_buf.buf); + + strbuf_release(&child_err); + strbuf_release(&out_buf); + return ret; +} + +void create_branches_recursively(struct repository *r, const char *name, + const char *start_commitish, + const char *tracking_name, int force, + int reflog, int quiet, enum branch_track track, + int dry_run) +{ + int i = 0; + char *branch_point = NULL; + struct object_id super_oid; + struct submodule_entry_list submodule_entry_list; + + /* Perform dwim on start_commitish to get super_oid and branch_point. */ + dwim_branch_start(r, start_commitish, BRANCH_TRACK_NEVER, + &branch_point, &super_oid); + + /* + * If we were not given an explicit name to track, then assume we are at + * the top level and, just like the non-recursive case, the tracking + * name is the branch point. + */ + if (!tracking_name) + tracking_name = branch_point; + + submodules_of_tree(r, &super_oid, &submodule_entry_list); + /* + * Before creating any branches, first check that the branch can + * be created in every submodule. 
+ */ + for (i = 0; i < submodule_entry_list.entry_nr; i++) { + if (submodule_entry_list.entries[i].repo == NULL) { + if (advice_enabled(ADVICE_SUBMODULES_NOT_UPDATED)) + advise(_("You may try updating the submodules using 'git checkout %s && git submodule update --init'"), + start_commitish); + die(_("submodule '%s': unable to find submodule"), + submodule_entry_list.entries[i].submodule->name); + } + + if (submodule_create_branch( + submodule_entry_list.entries[i].repo, + submodule_entry_list.entries[i].submodule, name, + oid_to_hex(&submodule_entry_list.entries[i] + .name_entry->oid), + tracking_name, force, reflog, quiet, track, 1)) + die(_("submodule '%s': cannot create branch '%s'"), + submodule_entry_list.entries[i].submodule->name, + name); + } + + create_branch(the_repository, name, start_commitish, force, 0, reflog, quiet, + BRANCH_TRACK_NEVER, dry_run); + if (dry_run) + return; + /* + * NEEDSWORK If tracking was set up in the superproject but not the + * submodule, users might expect "git branch --recurse-submodules" to + * fail or give a warning, but this is not yet implemented because it is + * tedious to determine whether or not tracking was set up in the + * superproject. 
+ */ + setup_tracking(name, tracking_name, track, quiet); + + for (i = 0; i < submodule_entry_list.entry_nr; i++) { + if (submodule_create_branch( + submodule_entry_list.entries[i].repo, + submodule_entry_list.entries[i].submodule, name, + oid_to_hex(&submodule_entry_list.entries[i] + .name_entry->oid), + tracking_name, force, reflog, quiet, track, 0)) + die(_("submodule '%s': cannot create branch '%s'"), + submodule_entry_list.entries[i].submodule->name, + name); + repo_clear(submodule_entry_list.entries[i].repo); + } +} + void remove_merge_branch_state(struct repository *r) { unlink(git_path_merge_head(r)); diff --git a/branch.h b/branch.h index 509cfcc34e..04df2aa5b5 100644 --- a/branch.h +++ b/branch.h @@ -71,6 +71,35 @@ void create_branch(struct repository *r, int reflog, int quiet, enum branch_track track, int dry_run); +/* + * Creates a new branch in a repository and its submodules (and its + * submodules, recursively). The parameters are mostly analogous to + * those of create_branch() except for start_name, which is represented + * by two different parameters: + * + * - start_commitish is the commit-ish, in repository r, that determines + * which commits the branches will point to. The superproject branch + * will point to the commit of start_commitish and the submodule + * branches will point to the gitlink commit oids in start_commitish's + * tree. + * + * - tracking_name is the name of the ref, in repository r, that will be + * used to set up tracking information. This value is propagated to + * all submodules, which will evaluate the ref using their own ref + * stores. If NULL, this defaults to start_commitish. + * + * When this function is called on the superproject, start_commitish + * can be any user-provided ref and tracking_name can be NULL (similar + * to create_branches()). But when recursing through submodules, + * start_commitish is the plain gitlink commit oid. 
Since the oid cannot + * be used for tracking information, tracking_name is propagated and + * used for tracking instead. + */ +void create_branches_recursively(struct repository *r, const char *name, + const char *start_commitish, + const char *tracking_name, int force, + int reflog, int quiet, enum branch_track track, + int dry_run); /* * Check if 'name' can be a valid name for a branch; die otherwise. * Return 1 if the named branch already exists; return 0 otherwise. diff --git a/builtin/branch.c b/builtin/branch.c index 209b1cc442..d9a3ad53dd 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -27,7 +27,8 @@ static const char * const builtin_branch_usage[] = { N_("git branch [<options>] [-r | -a] [--merged] [--no-merged]"), - N_("git branch [<options>] [-l] [-f] <branch-name> [<start-point>]"), + N_("git branch [<options>] [-f] [--recurse-submodules] <branch-name> [<start-point>]"), + N_("git branch [<options>] [-l] [<pattern>...]"), N_("git branch [<options>] [-r] (-d | -D) <branch-name>..."), N_("git branch [<options>] (-m | -M) [<old-branch>] <new-branch>"), N_("git branch [<options>] (-c | -C) [<old-branch>] <new-branch>"), @@ -38,6 +39,8 @@ static const char * const builtin_branch_usage[] = { static const char *head; static struct object_id head_oid; +static int recurse_submodules = 0; +static int submodule_propagate_branches = 0; static int branch_use_color = -1; static char branch_colors[][COLOR_MAXLEN] = { @@ -99,6 +102,15 @@ static int git_branch_config(const char *var, const char *value, void *cb) return config_error_nonbool(var); return color_parse(value, branch_colors[slot]); } + if (!strcmp(var, "submodule.recurse")) { + recurse_submodules = git_config_bool(var, value); + return 0; + } + if (!strcasecmp(var, "submodule.propagateBranches")) { + submodule_propagate_branches = git_config_bool(var, value); + return 0; + } + return git_color_default_config(var, value, cb); } @@ -622,7 +634,8 @@ int cmd_branch(int argc, const char **argv, const char 
*prefix) const char *new_upstream = NULL; int noncreate_actions = 0; /* possible options */ - int reflog = 0, quiet = 0, icase = 0, force = 0; + int reflog = 0, quiet = 0, icase = 0, force = 0, + recurse_submodules_explicit = 0; enum branch_track track; struct ref_filter filter; static struct ref_sorting *sorting; @@ -673,6 +686,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix) OPT_CALLBACK(0, "points-at", &filter.points_at, N_("object"), N_("print only branches of the object"), parse_opt_object_name), OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")), + OPT_BOOL(0, "recurse-submodules", &recurse_submodules_explicit, N_("recurse through submodules")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), OPT_END(), }; @@ -715,6 +729,17 @@ int cmd_branch(int argc, const char **argv, const char *prefix) if (noncreate_actions > 1) usage_with_options(builtin_branch_usage, options); + if (recurse_submodules_explicit) { + if (!submodule_propagate_branches) + die(_("branch with --recurse-submodules can only be used if submodule.propagateBranches is enabled")); + if (noncreate_actions) + die(_("--recurse-submodules can only be used to create branches")); + } + + recurse_submodules = + (recurse_submodules || recurse_submodules_explicit) && + submodule_propagate_branches; + if (filter.abbrev == -1) filter.abbrev = DEFAULT_ABBREV; filter.ignore_case = icase; @@ -853,6 +878,9 @@ int cmd_branch(int argc, const char **argv, const char *prefix) git_config_set_multivar(buf.buf, NULL, NULL, CONFIG_FLAGS_MULTI_REPLACE); strbuf_release(&buf); } else if (!noncreate_actions && argc > 0 && argc <= 2) { + const char *branch_name = argv[0]; + const char *start_name = argc == 2 ? 
argv[1] : head; + if (filter.kind != FILTER_REFS_BRANCHES) die(_("The -a, and -r, options to 'git branch' do not take a branch name.\n" "Did you mean to use: -a|-r --list <pattern>?")); @@ -860,10 +888,14 @@ int cmd_branch(int argc, const char **argv, const char *prefix) if (track == BRANCH_TRACK_OVERRIDE) die(_("the '--set-upstream' option is no longer supported. Please use '--track' or '--set-upstream-to' instead.")); - create_branch(the_repository, - argv[0], (argc == 2) ? argv[1] : head, - force, 0, reflog, quiet, track, 0); - + if (recurse_submodules) { + create_branches_recursively(the_repository, branch_name, + start_name, NULL, force, + reflog, quiet, track, 0); + return 0; + } + create_branch(the_repository, branch_name, start_name, force, 0, + reflog, quiet, track, 0); } else usage_with_options(builtin_branch_usage, options); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index e630f0c730..44b6283c08 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -20,6 +20,7 @@ #include "diff.h" #include "object-store.h" #include "advice.h" +#include "branch.h" #define OPT_QUIET (1 << 0) #define OPT_CACHED (1 << 1) @@ -2983,6 +2984,42 @@ static int module_set_branch(int argc, const char **argv, const char *prefix) return !!ret; } +static int module_create_branch(int argc, const char **argv, const char *prefix) +{ + enum branch_track track; + int quiet = 0, force = 0, reflog = 0, dry_run = 0; + + struct option options[] = { + OPT__QUIET(&quiet, N_("print only error messages")), + OPT__FORCE(&force, N_("force creation"), 0), + OPT_BOOL(0, "create-reflog", &reflog, + N_("create the branch's reflog")), + OPT_SET_INT('t', "track", &track, + N_("set up tracking mode (see git-pull(1))"), + BRANCH_TRACK_EXPLICIT), + OPT__DRY_RUN(&dry_run, + N_("show whether the branch would be created")), + OPT_END() + }; + const char *const usage[] = { + N_("git submodule--helper create-branch [-f|--force] [--create-reflog] [-q|--quiet] 
[-t|--track] [-n|--dry-run] <name> <start_oid> <start_name>"), + NULL + }; + + git_config(git_default_config, NULL); + track = git_branch_track; + argc = parse_options(argc, argv, prefix, options, usage, 0); + + if (argc != 3) + usage_with_options(usage, options); + + if (!quiet && !dry_run) + printf_ln(_("creating branch '%s'"), argv[0]); + + create_branches_recursively(the_repository, argv[0], argv[1], argv[2], + force, reflog, quiet, track, dry_run); + return 0; +} struct add_data { const char *prefix; const char *branch; @@ -3389,6 +3426,7 @@ static struct cmd_struct commands[] = { {"config", module_config, 0}, {"set-url", module_set_url, 0}, {"set-branch", module_set_branch, 0}, + {"create-branch", module_create_branch, 0}, }; int cmd_submodule__helper(int argc, const char **argv, const char *prefix) diff --git a/submodule-config.c b/submodule-config.c index f95344028b..c9f54bc72d 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -7,6 +7,7 @@ #include "strbuf.h" #include "object-store.h" #include "parse-options.h" +#include "tree-walk.h" /* * submodule cache lookup structure @@ -726,6 +727,66 @@ const struct submodule *submodule_from_path(struct repository *r, return config_from(r->submodule_cache, treeish_name, path, lookup_path); } +/** + * Used internally by submodules_of_tree(). Recurses into 'treeish_name' + * and appends submodule entries to 'out'. The submodule_cache expects + * a root-level treeish_name and paths, so keep track of these values + * with 'root_tree' and 'prefix'. 
+ */ +static void traverse_tree_submodules(struct repository *r, + const struct object_id *root_tree, + char *prefix, + const struct object_id *treeish_name, + struct submodule_entry_list *out) +{ + struct tree_desc tree; + struct submodule_tree_entry *st_entry; + struct name_entry *name_entry; + char *tree_path = NULL; + + name_entry = xmalloc(sizeof(*name_entry)); + + fill_tree_descriptor(r, &tree, treeish_name); + while (tree_entry(&tree, name_entry)) { + if (prefix) + tree_path = + mkpathdup("%s/%s", prefix, name_entry->path); + else + tree_path = xstrdup(name_entry->path); + + if (S_ISGITLINK(name_entry->mode) && + is_tree_submodule_active(r, root_tree, tree_path)) { + st_entry = xmalloc(sizeof(*st_entry)); + st_entry->name_entry = xmalloc(sizeof(*st_entry->name_entry)); + *st_entry->name_entry = *name_entry; + st_entry->submodule = + submodule_from_path(r, root_tree, tree_path); + st_entry->repo = xmalloc(sizeof(*st_entry->repo)); + if (repo_submodule_init(st_entry->repo, r, tree_path, + root_tree)) + FREE_AND_NULL(st_entry->repo); + + ALLOC_GROW(out->entries, out->entry_nr + 1, + out->entry_alloc); + out->entries[out->entry_nr++] = *st_entry; + } else if (S_ISDIR(name_entry->mode)) + traverse_tree_submodules(r, root_tree, tree_path, + &name_entry->oid, out); + free(tree_path); + } +} + +void submodules_of_tree(struct repository *r, + const struct object_id *treeish_name, + struct submodule_entry_list *out) +{ + CALLOC_ARRAY(out->entries, 0); + out->entry_nr = 0; + out->entry_alloc = 0; + + traverse_tree_submodules(r, treeish_name, NULL, treeish_name, out); +} + void submodule_free(struct repository *r) { if (r->submodule_cache) diff --git a/submodule-config.h b/submodule-config.h index 65875b94ea..fa229a8b97 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -6,6 +6,7 @@ #include "hashmap.h" #include "submodule.h" #include "strbuf.h" +#include "tree-walk.h" /** * The submodule config cache API allows to read submodule @@ -101,4 +102,37 @@ int 
check_submodule_name(const char *name); void fetch_config_from_gitmodules(int *max_children, int *recurse_submodules); void update_clone_config_from_gitmodules(int *max_jobs); +/* + * Submodule entry that contains relevant information about a + * submodule in a tree. + */ +struct submodule_tree_entry { + /* The submodule's tree entry. */ + struct name_entry *name_entry; + /* + * A struct repository corresponding to the submodule. May be + * NULL if the submodule has not been updated. + */ + struct repository *repo; + /* + * A struct submodule containing the submodule config in the + * tree's .gitmodules. + */ + const struct submodule *submodule; +}; + +struct submodule_entry_list { + struct submodule_tree_entry *entries; + int entry_nr; + int entry_alloc; +}; + +/** + * Given a treeish, return all submodules in the tree and its subtrees, + * but excluding nested submodules. Callers that require nested + * submodules are expected to recurse into the submodules themselves. + */ +void submodules_of_tree(struct repository *r, + const struct object_id *treeish_name, + struct submodule_entry_list *ret); #endif /* SUBMODULE_CONFIG_H */ diff --git a/submodule.c b/submodule.c index c689070524..5ace18a7d9 100644 --- a/submodule.c +++ b/submodule.c @@ -267,7 +267,9 @@ int option_parse_recurse_submodules_worktree_updater(const struct option *opt, * ie, the config looks like: "[submodule] active\n". * Since that is an invalid pathspec, we should inform the user. 
*/ -int is_submodule_active(struct repository *repo, const char *path) +int is_tree_submodule_active(struct repository *repo, + const struct object_id *treeish_name, + const char *path) { int ret = 0; char *key = NULL; @@ -275,7 +277,7 @@ int is_submodule_active(struct repository *repo, const char *path) const struct string_list *sl; const struct submodule *module; - module = submodule_from_path(repo, null_oid(), path); + module = submodule_from_path(repo, treeish_name, path); /* early return if there isn't a path->module mapping */ if (!module) @@ -317,6 +319,11 @@ int is_submodule_active(struct repository *repo, const char *path) return ret; } +int is_submodule_active(struct repository *repo, const char *path) +{ + return is_tree_submodule_active(repo, null_oid(), path); +} + int is_submodule_populated_gently(const char *path, int *return_error_code) { int ret = 0; diff --git a/submodule.h b/submodule.h index 6bd2c99fd9..784ceffc0e 100644 --- a/submodule.h +++ b/submodule.h @@ -54,6 +54,9 @@ int git_default_submodule_config(const char *var, const char *value, void *cb); struct option; int option_parse_recurse_submodules_worktree_updater(const struct option *opt, const char *arg, int unset); +int is_tree_submodule_active(struct repository *repo, + const struct object_id *treeish_name, + const char *path); int is_submodule_active(struct repository *repo, const char *path); /* * Determine if a submodule has been populated at a given 'path' by checking if diff --git a/t/t3207-branch-submodule.sh b/t/t3207-branch-submodule.sh new file mode 100755 index 0000000000..6ef2733396 --- /dev/null +++ b/t/t3207-branch-submodule.sh @@ -0,0 +1,292 @@ +#!/bin/sh + +test_description='git branch submodule tests' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-rebase.sh + +pwd=$(pwd) + +# Creates a clean test environment in "pwd" by copying the repo setup +# from test_dirs. 
+reset_test () { + rm -fr super && + rm -fr sub-sub-upstream && + rm -fr sub-upstream && + cp -r test_dirs/* . +} + +# Tests that the expected branch does not exist +test_no_branch () { + DIR=$1 && + BRANCH_NAME=$2 && + test_must_fail git -C "$DIR" rev-parse "$BRANCH_NAME" 2>err && + grep "ambiguous argument .$BRANCH_NAME." err +} + +test_expect_success 'setup superproject and submodule' ' + mkdir test_dirs && + ( + cd test_dirs && + git init super && + test_commit -C super foo && + git init sub-sub-upstream && + test_commit -C sub-sub-upstream foo && + git init sub-upstream && + # Submodule in a submodule + git -C sub-upstream submodule add "${pwd}/test_dirs/sub-sub-upstream" sub-sub && + git -C sub-upstream commit -m "add submodule" && + # Regular submodule + git -C super submodule add "${pwd}/test_dirs/sub-upstream" sub && + # Submodule in a subdirectory + git -C super submodule add "${pwd}/test_dirs/sub-sub-upstream" second/sub && + git -C super commit -m "add submodule" && + git -C super config submodule.propagateBranches true && + git -C super/sub submodule update --init + ) && + reset_test +' + +# Test the argument parsing +test_expect_success '--recurse-submodules should create branches' ' + test_when_finished "reset_test" && + ( + cd super && + git branch --recurse-submodules branch-a && + git rev-parse branch-a && + git -C sub rev-parse branch-a && + git -C sub/sub-sub rev-parse branch-a && + git -C second/sub rev-parse branch-a + ) +' + +test_expect_success '--recurse-submodules should die if submodule.propagateBranches is false' ' + test_when_finished "reset_test" && + ( + cd super && + echo "fatal: branch with --recurse-submodules can only be used if submodule.propagateBranches is enabled" >expected && + test_must_fail git -c submodule.propagateBranches=false branch --recurse-submodules branch-a 2>actual && + test_cmp expected actual + ) +' + +test_expect_success '--recurse-submodules should fail when not creating branches' ' + test_when_finished 
"reset_test" && + ( + cd super && + git branch --recurse-submodules branch-a && + echo "fatal: --recurse-submodules can only be used to create branches" >expected && + test_must_fail git branch --recurse-submodules -D branch-a 2>actual && + test_cmp expected actual && + # Assert that the branches were not deleted + git rev-parse branch-a && + git -C sub rev-parse branch-a + ) +' + +test_expect_success 'should respect submodule.recurse when creating branches' ' + test_when_finished "reset_test" && + ( + cd super && + git -c submodule.recurse=true branch branch-a && + git rev-parse branch-a && + git -C sub rev-parse branch-a + ) +' + +test_expect_success 'should ignore submodule.recurse when not creating branches' ' + test_when_finished "reset_test" && + ( + cd super && + git branch --recurse-submodules branch-a && + git -c submodule.recurse=true branch -D branch-a && + test_no_branch . branch-a && + git -C sub rev-parse branch-a + ) +' + +# Test branch creation behavior +test_expect_success 'should create branches based off commit id in superproject' ' + test_when_finished "reset_test" && + ( + cd super && + git branch --recurse-submodules branch-a && + git checkout --recurse-submodules branch-a && + git -C sub rev-parse HEAD >expected && + # Move the tip of sub:branch-a so that it no longer matches the commit in super:branch-a + git -C sub checkout branch-a && + test_commit -C sub bar && + # Create a new branch-b branch with start-point=branch-a + git branch --recurse-submodules branch-b branch-a && + git rev-parse branch-b && + git -C sub rev-parse branch-b >actual && + # Assert that the commit id of sub:second-branch matches super:branch-a and not sub:branch-a + test_cmp expected actual + ) +' + +test_expect_success 'should not create any branches if branch is not valid for all repos' ' + test_when_finished "reset_test" && + ( + cd super && + git -C sub branch branch-a && + test_must_fail git branch --recurse-submodules branch-a 2>actual && + test_no_branch . 
branch-a && + grep "submodule .sub.: fatal: A branch named .branch-a. already exists" actual + ) +' + +test_expect_success 'should create branches if branch exists and --force is given' ' + test_when_finished "reset_test" && + ( + cd super && + git -C sub rev-parse HEAD >expected && + test_commit -C sub baz && + # branch-a in sub now points to a newer commit. + git -C sub branch branch-a HEAD && + git -C sub rev-parse branch-a >actual-old-branch-a && + git branch --recurse-submodules --force branch-a && + git rev-parse branch-a && + git -C sub rev-parse branch-a >actual-new-branch-a && + test_cmp expected actual-new-branch-a && + # assert that branch --force actually moved the sub + # branch + ! test_cmp expected actual-old-branch-a + ) +' + +test_expect_success 'should create branch when submodule is not in HEAD:.gitmodules' ' + test_when_finished "reset_test" && + ( + cd super && + git branch branch-a && + git checkout -b branch-b && + git submodule add ../sub-upstream sub2 && + git -C sub2 submodule update --init && + # branch-b now has a committed submodule not in branch-a + git commit -m "add second submodule" && + git checkout branch-a && + git branch --recurse-submodules branch-c branch-b && + git checkout --recurse-submodules branch-c && + git -C sub2 rev-parse branch-c && + git -C sub2/sub-sub rev-parse branch-c + ) +' + +test_expect_success 'should not create branches in inactive submodules' ' + test_when_finished "reset_test" && + test_config -C super submodule.sub.active false && + ( + cd super && + git branch --recurse-submodules branch-a && + git rev-parse branch-a && + test_no_branch sub branch-a + ) +' + +test_expect_success 'should set up tracking of local branches with track=always' ' + test_when_finished "reset_test" && + ( + cd super && + git -c branch.autoSetupMerge=always branch --recurse-submodules branch-a main && + git -C sub rev-parse main && + test_cmp_config -C sub . 
branch.branch-a.remote && + test_cmp_config -C sub refs/heads/main branch.branch-a.merge + ) +' + +test_expect_success 'should set up tracking of local branches with explicit track' ' + test_when_finished "reset_test" && + ( + cd super && + git branch --track --recurse-submodules branch-a main && + git -C sub rev-parse main && + test_cmp_config -C sub . branch.branch-a.remote && + test_cmp_config -C sub refs/heads/main branch.branch-a.merge + ) +' + +test_expect_success 'should not set up unnecessary tracking of local branches' ' + test_when_finished "reset_test" && + ( + cd super && + git branch --recurse-submodules branch-a main && + git -C sub rev-parse main && + test_cmp_config -C sub "" --default "" branch.branch-a.remote && + test_cmp_config -C sub "" --default "" branch.branch-a.merge + ) +' + +reset_remote_test () { + rm -fr super-clone && + reset_test +} + +test_expect_success 'setup tests with remotes' ' + ( + cd test_dirs && + ( + cd super && + git branch branch-a && + git checkout -b branch-b && + git submodule add ../sub-upstream sub2 && + # branch-b now has a committed submodule not in branch-a + git commit -m "add second submodule" + ) && + git clone --branch main --recurse-submodules super super-clone && + git -C super-clone config submodule.propagateBranches true + ) && + reset_remote_test +' + +test_expect_success 'should get fatal error upon branch creation when submodule is not in .git/modules' ' + test_when_finished "reset_remote_test" && + ( + cd super-clone && + # This should succeed because super-clone has sub in .git/modules + git branch --recurse-submodules branch-a origin/branch-a && + # This should fail because super-clone does not have sub2 .git/modules + test_must_fail git branch --recurse-submodules branch-b origin/branch-b 2>actual && + grep "fatal: submodule .sub2.: unable to find submodule" actual && + test_no_branch . 
branch-b && + test_no_branch sub branch-b && + # User can fix themselves by initializing the submodule + git checkout origin/branch-b && + git submodule update --init --recursive && + git branch --recurse-submodules branch-b origin/branch-b + ) +' + +test_expect_success 'should set up tracking of remote-tracking branches' ' + test_when_finished "reset_remote_test" && + ( + cd super-clone && + git branch --recurse-submodules branch-a origin/branch-a && + test_cmp_config origin branch.branch-a.remote && + test_cmp_config refs/heads/branch-a branch.branch-a.merge && + # "origin/branch-a" does not exist for "sub", but it matches the refspec + # so tracking should be set up + test_cmp_config -C sub origin branch.branch-a.remote && + test_cmp_config -C sub refs/heads/branch-a branch.branch-a.merge && + test_cmp_config -C sub/sub-sub origin branch.branch-a.remote && + test_cmp_config -C sub/sub-sub refs/heads/branch-a branch.branch-a.merge + ) +' + +test_expect_success 'should not fail when unable to set up tracking in submodule' ' + test_when_finished "reset_remote_test" && + ( + cd super-clone && + git remote rename origin ex-origin && + git branch --recurse-submodules branch-a ex-origin/branch-a && + test_cmp_config ex-origin branch.branch-a.remote && + test_cmp_config refs/heads/branch-a branch.branch-a.merge && + test_cmp_config -C sub "" --default "" branch.branch-a.remote && + test_cmp_config -C sub "" --default "" branch.branch-a.merge + ) +' + +test_done From 679e3693aba0c17af60c031f7eef68f2296b8dad Mon Sep 17 00:00:00 2001 From: Glen Choo <chooglen@google.com> Date: Fri, 28 Jan 2022 16:04:46 -0800 Subject: [PATCH 065/150] branch.c: use 'goto cleanup' in setup_tracking() to fix memory leaks Signed-off-by: Glen Choo <chooglen@google.com> Reviewed-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- branch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/branch.c b/branch.c index 
70026b3c79..47251669e1 100644 --- a/branch.c +++ b/branch.c @@ -239,7 +239,7 @@ static void setup_tracking(const char *new_ref, const char *orig_ref, if (track != BRANCH_TRACK_INHERIT) for_each_remote(find_tracked_branch, &tracking); else if (inherit_tracking(&tracking, orig_ref)) - return; + goto cleanup; if (!tracking.matches) switch (track) { @@ -249,7 +249,7 @@ static void setup_tracking(const char *new_ref, const char *orig_ref, case BRANCH_TRACK_INHERIT: break; default: - return; + goto cleanup; } if (tracking.matches > 1) @@ -262,7 +262,8 @@ static void setup_tracking(const char *new_ref, const char *orig_ref, tracking.remote, tracking.srcs) < 0) exit(-1); - string_list_clear(tracking.srcs, 0); +cleanup: + string_list_clear(&tracking_srcs, 0); } int read_branch_desc(struct strbuf *buf, const char *branch_name) From 0a2bfccb9c85458f329fdbf714af699edd86fe32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Fri, 4 Feb 2022 14:42:14 +0100 Subject: [PATCH 066/150] t0051: use "skip_all" under !MINGW in single-test file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have this file added in 06ba9d03e34 (t0051: test GIT_TRACE to a windows named pipe, 2018-09-11) use the same "skip_all" pattern as an existing Windows-only test added in 0e218f91c29 (mingw: unset PERL5LIB by default, 2018-10-30) uses. This way TAP consumers like "prove" will show a nice summary when the test is skipped. Instead of: $ prove t0051-windows-named-pipe.sh [...] t0051-windows-named-pipe.sh .. ok [...] We will prominently show a "skipped" notice: $ prove t0051-windows-named-pipe.sh [...] t0051-windows-named-pipe.sh ... skipped: skipping Windows-specific tests [...] This is because we are now making use of the right TAP-y way to communicate this to the consumer. I.e. skipping the whole test file, vs. skipping individual tests (in this case there's only one test).
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0051-windows-named-pipe.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/t/t0051-windows-named-pipe.sh b/t/t0051-windows-named-pipe.sh index 10ac92d225..412f413360 100755 --- a/t/t0051-windows-named-pipe.sh +++ b/t/t0051-windows-named-pipe.sh @@ -3,8 +3,13 @@ test_description='Windows named pipes' . ./test-lib.sh +if ! test_have_prereq MINGW +then + skip_all='skipping Windows-specific tests' + test_done +fi -test_expect_success MINGW 'o_append write to named pipe' ' +test_expect_success 'o_append write to named pipe' ' GIT_TRACE="$(pwd)/expect" git status >/dev/null 2>&1 && { test-tool windows-named-pipe t0051 >actual 2>&1 & } && pid=$! && From a699367bb8749a338aefb092c5d7ac75c69d61e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= <jn.avila@free.fr> Date: Mon, 31 Jan 2022 22:07:46 +0000 Subject: [PATCH 067/150] i18n: factorize more 'incompatible options' messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Find more incompatible options to factorize. When more than two options are mutually exclusive, print the ones which are actually on the command line. 
Signed-off-by: Jean-Noël Avila <jn.avila@free.fr> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/commit.c | 35 ++++++++++------------- builtin/difftool.c | 5 ++-- builtin/grep.c | 8 ++---- builtin/log.c | 5 ++-- builtin/merge-base.c | 6 ++-- parse-options.c | 34 ++++++++++++++++++++++ parse-options.h | 16 +++++++++++ t/t7500-commit-template-squash-signoff.sh | 2 +- 8 files changed, 79 insertions(+), 32 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index b9ed0374e3..33ca9e99c8 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1242,8 +1242,6 @@ static int parse_and_validate_options(int argc, const char *argv[], struct commit *current_head, struct wt_status *s) { - int f = 0; - argc = parse_options(argc, argv, prefix, options, usage, 0); finalize_deferred_config(s); @@ -1251,7 +1249,7 @@ static int parse_and_validate_options(int argc, const char *argv[], force_author = find_author_by_nickname(force_author); if (force_author && renew_authorship) - die(_("Using both --reset-author and --author does not make sense")); + die(_("options '%s' and '%s' cannot be used together"), "--reset-author", "--author"); if (logfile || have_option_m || use_message) use_editor = 0; @@ -1268,20 +1266,16 @@ static int parse_and_validate_options(int argc, const char *argv[], die(_("You are in the middle of a rebase -- cannot amend.")); } if (fixup_message && squash_message) - die(_("Options --squash and --fixup cannot be used together")); - if (use_message) - f++; - if (edit_message) - f++; - if (fixup_message) - f++; - if (logfile) - f++; - if (f > 1) - die(_("Only one of -c/-C/-F/--fixup can be used.")); - if (have_option_m && (edit_message || use_message || logfile)) - die((_("Option -m cannot be combined with -c/-C/-F."))); - if (f || have_option_m) + die(_("options '%s' and '%s' cannot be used together"), "--squash", "--fixup"); + die_for_incompatible_opt4(!!use_message, "-C", + !!edit_message, "-c", + !!logfile, "-F", + !!fixup_message, 
"--fixup"); + die_for_incompatible_opt4(have_option_m, "-m", + !!edit_message, "-c", + !!use_message, "-C", + !!logfile, "-F"); + if (use_message || edit_message || logfile ||fixup_message || have_option_m) template_file = NULL; if (edit_message) use_message = edit_message; @@ -1306,9 +1300,10 @@ static int parse_and_validate_options(int argc, const char *argv[], if (patch_interactive) interactive = 1; - if (also + only + all + interactive > 1) - die(_("Only one of --include/--only/--all/--interactive/--patch can be used.")); - + die_for_incompatible_opt4(also, "-i/--include", + only, "-o/--only", + all, "-a/--all", + interactive, "--interactive/-p/--patch"); if (fixup_message) { /* * We limit --fixup's suboptions to only alpha characters. diff --git a/builtin/difftool.c b/builtin/difftool.c index c79fbbf67e..faa3507369 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -732,8 +732,9 @@ int cmd_difftool(int argc, const char **argv, const char *prefix) } else if (dir_diff) die(_("options '%s' and '%s' cannot be used together"), "--dir-diff", "--no-index"); - if (use_gui_tool + !!difftool_cmd + !!extcmd > 1) - die(_("options '%s', '%s', and '%s' cannot be used together"), "--gui", "--tool", "--extcmd"); + die_for_incompatible_opt3(use_gui_tool, "--gui", + !!difftool_cmd, "--tool", + !!extcmd, "--extcmd"); if (use_gui_tool) setenv("GIT_MERGETOOL_GUI", "true", 1); diff --git a/builtin/grep.c b/builtin/grep.c index 9e34a820ad..88144f0630 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1167,11 +1167,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!show_in_pager && !opt.status_only) setup_pager(); - if (!use_index && (untracked || cached)) - die(_("--cached or --untracked cannot be used with --no-index")); - - if (untracked && cached) - die(_("--untracked cannot be used with --cached")); + die_for_incompatible_opt3(!use_index, "--no-index", + untracked, "--untracked", + cached, "--cached"); if (!use_index || untracked) { int 
use_exclude = (opt_exclude < 0) ? use_index : !!opt_exclude; diff --git a/builtin/log.c b/builtin/log.c index 4b493408cc..970aa3483c 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1978,8 +1978,9 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix) if (rev.show_notes) load_display_notes(&rev.notes_opt); - if (use_stdout + rev.diffopt.close_file + !!output_directory > 1) - die(_("options '%s', '%s', and '%s' cannot be used together"), "--stdout", "--output", "--output-directory"); + die_for_incompatible_opt3(use_stdout, "--stdout", + rev.diffopt.close_file, "--output", + !!output_directory, "--output-directory"); if (use_stdout) { setup_pager(); diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 6719ac198d..26b84980db 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -159,12 +159,14 @@ int cmd_merge_base(int argc, const char **argv, const char *prefix) if (argc < 2) usage_with_options(merge_base_usage, options); if (show_all) - die("--is-ancestor cannot be used with --all"); + die(_("options '%s' and '%s' cannot be used together"), + "--is-ancestor", "--all"); return handle_is_ancestor(argc, argv); } if (cmdmode == 'r' && show_all) - die("--independent cannot be used with --all"); + die(_("options '%s' and '%s' cannot be used together"), + "--independent", "--all"); if (cmdmode == 'o') return handle_octopus(argc, argv, show_all); diff --git a/parse-options.c b/parse-options.c index a8283037be..276e3911a7 100644 --- a/parse-options.c +++ b/parse-options.c @@ -1079,3 +1079,37 @@ void NORETURN usage_msg_opt(const char *msg, die_message("%s\n", msg); /* The extra \n is intentional */ usage_with_options(usagestr, options); } + +void die_for_incompatible_opt4(int opt1, const char *opt1_name, + int opt2, const char *opt2_name, + int opt3, const char *opt3_name, + int opt4, const char *opt4_name) +{ + int count = 0; + const char *options[4]; + + if (opt1) + options[count++] = opt1_name; + if (opt2) + options[count++] = 
opt2_name; + if (opt3) + options[count++] = opt3_name; + if (opt4) + options[count++] = opt4_name; + switch (count) { + case 4: + die(_("options '%s', '%s', '%s', and '%s' cannot be used together"), + opt1_name, opt2_name, opt3_name, opt4_name); + break; + case 3: + die(_("options '%s', '%s', and '%s' cannot be used together"), + options[0], options[1], options[2]); + break; + case 2: + die(_("options '%s' and '%s' cannot be used together"), + options[0], options[1]); + break; + default: + break; + } +} diff --git a/parse-options.h b/parse-options.h index e22846d3b7..f773cc7859 100644 --- a/parse-options.h +++ b/parse-options.h @@ -225,6 +225,22 @@ NORETURN void usage_msg_opt(const char *msg, const char * const *usagestr, const struct option *options); +void die_for_incompatible_opt4(int opt1, const char *opt1_name, + int opt2, const char *opt2_name, + int opt3, const char *opt3_name, + int opt4, const char *opt4_name); + + +static inline void die_for_incompatible_opt3(int opt1, const char *opt1_name, + int opt2, const char *opt2_name, + int opt3, const char *opt3_name) +{ + die_for_incompatible_opt4(opt1, opt1_name, + opt2, opt2_name, + opt3, opt3_name, + 0, ""); +} + /* * Use these assertions for callbacks that expect to be called with NONEG and * NOARG respectively, and do not otherwise handle the "unset" and "arg" diff --git a/t/t7500-commit-template-squash-signoff.sh b/t/t7500-commit-template-squash-signoff.sh index 91964653a0..5fcaa0b4f2 100755 --- a/t/t7500-commit-template-squash-signoff.sh +++ b/t/t7500-commit-template-squash-signoff.sh @@ -442,7 +442,7 @@ test_expect_success '--fixup=reword: give error with pathsec' ' ' test_expect_success '--fixup=reword: -F give error message' ' - echo "fatal: Only one of -c/-C/-F/--fixup can be used." 
>expect && + echo "fatal: options '\''-F'\'' and '\''--fixup'\'' cannot be used together" >expect && test_must_fail git commit --fixup=reword:HEAD~ -F msg 2>actual && test_cmp expect actual ' From 1a8aea857e4225a9d35a531869fd47777f3063d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= <jn.avila@free.fr> Date: Mon, 31 Jan 2022 22:07:47 +0000 Subject: [PATCH 068/150] i18n: factorize "invalid value" messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the same message when an invalid value is passed to a command line option or a configuration variable. Signed-off-by: Jean-Noël Avila <jn.avila@free.fr> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/am.c | 8 +++++--- builtin/blame.c | 7 ++++--- builtin/fetch.c | 4 ++-- builtin/pack-objects.c | 2 +- builtin/pull.c | 6 +++--- builtin/push.c | 2 +- builtin/send-pack.c | 2 +- diff-merges.c | 2 +- gpg-interface.c | 6 +++--- ls-refs.c | 3 ++- parallel-checkout.c | 4 ++-- sequencer.c | 2 +- setup.c | 3 ++- submodule-config.c | 2 +- t/t4150-am.sh | 2 +- 15 files changed, 30 insertions(+), 25 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index b6be1f1cb1..97dbeb8e49 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -199,7 +199,7 @@ static int am_option_parse_empty(const struct option *opt, else if (!strcmp(arg, "keep")) *opt_value = KEEP_EMPTY_COMMIT; else - return error(_("Invalid value for --empty: %s"), arg); + return error(_("invalid value for '%s': '%s'"), "--empty", arg); return 0; } @@ -2239,7 +2239,8 @@ static int parse_opt_patchformat(const struct option *opt, const char *arg, int * when you add new options */ else - return error(_("Invalid value for --patch-format: %s"), arg); + return error(_("invalid value for '%s': '%s'"), + "--patch-format", arg); return 0; } @@ -2282,7 +2283,8 @@ static int parse_opt_show_current_patch(const struct option *opt, const char *ar break; } if (new_value >= ARRAY_SIZE(valid_modes)) - return 
error(_("Invalid value for --show-current-patch: %s"), arg); + return error(_("invalid value for '%s': '%s'"), + "--show-current-patch", arg); } if (resume->mode == RESUME_SHOW_PATCH && new_value != resume->sub_mode) diff --git a/builtin/blame.c b/builtin/blame.c index 7fafeac408..9c142f17a2 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -721,8 +721,8 @@ static int git_blame_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "color.blame.repeatedlines")) { if (color_parse_mem(value, strlen(value), repeated_meta_color)) - warning(_("invalid color '%s' in color.blame.repeatedLines"), - value); + warning(_("invalid value for '%s': '%s'"), + "color.blame.repeatedLines", value); return 0; } if (!strcmp(var, "color.blame.highlightrecent")) { @@ -739,7 +739,8 @@ static int git_blame_config(const char *var, const char *value, void *cb) coloring_mode &= ~(OUTPUT_COLOR_LINE | OUTPUT_SHOW_AGE_WITH_COLOR); } else { - warning(_("invalid value for blame.coloring")); + warning(_("invalid value for '%s': '%s'"), + "blame.coloring", value); return 0; } } diff --git a/builtin/fetch.c b/builtin/fetch.c index 5f06b21f8e..8be19bb879 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -763,8 +763,8 @@ static void prepare_format_display(struct ref *ref_map) else if (!strcasecmp(format, "compact")) compact_format = 1; else - die(_("configuration fetch.output contains invalid value %s"), - format); + die(_("invalid value for '%s': '%s'"), + "fetch.output", format); for (rm = ref_map; rm; rm = rm->next) { if (rm->status == REF_STATUS_REJECT_SHALLOW || diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ba2006f221..192c3ca305 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3504,7 +3504,7 @@ static int option_parse_missing_action(const struct option *opt, return 0; } - die(_("invalid value for --missing")); + die(_("invalid value for '%s': '%s'"), "--missing", arg); return 0; } diff --git a/builtin/pull.c b/builtin/pull.c index 
100cbf9fb8..e54a0ccadc 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -42,9 +42,9 @@ static enum rebase_type parse_config_rebase(const char *key, const char *value, return v; if (fatal) - die(_("Invalid value for %s: %s"), key, value); + die(_("invalid value for '%s': '%s'"), key, value); else - error(_("Invalid value for %s: %s"), key, value); + error(_("invalid value for '%s': '%s'"), key, value); return REBASE_INVALID; } @@ -318,7 +318,7 @@ static const char *config_get_ff(void) if (!strcmp(value, "only")) return "--ff-only"; - die(_("Invalid value for pull.ff: %s"), value); + die(_("invalid value for '%s': '%s'"), "pull.ff", value); } /** diff --git a/builtin/push.c b/builtin/push.c index 359db90321..cad997965a 100644 --- a/builtin/push.c +++ b/builtin/push.c @@ -486,7 +486,7 @@ static int git_push_config(const char *k, const char *v, void *cb) if (value && !strcasecmp(value, "if-asked")) set_push_cert_flags(flags, SEND_PACK_PUSH_CERT_IF_ASKED); else - return error("Invalid value for '%s'", k); + return error(_("invalid value for '%s'"), k); } } } else if (!strcmp(k, "push.recursesubmodules")) { diff --git a/builtin/send-pack.c b/builtin/send-pack.c index 69c432ef1a..64962be016 100644 --- a/builtin/send-pack.c +++ b/builtin/send-pack.c @@ -145,7 +145,7 @@ static int send_pack_config(const char *k, const char *v, void *cb) if (value && !strcasecmp(value, "if-asked")) args.push_cert = SEND_PACK_PUSH_CERT_IF_ASKED; else - return error("Invalid value for '%s'", k); + return error(_("invalid value for '%s'"), k); } } } diff --git a/diff-merges.c b/diff-merges.c index 5060ccd890..cd6c102a0d 100644 --- a/diff-merges.c +++ b/diff-merges.c @@ -67,7 +67,7 @@ static void set_diff_merges(struct rev_info *revs, const char *optarg) diff_merges_setup_func_t func = func_by_opt(optarg); if (!func) - die(_("unknown value for --diff-merges: %s"), optarg); + die(_("invalid value for '%s': '%s'"), "--diff-merges", optarg); func(revs); diff --git a/gpg-interface.c 
b/gpg-interface.c index b52eb0e2e0..4e084b08f6 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -702,7 +702,7 @@ int git_gpg_config(const char *var, const char *value, void *cb) return config_error_nonbool(var); fmt = get_format_by_name(value); if (!fmt) - return error("unsupported value for %s: %s", + return error(_("invalid value for '%s': '%s'"), var, value); use_format = fmt; return 0; @@ -717,8 +717,8 @@ int git_gpg_config(const char *var, const char *value, void *cb) free(trust); if (ret) - return error("unsupported value for %s: %s", var, - value); + return error(_("invalid value for '%s': '%s'"), + var, value); return 0; } diff --git a/ls-refs.c b/ls-refs.c index 54078323dc..98e69373c8 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -34,7 +34,8 @@ static void ensure_config_read(void) } else if (!strcmp(str, "ignore")) { /* do nothing */ } else { - die(_("invalid value '%s' for lsrefs.unborn"), str); + die(_("invalid value for '%s': '%s'"), + "lsrefs.unborn", str); } } config_read = 1; diff --git a/parallel-checkout.c b/parallel-checkout.c index 8dd7e7bad4..31a3d0ee1b 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -39,8 +39,8 @@ void get_parallel_checkout_configs(int *num_workers, int *threshold) if (env_workers && *env_workers) { if (strtol_i(env_workers, 10, num_workers)) { - die("invalid value for GIT_TEST_CHECKOUT_WORKERS: '%s'", - env_workers); + die(_("invalid value for '%s': '%s'"), + "GIT_TEST_CHECKOUT_WORKERS", env_workers); } if (*num_workers < 1) *num_workers = online_cpus(); diff --git a/sequencer.c b/sequencer.c index 5213d16e97..683f5392eb 100644 --- a/sequencer.c +++ b/sequencer.c @@ -2806,7 +2806,7 @@ static int populate_opts_cb(const char *key, const char *value, void *data) return error(_("invalid key: %s"), key); if (!error_flag) - return error(_("invalid value for %s: %s"), key, value); + return error(_("invalid value for '%s': '%s'"), key, value); return 0; } diff --git a/setup.c b/setup.c index af3b8c09ab..04ce33cdcd 
100644 --- a/setup.c +++ b/setup.c @@ -559,7 +559,8 @@ static enum extension_result handle_extension(const char *var, return config_error_nonbool(var); format = hash_algo_by_name(value); if (format == GIT_HASH_UNKNOWN) - return error("invalid value for 'extensions.objectformat'"); + return error(_("invalid value for '%s': '%s'"), + "extensions.objectformat", value); data->hash_algo = format; return EXTENSION_OK; } diff --git a/submodule-config.c b/submodule-config.c index f95344028b..fb95a026f4 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -496,7 +496,7 @@ static int parse_config(const char *var, const char *value, void *data) else if (parse_submodule_update_strategy(value, &submodule->update_strategy) < 0 || submodule->update_strategy.type == SM_UPDATE_COMMAND) - die(_("invalid value for %s"), var); + die(_("invalid value for '%s'"), var); } else if (!strcmp(item.buf, "shallow")) { if (!me->overwrite && submodule->recommend_shallow != -1) warn_multiple_config(me->treeish_name, submodule->name, diff --git a/t/t4150-am.sh b/t/t4150-am.sh index 6caff0ca39..159fae8d01 100755 --- a/t/t4150-am.sh +++ b/t/t4150-am.sh @@ -1169,7 +1169,7 @@ test_expect_success 'invalid when passing the --empty option alone' ' test_when_finished "git am --abort || :" && git checkout empty-commit^ && test_must_fail git am --empty empty-commit.patch 2>err && - echo "error: Invalid value for --empty: empty-commit.patch" >expected && + echo "error: invalid value for '\''--empty'\'': '\''empty-commit.patch'\''" >expected && test_cmp expected err ' From 959d670d1a42af282a55551a3f03642592f64eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= <jn.avila@free.fr> Date: Mon, 31 Jan 2022 22:07:48 +0000 Subject: [PATCH 069/150] i18n: remove from i18n strings that do not hold translatable parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jean-Noël Avila <jn.avila@free.fr> Signed-off-by: Junio C Hamano 
<gitster@pobox.com> --- archive.c | 2 +- builtin/bisect--helper.c | 6 +++--- builtin/count-objects.c | 2 +- builtin/hash-object.c | 2 +- builtin/help.c | 4 ++-- builtin/mktag.c | 2 +- builtin/mktree.c | 2 +- builtin/notes.c | 6 +++--- builtin/prune-packed.c | 2 +- builtin/rebase.c | 2 +- builtin/reflog.c | 2 +- builtin/remote.c | 2 +- builtin/replace.c | 2 +- builtin/sparse-checkout.c | 8 ++++---- builtin/stripspace.c | 4 ++-- builtin/submodule--helper.c | 2 +- builtin/update-server-info.c | 2 +- 17 files changed, 26 insertions(+), 26 deletions(-) diff --git a/archive.c b/archive.c index d571249cf3..e29d0e00f6 100644 --- a/archive.c +++ b/archive.c @@ -12,7 +12,7 @@ static char const * const archive_usage[] = { N_("git archive [<options>] <tree-ish> [<path>...]"), - N_("git archive --list"), + "git archive --list", N_("git archive --remote <repo> [--exec <cmd>] [<options>] <tree-ish> [<path>...]"), N_("git archive --remote <repo> [--exec <cmd>] --list"), NULL diff --git a/builtin/bisect--helper.c b/builtin/bisect--helper.c index 28a2e6a575..f962dbd430 100644 --- a/builtin/bisect--helper.c +++ b/builtin/bisect--helper.c @@ -22,15 +22,15 @@ static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN") static const char * const git_bisect_helper_usage[] = { N_("git bisect--helper --bisect-reset [<commit>]"), - N_("git bisect--helper --bisect-terms [--term-good | --term-old | --term-bad | --term-new]"), + "git bisect--helper --bisect-terms [--term-good | --term-old | --term-bad | --term-new]", N_("git bisect--helper --bisect-start [--term-{new,bad}=<term> --term-{old,good}=<term>]" " [--no-checkout] [--first-parent] [<bad> [<good>...]] [--] [<paths>...]"), - N_("git bisect--helper --bisect-next"), + "git bisect--helper --bisect-next", N_("git bisect--helper --bisect-state (bad|new) [<rev>]"), N_("git bisect--helper --bisect-state (good|old) [<rev>...]"), N_("git bisect--helper --bisect-replay <filename>"), N_("git bisect--helper --bisect-skip [(<rev>|<range>)...]"), - 
N_("git bisect--helper --bisect-visualize"), + "git bisect--helper --bisect-visualize", N_("git bisect--helper --bisect-run <cmd>..."), NULL }; diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 3fae474f6f..07b9419596 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -87,7 +87,7 @@ static int print_alternate(struct object_directory *odb, void *data) } static char const * const count_objects_usage[] = { - N_("git count-objects [-v] [-H | --human-readable]"), + "git count-objects [-v] [-H | --human-readable]", NULL }; diff --git a/builtin/hash-object.c b/builtin/hash-object.c index c7b3ad74c6..04e2442ec7 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -81,7 +81,7 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) { static const char * const hash_object_usage[] = { N_("git hash-object [-t <type>] [-w] [--path=<file> | --no-filters] [--stdin] [--] <file>..."), - N_("git hash-object --stdin-paths"), + "git hash-object --stdin-paths", NULL }; const char *type = blob_type; diff --git a/builtin/help.c b/builtin/help.c index d387131dd8..b4f2ad3f94 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -77,8 +77,8 @@ static struct option builtin_help_options[] = { static const char * const builtin_help_usage[] = { N_("git help [-a|--all] [--[no-]verbose]]\n" " [[-i|--info] [-m|--man] [-w|--web]] [<command>]"), - N_("git help [-g|--guides]"), - N_("git help [-c|--config]"), + "git help [-g|--guides]", + "git help [-c|--config]", NULL }; diff --git a/builtin/mktag.c b/builtin/mktag.c index 3b2dbbb37e..c7b905c614 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -7,7 +7,7 @@ #include "config.h" static char const * const builtin_mktag_usage[] = { - N_("git mktag"), + "git mktag", NULL }; static int option_strict = 1; diff --git a/builtin/mktree.c b/builtin/mktree.c index ae78ca1c02..8bdaada922 100644 --- a/builtin/mktree.c +++ b/builtin/mktree.c @@ -63,7 +63,7 @@ static void write_tree(struct object_id 
*oid) } static const char *mktree_usage[] = { - N_("git mktree [-z] [--missing] [--batch]"), + "git mktree [-z] [--missing] [--batch]", NULL }; diff --git a/builtin/notes.c b/builtin/notes.c index 05d60483e8..f99593a185 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -32,8 +32,8 @@ static const char * const git_notes_usage[] = { N_("git notes [--ref <notes-ref>] edit [--allow-empty] [<object>]"), N_("git notes [--ref <notes-ref>] show [<object>]"), N_("git notes [--ref <notes-ref>] merge [-v | -q] [-s <strategy>] <notes-ref>"), - N_("git notes merge --commit [-v | -q]"), - N_("git notes merge --abort [-v | -q]"), + "git notes merge --commit [-v | -q]", + "git notes merge --abort [-v | -q]", N_("git notes [--ref <notes-ref>] remove [<object>...]"), N_("git notes [--ref <notes-ref>] prune [-n] [-v]"), N_("git notes [--ref <notes-ref>] get-ref"), @@ -89,7 +89,7 @@ static const char * const git_notes_prune_usage[] = { }; static const char * const git_notes_get_ref_usage[] = { - N_("git notes get-ref"), + "git notes get-ref", NULL }; diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index b7b9281a8c..da3273a268 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -3,7 +3,7 @@ #include "prune-packed.h" static const char * const prune_packed_usage[] = { - N_("git prune-packed [-n | --dry-run] [-q | --quiet]"), + "git prune-packed [-n | --dry-run] [-q | --quiet]", NULL }; diff --git a/builtin/rebase.c b/builtin/rebase.c index 36490d06c8..64796c6a78 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -36,7 +36,7 @@ static char const * const builtin_rebase_usage[] = { "[--onto <newbase> | --keep-base] [<upstream> [<branch>]]"), N_("git rebase [-i] [options] [--exec <cmd>] [--onto <newbase>] " "--root [<branch>]"), - N_("git rebase --continue | --abort | --skip | --edit-todo"), + "git rebase --continue | --abort | --skip | --edit-todo", NULL }; diff --git a/builtin/reflog.c b/builtin/reflog.c index a4b1dd27e1..ee5ee8d8cf 100644 --- 
a/builtin/reflog.c +++ b/builtin/reflog.c @@ -800,7 +800,7 @@ static int cmd_reflog_exists(int argc, const char **argv, const char *prefix) */ static const char reflog_usage[] = -N_("git reflog [ show | expire | delete | exists ]"); +"git reflog [ show | expire | delete | exists ]"; int cmd_reflog(int argc, const char **argv, const char *prefix) { diff --git a/builtin/remote.c b/builtin/remote.c index 299c466116..6f27ddc47b 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -14,7 +14,7 @@ #include "commit-reach.h" static const char * const builtin_remote_usage[] = { - N_("git remote [-v | --verbose]"), + "git remote [-v | --verbose]", N_("git remote add [-t <branch>] [-m <master>] [-f] [--tags | --no-tags] [--mirror=<fetch|push>] <name> <url>"), N_("git remote rename <old> <new>"), N_("git remote remove <name>"), diff --git a/builtin/replace.c b/builtin/replace.c index 6ff1734d58..ac92337c0e 100644 --- a/builtin/replace.c +++ b/builtin/replace.c @@ -22,7 +22,7 @@ static const char * const git_replace_usage[] = { N_("git replace [-f] <object> <replacement>"), N_("git replace [-f] --edit <object>"), N_("git replace [-f] --graft <commit> [<parent>...]"), - N_("git replace [-f] --convert-graft-file"), + "git replace [-f] --convert-graft-file", N_("git replace -d <object>..."), N_("git replace [--format=<format>] [-l [<pattern>]]"), NULL diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 679c107036..771c9869a1 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -43,7 +43,7 @@ static void write_patterns_to_file(FILE *fp, struct pattern_list *pl) } static char const * const builtin_sparse_checkout_list_usage[] = { - N_("git sparse-checkout list"), + "git sparse-checkout list", NULL }; @@ -419,7 +419,7 @@ static int update_modes(int *cone_mode, int *sparse_index) } static char const * const builtin_sparse_checkout_init_usage[] = { - N_("git sparse-checkout init [--cone] [--[no-]sparse-index]"), + "git sparse-checkout init 
[--cone] [--[no-]sparse-index]", NULL }; @@ -762,7 +762,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) } static char const * const builtin_sparse_checkout_reapply_usage[] = { - N_("git sparse-checkout reapply [--[no-]cone] [--[no-]sparse-index]"), + "git sparse-checkout reapply [--[no-]cone] [--[no-]sparse-index]", NULL }; @@ -800,7 +800,7 @@ static int sparse_checkout_reapply(int argc, const char **argv) } static char const * const builtin_sparse_checkout_disable_usage[] = { - N_("git sparse-checkout disable"), + "git sparse-checkout disable", NULL }; diff --git a/builtin/stripspace.c b/builtin/stripspace.c index be33eb83c1..1e34cf2beb 100644 --- a/builtin/stripspace.c +++ b/builtin/stripspace.c @@ -15,8 +15,8 @@ static void comment_lines(struct strbuf *buf) } static const char * const stripspace_usage[] = { - N_("git stripspace [-s | --strip-comments]"), - N_("git stripspace [-c | --comment-lines]"), + "git stripspace [-s | --strip-comments]", + "git stripspace [-c | --comment-lines]", NULL }; diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c5d3fc3817..b80aa9898a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2883,7 +2883,7 @@ static int module_config(int argc, const char **argv, const char *prefix) const char *const git_submodule_helper_usage[] = { N_("git submodule--helper config <name> [<value>]"), N_("git submodule--helper config --unset <name>"), - N_("git submodule--helper config --check-writeable"), + "git submodule--helper config --check-writeable", NULL }; diff --git a/builtin/update-server-info.c b/builtin/update-server-info.c index 4321a34456..880fffec58 100644 --- a/builtin/update-server-info.c +++ b/builtin/update-server-info.c @@ -4,7 +4,7 @@ #include "parse-options.h" static const char * const update_server_info_usage[] = { - N_("git update-server-info [--force]"), + "git update-server-info [--force]", NULL }; From 
9164d97a63b31614a52571d708f1ef151b97db71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= <jn.avila@free.fr> Date: Mon, 31 Jan 2022 22:07:49 +0000 Subject: [PATCH 070/150] i18n: fix some misformatted placeholders in command synopsis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add '<>' around arguments where missing * convert plurals into '...' forms This applies the style guide for documentation. Signed-off-by: Jean-Noël Avila <jn.avila@free.fr> Reviewed-by: Phillip Wood <phillip.wood123@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fast-export.c | 2 +- builtin/reflog.c | 4 ++-- builtin/rev-list.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 9f1c730e58..510139e9b5 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -26,7 +26,7 @@ #include "commit-slab.h" static const char *fast_export_usage[] = { - N_("git fast-export [rev-list-opts]"), + N_("git fast-export [<rev-list-opts>]"), NULL }; diff --git a/builtin/reflog.c b/builtin/reflog.c index ee5ee8d8cf..343a10d371 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -17,10 +17,10 @@ static const char reflog_expire_usage[] = N_("git reflog expire [--expire=<time>] " "[--expire-unreachable=<time>] " "[--rewrite] [--updateref] [--stale-fix] [--dry-run | -n] " - "[--verbose] [--all] <refs>..."); + "[--verbose] [--all] <ref>..."); static const char reflog_delete_usage[] = N_("git reflog delete [--rewrite] [--updateref] " - "[--dry-run | -n] [--verbose] <refs>..."); + "[--dry-run | -n] [--verbose] <ref>..."); static const char reflog_exists_usage[] = N_("git reflog exists <ref>"); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 777558e9b0..38528c7f15 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -20,7 +20,7 @@ #include "packfile.h" static const char rev_list_usage[] = -"git rev-list [OPTION] <commit-id>... 
[ -- paths... ]\n" +"git rev-list [<options>] <commit-id>... [-- <path>...]\n" " limiting output:\n" " --max-count=<n>\n" " --max-age=<epoch>\n" From d17294a05e7601b5139e09609fd0f805ac78271b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Sat, 5 Feb 2022 01:04:29 +0100 Subject: [PATCH 071/150] hash-object: fix a trivial leak in --path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory leak that happened when the --path option was provided. This leak has been with us ever since the option was added in 39702431500 (add --path option to git hash-object, 2008-08-03). We can now mark "t1007-hash-object.sh" as passing when git is compiled with SANITIZE=leak. It'll now run in the "GIT_TEST_PASSING_SANITIZE_LEAK=true" test mode (the "linux-leaks" CI target). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/hash-object.c | 9 +++++++-- t/t1007-hash-object.sh | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index c7b3ad74c6..db9b253527 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -92,6 +92,7 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) int nongit = 0; unsigned flags = HASH_FORMAT_CHECK; const char *vpath = NULL; + char *vpath_free = NULL; const struct option hash_object_options[] = { OPT_STRING('t', NULL, &type, N_("type"), N_("object type")), OPT_BIT('w', NULL, &flags, N_("write the object into the object database"), @@ -114,8 +115,10 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); - if (vpath && prefix) - vpath = prefix_filename(prefix, vpath); + if (vpath && prefix) { + vpath_free = prefix_filename(prefix, vpath); + vpath = vpath_free; + } git_config(git_default_config, NULL); @@ -156,5 +159,7 @@ int 
cmd_hash_object(int argc, const char **argv, const char *prefix) if (stdin_paths) hash_stdin_paths(type, no_filters, flags, literally); + free(vpath_free); + return 0; } diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64b340f227..ac5ad8c740 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -2,6 +2,7 @@ test_description="git hash-object" +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh echo_without_newline() { From f36d4f8316ac567bd3bd0de3c051f2cd8ae2444b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Sat, 5 Feb 2022 01:08:14 +0100 Subject: [PATCH 072/150] ls-remote & transport API: release "struct transport_ls_refs_options" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory leak in codepaths that use the "struct transport_ls_refs_options" API. Since the introduction of the struct in 39835409d10 (connect, transport: encapsulate arg in struct, 2021-02-05) the caller has been responsible for freeing it. That commit in turn migrated code originally added in 402c47d9391 (clone: send ref-prefixes when using protocol v2, 2018-07-20) and b4be74105fe (ls-remote: pass ref prefixes when requesting a remote's refs, 2018-03-15). Only some of those codepaths were releasing the allocated resources of the struct, now all of them will. Mark the "t/t5511-refspec.sh" test as passing when git is compiled with SANITIZE=leak. They'll now be listed as running under the "GIT_TEST_PASSING_SANITIZE_LEAK=true" test mode (the "linux-leaks" CI target). Previously 24/47 tests would fail. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/clone.c | 13 ++++++------- builtin/fetch.c | 2 +- builtin/ls-remote.c | 3 ++- connect.c | 4 ++-- t/t5511-refspec.sh | 1 + transport.c | 8 +++++++- transport.h | 10 +++++++--- 7 files changed, 26 insertions(+), 15 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 727e16e0ae..8564e5f603 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1233,7 +1233,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } else { const char *branch; - char *ref; + const char *ref; + char *ref_free = NULL; if (option_branch) die(_("Remote branch %s not found in upstream %s"), @@ -1250,17 +1251,16 @@ int cmd_clone(int argc, const char **argv, const char *prefix) skip_prefix(transport_ls_refs_options.unborn_head_target, "refs/heads/", &branch)) { ref = transport_ls_refs_options.unborn_head_target; - transport_ls_refs_options.unborn_head_target = NULL; create_symref("HEAD", ref, reflog_msg.buf); } else { branch = git_default_branch_name(0); - ref = xstrfmt("refs/heads/%s", branch); + ref_free = xstrfmt("refs/heads/%s", branch); + ref = ref_free; } if (!option_bare) install_branch_config(0, branch, remote_name, ref); - - free(ref); + free(ref_free); } write_refspec_config(src_ref_prefix, our_head_points_at, @@ -1312,7 +1312,6 @@ int cmd_clone(int argc, const char **argv, const char *prefix) UNLEAK(repo); junk_mode = JUNK_LEAVE_ALL; - strvec_clear(&transport_ls_refs_options.ref_prefixes); - free(transport_ls_refs_options.unborn_head_target); + transport_ls_refs_options_release(&transport_ls_refs_options); return err; } diff --git a/builtin/fetch.c b/builtin/fetch.c index 5f06b21f8e..a3ffab727e 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1593,7 +1593,7 @@ static int do_fetch(struct transport *transport, } else remote_refs = NULL; - strvec_clear(&transport_ls_refs_options.ref_prefixes); + 
transport_ls_refs_options_release(&transport_ls_refs_options); ref_map = get_ref_map(transport->remote, remote_refs, rs, tags, &autotags); diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 44448fa61d..d856085e94 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -155,6 +155,7 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) ref_array_clear(&ref_array); if (transport_disconnect(transport)) - return 1; + status = 1; + transport_ls_refs_options_release(&transport_options); return status; } diff --git a/connect.c b/connect.c index eaf7d6d261..afc79a6236 100644 --- a/connect.c +++ b/connect.c @@ -379,7 +379,7 @@ struct ref **get_remote_heads(struct packet_reader *reader, /* Returns 1 when a valid ref has been added to `list`, 0 otherwise */ static int process_ref_v2(struct packet_reader *reader, struct ref ***list, - char **unborn_head_target) + const char **unborn_head_target) { int ret = 1; int i = 0; @@ -483,7 +483,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, const char *hash_name; struct strvec *ref_prefixes = transport_options ? &transport_options->ref_prefixes : NULL; - char **unborn_head_target = transport_options ? + const char **unborn_head_target = transport_options ? &transport_options->unborn_head_target : NULL; *list = NULL; diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh index be025b90f9..fc55681a3f 100755 --- a/t/t5511-refspec.sh +++ b/t/t5511-refspec.sh @@ -2,6 +2,7 @@ test_description='refspec parsing' +TEST_PASSES_SANITIZE_LEAK=true . 
./test-lib.sh test_refspec () { diff --git a/transport.c b/transport.c index 2a3e324154..253d6671b1 100644 --- a/transport.c +++ b/transport.c @@ -1292,7 +1292,7 @@ int transport_push(struct repository *r, &transport_options); trace2_region_leave("transport_push", "get_refs_list", r); - strvec_clear(&transport_options.ref_prefixes); + transport_ls_refs_options_release(&transport_options); if (flags & TRANSPORT_PUSH_ALL) match_flags |= MATCH_REFS_ALL; @@ -1420,6 +1420,12 @@ const struct ref *transport_get_remote_refs(struct transport *transport, return transport->remote_refs; } +void transport_ls_refs_options_release(struct transport_ls_refs_options *opts) +{ + strvec_clear(&opts->ref_prefixes); + free((char *)opts->unborn_head_target); +} + int transport_fetch_refs(struct transport *transport, struct ref *refs) { int rc; diff --git a/transport.h b/transport.h index 3f16e50c19..a0bc6a1e9e 100644 --- a/transport.h +++ b/transport.h @@ -257,15 +257,19 @@ struct transport_ls_refs_options { /* * If unborn_head_target is not NULL, and the remote reports HEAD as * pointing to an unborn branch, transport_get_remote_refs() stores the - * unborn branch in unborn_head_target. It should be freed by the - * caller. + * unborn branch in unborn_head_target. */ - char *unborn_head_target; + const char *unborn_head_target; }; #define TRANSPORT_LS_REFS_OPTIONS_INIT { \ .ref_prefixes = STRVEC_INIT, \ } +/** + * Release the "struct transport_ls_refs_options". + */ +void transport_ls_refs_options_release(struct transport_ls_refs_options *opts); + /* * Retrieve refs from a remote. */ From 5c11c0d52c398af65f19bcf65a46304552083214 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:32:58 +0000 Subject: [PATCH 073/150] Documentation: add extensions.worktreeConfig details The extensions.worktreeConfig extension was added in 58b284a (worktree: add per-worktree config files, 2018-10-21) and was somewhat documented in Documentation/git-config.txt. 
However, the extensions.worktreeConfig value was not specified further in the list of possible config keys. The location of the config.worktree file is not specified, and there are some precautions that should be mentioned clearly, but are only mentioned in git-worktree.txt. Expand the documentation to help users discover the complexities of extensions.worktreeConfig by adding details and cross links in these locations (relative to Documentation/): - config/extensions.txt - git-config.txt - git-worktree.txt The updates focus on items such as * $GIT_DIR/config.worktree takes precedence over $GIT_COMMON_DIR/config. * The core.worktree and core.bare=true settings are incorrect to have in the common config file when extensions.worktreeConfig is enabled. * The sparse-checkout settings core.sparseCheckout[Cone] are recommended to be set in the worktree config. As documented in 11664196ac ("Revert "check_repository_format_gently(): refuse extensions for old repositories"", 2020-07-15), this extension must be considered regardless of the repository format version for historical reasons. A future change will update references to extensions.worktreeConfig within git-sparse-checkout.txt, but a behavior change is needed before making those updates. 
Helped-by: Elijah Newren <newren@gmail.com> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/extensions.txt | 31 +++++++++++++++++++++++++++++ Documentation/git-config.txt | 8 ++++++-- Documentation/git-worktree.txt | 11 +++++++--- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/Documentation/config/extensions.txt b/Documentation/config/extensions.txt index 4e23d73cdc..bccaec7a96 100644 --- a/Documentation/config/extensions.txt +++ b/Documentation/config/extensions.txt @@ -6,3 +6,34 @@ extensions.objectFormat:: Note that this setting should only be set by linkgit:git-init[1] or linkgit:git-clone[1]. Trying to change it after initialization will not work and will produce hard-to-diagnose issues. + +extensions.worktreeConfig:: + If enabled, then worktrees will load config settings from the + `$GIT_DIR/config.worktree` file in addition to the + `$GIT_COMMON_DIR/config` file. Note that `$GIT_COMMON_DIR` and + `$GIT_DIR` are the same for the main working tree, while other + working trees have `$GIT_DIR` equal to + `$GIT_COMMON_DIR/worktrees/<id>/`. The settings in the + `config.worktree` file will override settings from any other + config files. ++ +When enabling `extensions.worktreeConfig`, you must be careful to move +certain values from the common config file to the main working tree's +`config.worktree` file, if present: ++ +* `core.worktree` must be moved from `$GIT_COMMON_DIR/config` to + `$GIT_COMMON_DIR/config.worktree`. +* If `core.bare` is true, then it must be moved from `$GIT_COMMON_DIR/config` + to `$GIT_COMMON_DIR/config.worktree`. ++ +It may also be beneficial to adjust the locations of `core.sparseCheckout` +and `core.sparseCheckoutCone` depending on your desire for customizable +sparse-checkout settings for each worktree. 
By default, the `git +sparse-checkout` builtin enables `extensions.worktreeConfig`, assigns +these config values on a per-worktree basis, and uses the +`$GIT_DIR/info/sparse-checkout` file to specify the sparsity for each +worktree independently. See linkgit:git-sparse-checkout[1] for more +details. ++ +For historical reasons, `extensions.worktreeConfig` is respected +regardless of the `core.repositoryFormatVersion` setting. diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index 2285effb36..bdcfd94b64 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -141,9 +141,13 @@ from all available files. See also <<FILES>>. --worktree:: - Similar to `--local` except that `.git/config.worktree` is + Similar to `--local` except that `$GIT_DIR/config.worktree` is read from or written to if `extensions.worktreeConfig` is - present. If not it's the same as `--local`. + enabled. If not it's the same as `--local`. Note that `$GIT_DIR` + is equal to `$GIT_COMMON_DIR` for the main working tree, but is of + the form `$GIT_COMMON_DIR/worktrees/<id>/` for other working trees. See + linkgit:git-worktree[1] to learn how to enable + `extensions.worktreeConfig`. -f <config-file>:: --file <config-file>:: diff --git a/Documentation/git-worktree.txt b/Documentation/git-worktree.txt index 9e862fbcf7..b8d53c4830 100644 --- a/Documentation/git-worktree.txt +++ b/Documentation/git-worktree.txt @@ -286,8 +286,8 @@ CONFIGURATION FILE ------------------ By default, the repository `config` file is shared across all working trees. If the config variables `core.bare` or `core.worktree` are -already present in the config file, they will be applied to the main -working trees only. +present in the common config file and `extensions.worktreeConfig` is +disabled, then they will be applied to the main working tree only. 
In order to have configuration specific to working trees, you can turn on the `worktreeConfig` extension, e.g.: @@ -307,11 +307,16 @@ them to the `config.worktree` of the main working tree. You may also take this opportunity to review and move other configuration that you do not want to share to all working trees: - - `core.worktree` and `core.bare` should never be shared + - `core.worktree` should never be shared. + + - `core.bare` should not be shared if the value is `core.bare=true`. - `core.sparseCheckout` is recommended per working tree, unless you are sure you always use sparse checkout for all working trees. +See the documentation of `extensions.worktreeConfig` in +linkgit:git-config[1] for more details. + DETAILS ------- Each linked working tree has a private sub-directory in the repository's From 615a84ad788b26260c5b053ace2d5720ea5f05c5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:32:59 +0000 Subject: [PATCH 074/150] worktree: create init_worktree_config() Upgrading a repository to use extensions.worktreeConfig is non-trivial. There are several steps involved, including moving some config settings from the common config file to the main worktree's config.worktree file. The previous change updated the documentation with all of these details. Commands such as 'git sparse-checkout set' upgrade the repository to use extensions.worktreeConfig without following these steps, causing some user pain in some special cases. Create a helper method, init_worktree_config(), that will be used in a later change to fix this behavior within 'git sparse-checkout set'. The method is carefully documented in worktree.h. Note that we do _not_ upgrade the repository format version to 1 during this process. 
The worktree config extension must be considered by Git and third-party tools even if core.repositoryFormatVersion is 0 for historical reasons documented in 11664196ac ("Revert "check_repository_format_gently(): refuse extensions for old repositories"", 2020-07-15). This is a special case for this extension, and newer extensions (such as extensions.objectFormat) still need to upgrade the repository format version. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- worktree.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ worktree.h | 21 ++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/worktree.c b/worktree.c index 6f598dcfcd..5292c94b3d 100644 --- a/worktree.c +++ b/worktree.c @@ -5,6 +5,7 @@ #include "worktree.h" #include "dir.h" #include "wt-status.h" +#include "config.h" void free_worktrees(struct worktree **worktrees) { @@ -826,3 +827,75 @@ int should_prune_worktree(const char *id, struct strbuf *reason, char **wtpath, *wtpath = path; return 0; } + +static int move_config_setting(const char *key, const char *value, + const char *from_file, const char *to_file) +{ + if (git_config_set_in_file_gently(to_file, key, value)) + return error(_("unable to set %s in '%s'"), key, to_file); + if (git_config_set_in_file_gently(from_file, key, NULL)) + return error(_("unable to unset %s in '%s'"), key, from_file); + return 0; +} + +int init_worktree_config(struct repository *r) +{ + int res = 0; + int bare = 0; + struct config_set cs = { { 0 } }; + const char *core_worktree; + char *common_config_file; + char *main_worktree_file; + + /* + * If the extension is already enabled, then we can skip the + * upgrade process. 
+ */ + if (repository_format_worktree_config) + return 0; + if ((res = git_config_set_gently("extensions.worktreeConfig", "true"))) + return error(_("failed to set extensions.worktreeConfig setting")); + + common_config_file = xstrfmt("%s/config", r->commondir); + main_worktree_file = xstrfmt("%s/config.worktree", r->commondir); + + git_configset_init(&cs); + git_configset_add_file(&cs, common_config_file); + + /* + * If core.bare is true in the common config file, then we need to + * move it to the main worktree's config file or it will break all + * worktrees. If it is false, then leave it in place because it + * _could_ be negating a global core.bare=true. + */ + if (!git_configset_get_bool(&cs, "core.bare", &bare) && bare) { + if ((res = move_config_setting("core.bare", "true", + common_config_file, + main_worktree_file))) + goto cleanup; + } + /* + * If core.worktree is set, then the main worktree is located + * somewhere different than the parent of the common Git dir. + * Relocate that value to avoid breaking all worktrees with this + * upgrade to worktree config. + */ + if (!git_configset_get_value(&cs, "core.worktree", &core_worktree)) { + if ((res = move_config_setting("core.worktree", core_worktree, + common_config_file, + main_worktree_file))) + goto cleanup; + } + + /* + * Ensure that we use worktree config for the remaining lifetime + * of the current process. + */ + repository_format_worktree_config = 1; + +cleanup: + git_configset_clear(&cs); + free(common_config_file); + free(main_worktree_file); + return res; +} diff --git a/worktree.h b/worktree.h index 9e06fcbdf3..e9e839926b 100644 --- a/worktree.h +++ b/worktree.h @@ -183,4 +183,25 @@ void strbuf_worktree_ref(const struct worktree *wt, struct strbuf *sb, const char *refname); +/** + * Enable worktree config for the first time. This will make the following + * adjustments: + * + * 1. Add extensions.worktreeConfig=true in the common config file. + * + * 2. 
If the common config file has a core.worktree value, then that value + * is moved to the main worktree's config.worktree file. + * + * 3. If the common config file has core.bare enabled, then that value + * is moved to the main worktree's config.worktree file. + * + * If extensions.worktreeConfig is already true, then this method + * terminates early without any of the above steps. The existing config + * arrangement is assumed to be intentional. + * + * Returns 0 on success. Reports an error message and returns non-zero + * if any of these steps fail. + */ +int init_worktree_config(struct repository *r); + #endif From fe18733927c1d28ffcec3b52908989c591bc7b87 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:33:00 +0000 Subject: [PATCH 075/150] config: add repo_config_set_worktree_gently() Some config settings, such as those for sparse-checkout, are likely intended to only apply to one worktree at a time. To make this write easier, add a new config API method, repo_config_set_worktree_gently(). This method will attempt to write to the worktree-specific config, but will instead write to the common config file if worktree config is not enabled. The next change will introduce a consumer of this method. 
Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- config.c | 35 ++++++++++++++++++++++++++++++++--- config.h | 8 ++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/config.c b/config.c index 2bffa8d4a0..1a03ced1a5 100644 --- a/config.c +++ b/config.c @@ -21,6 +21,7 @@ #include "dir.h" #include "color.h" #include "refs.h" +#include "worktree.h" struct config_source { struct config_source *prev; @@ -2884,6 +2885,20 @@ int git_config_set_gently(const char *key, const char *value) return git_config_set_multivar_gently(key, value, NULL, 0); } +int repo_config_set_worktree_gently(struct repository *r, + const char *key, const char *value) +{ + /* Only use worktree-specific config if it is already enabled. */ + if (repository_format_worktree_config) { + char *file = repo_git_path(r, "config.worktree"); + int ret = git_config_set_multivar_in_file_gently( + file, key, value, NULL, 0); + free(file); + return ret; + } + return repo_config_set_multivar_gently(r, key, value, NULL, 0); +} + void git_config_set(const char *key, const char *value) { git_config_set_multivar(key, value, NULL, 0); @@ -3181,14 +3196,28 @@ void git_config_set_multivar_in_file(const char *config_filename, int git_config_set_multivar_gently(const char *key, const char *value, const char *value_pattern, unsigned flags) { - return git_config_set_multivar_in_file_gently(NULL, key, value, value_pattern, - flags); + return repo_config_set_multivar_gently(the_repository, key, value, + value_pattern, flags); +} + +int repo_config_set_multivar_gently(struct repository *r, const char *key, + const char *value, + const char *value_pattern, unsigned flags) +{ + char *file = repo_git_path(r, "config"); + int res = git_config_set_multivar_in_file_gently(file, + key, value, + value_pattern, + flags); + free(file); + return res; } void git_config_set_multivar(const char *key, const char *value, 
const char *value_pattern, unsigned flags) { - git_config_set_multivar_in_file(NULL, key, value, value_pattern, + git_config_set_multivar_in_file(git_path("config"), + key, value, value_pattern, flags); } diff --git a/config.h b/config.h index f119de0130..1d98ad269b 100644 --- a/config.h +++ b/config.h @@ -253,6 +253,13 @@ void git_config_set_in_file(const char *, const char *, const char *); int git_config_set_gently(const char *, const char *); +/** + * Write a config value that should apply to the current worktree. If + * extensions.worktreeConfig is enabled, then the write will happen in the + * current worktree's config. Otherwise, write to the common config file. + */ +int repo_config_set_worktree_gently(struct repository *, const char *, const char *); + /** * write config values to `.git/config`, takes a key/value pair as parameter. */ @@ -281,6 +288,7 @@ int git_config_parse_key(const char *, char **, size_t *); int git_config_set_multivar_gently(const char *, const char *, const char *, unsigned); void git_config_set_multivar(const char *, const char *, const char *, unsigned); +int repo_config_set_multivar_gently(struct repository *, const char *, const char *, const char *, unsigned); int git_config_set_multivar_in_file_gently(const char *, const char *, const char *, const char *, unsigned); /** From 7316dc5f6f2c8297d32e47d5859933ffacb6c00e Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:33:01 +0000 Subject: [PATCH 076/150] sparse-checkout: set worktree-config correctly `git sparse-checkout set/init` enables worktree-specific configuration[*] by setting extensions.worktreeConfig=true, but neglects to perform the additional necessary bookkeeping of relocating `core.bare=true` and `core.worktree` from $GIT_COMMON_DIR/config to $GIT_COMMON_DIR/config.worktree, as documented in git-worktree.txt. 
As a result of this oversight, these settings, which are nonsensical for secondary worktrees, can cause Git commands to incorrectly consider a worktree bare (in the case of `core.bare`) or operate on the wrong worktree (in the case of `core.worktree`). Fix this problem by taking advantage of the recently-added init_worktree_config() which enables `extensions.worktreeConfig` and takes care of necessary bookkeeping. While at it, for backward-compatibility reasons, also stop upgrading the repository format to "1" since doing so is (unintentionally) not required to take advantage of `extensions.worktreeConfig`, as explained by 11664196ac ("Revert "check_repository_format_gently(): refuse extensions for old repositories"", 2020-07-15). [*] The main reason to use worktree-specific config for the sparse-checkout builtin was to avoid enabling sparse-checkout patterns in one and causing a loss of files in another. If a worktree does not have a sparse-checkout patterns file, then the sparse-checkout logic will not kick in on that worktree. Reported-by: Sean Allred <allred.sean@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/git-sparse-checkout.txt | 16 +++++++++++---- builtin/sparse-checkout.c | 28 +++++++++++++-------------- sparse-index.c | 10 +++------- t/t1091-sparse-checkout-builtin.sh | 4 ++-- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index b81dbe0654..94dad137b9 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -31,13 +31,21 @@ COMMANDS Describe the patterns in the sparse-checkout file. 
'set':: - Enable the necessary config settings - (extensions.worktreeConfig, core.sparseCheckout, - core.sparseCheckoutCone) if they are not already enabled, and - write a set of patterns to the sparse-checkout file from the + Enable the necessary sparse-checkout config settings + (`core.sparseCheckout`, `core.sparseCheckoutCone`, and + `index.sparse`) if they are not already set to the desired values, + and write a set of patterns to the sparse-checkout file from the list of arguments following the 'set' subcommand. Update the working directory to match the new patterns. + +To ensure that adjusting the sparse-checkout settings within a worktree +does not alter the sparse-checkout settings in other worktrees, the 'set' +subcommand will upgrade your repository config to use worktree-specific +config if not already present. The sparsity defined by the arguments to +the 'set' subcommand is stored in the worktree-specific sparse-checkout +file. See linkgit:git-worktree[1] and the documentation of +`extensions.worktreeConfig` in linkgit:git-config[1] for more details. ++ When the `--stdin` option is provided, the patterns are read from standard in as a newline-delimited list instead of from the arguments. + diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 679c107036..314c8d61f8 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -15,6 +15,7 @@ #include "wt-status.h" #include "quote.h" #include "sparse-index.h" +#include "worktree.h" static const char *empty_base = ""; @@ -359,26 +360,23 @@ enum sparse_checkout_mode { static int set_config(enum sparse_checkout_mode mode) { - const char *config_path; - - if (upgrade_repository_format(1) < 0) - die(_("unable to upgrade repository format to enable worktreeConfig")); - if (git_config_set_gently("extensions.worktreeConfig", "true")) { - error(_("failed to set extensions.worktreeConfig setting")); + /* Update to use worktree config, if not already. 
*/ + if (init_worktree_config(the_repository)) { + error(_("failed to initialize worktree config")); return 1; } - config_path = git_path("config.worktree"); - git_config_set_in_file_gently(config_path, - "core.sparseCheckout", - mode ? "true" : NULL); - - git_config_set_in_file_gently(config_path, - "core.sparseCheckoutCone", - mode == MODE_CONE_PATTERNS ? "true" : NULL); + if (repo_config_set_worktree_gently(the_repository, + "core.sparseCheckout", + mode ? "true" : "false") || + repo_config_set_worktree_gently(the_repository, + "core.sparseCheckoutCone", + mode == MODE_CONE_PATTERNS ? + "true" : "false")) + return 1; if (mode == MODE_NO_PATTERNS) - set_sparse_index_config(the_repository, 0); + return set_sparse_index_config(the_repository, 0); return 0; } diff --git a/sparse-index.c b/sparse-index.c index a1d505d50e..e93609999e 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -99,13 +99,9 @@ static int convert_to_sparse_rec(struct index_state *istate, int set_sparse_index_config(struct repository *repo, int enable) { - int res; - char *config_path = repo_git_path(repo, "config.worktree"); - res = git_config_set_in_file_gently(config_path, - "index.sparse", - enable ? "true" : NULL); - free(config_path); - + int res = repo_config_set_worktree_gently(repo, + "index.sparse", + enable ? 
"true" : "false"); prepare_repo_settings(repo); repo->settings.sparse_index = enable; return res; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 42776984fe..be6ea4ffe3 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -117,7 +117,7 @@ test_expect_success 'switching to cone mode with non-cone mode patterns' ' cd bad-patterns && git sparse-checkout init && git sparse-checkout add dir && - git config core.sparseCheckoutCone true && + git config --worktree core.sparseCheckoutCone true && test_must_fail git sparse-checkout add dir 2>err && grep "existing sparse-checkout patterns do not use cone mode" err ) @@ -256,7 +256,7 @@ test_expect_success 'sparse-index enabled and disabled' ' test_cmp expect actual && git -C repo config --list >config && - ! grep index.sparse config + test_cmp_config -C repo false index.sparse ) ' From 53255916b7c47a0d360841477ed9ffbc4b370284 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:33:02 +0000 Subject: [PATCH 077/150] worktree: copy sparse-checkout patterns and config on add When adding a new worktree, it is reasonable to expect that we want to use the current set of sparse-checkout settings for that new worktree. This is particularly important for repositories where the worktree would become too large to be useful. This is even more important when using partial clone as well, since we want to avoid downloading the missing blobs for files that should not be written to the new worktree. The only way to create such a worktree without this intermediate step of expanding the full worktree is to copy the sparse-checkout patterns and config settings during 'git worktree add'. Each worktree has its own sparse-checkout patterns, and the default behavior when the sparse-checkout file is missing is to include all paths at HEAD. 
Thus, we need to have patterns from somewhere, they might as well be the current worktree's patterns. These are then modified independently in the future. In addition to the sparse-checkout file, copy the worktree config file if worktree config is enabled and the file exists. This will copy over any important settings to ensure the new worktree behaves the same as the current one. The only exception we must continue to make is that core.bare and core.worktree should become unset in the worktree's config file. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/worktree.c | 63 ++++++++++++++++++++++++++++++ t/t1091-sparse-checkout-builtin.sh | 31 +++++++++++---- t/t2400-worktree-add.sh | 58 ++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 10 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index 2838254f7f..c6eb636329 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -335,6 +335,69 @@ static int add_worktree(const char *path, const char *refname, strbuf_addf(&sb, "%s/commondir", sb_repo.buf); write_file(sb.buf, "../.."); + /* + * If the current worktree has sparse-checkout enabled, then copy + * the sparse-checkout patterns from the current worktree. + */ + if (core_apply_sparse_checkout) { + char *from_file = git_pathdup("info/sparse-checkout"); + char *to_file = xstrfmt("%s/info/sparse-checkout", + sb_repo.buf); + + if (file_exists(from_file)) { + if (safe_create_leading_directories(to_file) || + copy_file(to_file, from_file, 0666)) + error(_("failed to copy '%s' to '%s'; sparse-checkout may not work correctly"), + from_file, to_file); + } + + free(from_file); + free(to_file); + } + + /* + * If we are using worktree config, then copy all current config + * values from the current worktree into the new one, that way the + * new worktree behaves the same as this one. 
+ */ + if (repository_format_worktree_config) { + char *from_file = git_pathdup("config.worktree"); + char *to_file = xstrfmt("%s/config.worktree", + sb_repo.buf); + + if (file_exists(from_file)) { + struct config_set cs = { { 0 } }; + const char *core_worktree; + int bare; + + if (safe_create_leading_directories(to_file) || + copy_file(to_file, from_file, 0666)) { + error(_("failed to copy worktree config from '%s' to '%s'"), + from_file, to_file); + goto worktree_copy_cleanup; + } + + git_configset_init(&cs); + git_configset_add_file(&cs, from_file); + + if (!git_configset_get_bool(&cs, "core.bare", &bare) && + bare && + git_config_set_multivar_in_file_gently( + to_file, "core.bare", NULL, "true", 0)) + error(_("failed to unset 'core.bare' in '%s'"), to_file); + if (!git_configset_get_value(&cs, "core.worktree", &core_worktree) && + git_config_set_in_file_gently(to_file, + "core.worktree", NULL)) + error(_("failed to unset 'core.worktree' in '%s'"), to_file); + + git_configset_clear(&cs); + } + +worktree_copy_cleanup: + free(from_file); + free(to_file); + } + strvec_pushf(&child_env, "%s=%s", GIT_DIR_ENVIRONMENT, sb_git.buf); strvec_pushf(&child_env, "%s=%s", GIT_WORK_TREE_ENVIRONMENT, path); cp.git_cmd = 1; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index be6ea4ffe3..8a757b43e6 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -146,9 +146,9 @@ test_expect_success 'interaction with clone --no-checkout (unborn index)' ' ' test_expect_success 'set enables config' ' - git init empty-config && + git init worktree-config && ( - cd empty-config && + cd worktree-config && test_commit test file && test_path_is_missing .git/config.worktree && git sparse-checkout set nothing && @@ -201,6 +201,21 @@ test_expect_success 'add to sparse-checkout' ' check_files repo "a folder1 folder2" ' +test_expect_success 'worktree: add copies sparse-checkout patterns' ' + cat repo/.git/info/sparse-checkout 
>old && + test_when_finished cp old repo/.git/info/sparse-checkout && + test_when_finished git -C repo worktree remove ../worktree && + git -C repo sparse-checkout set --no-cone "/*" && + git -C repo worktree add --quiet ../worktree 2>err && + test_must_be_empty err && + new="$(git -C worktree rev-parse --git-path info/sparse-checkout)" && + test_path_is_file "$new" && + test_cmp repo/.git/info/sparse-checkout "$new" && + git -C worktree sparse-checkout set --cone && + test_cmp_config -C worktree true core.sparseCheckoutCone && + test_must_fail git -C repo core.sparseCheckoutCone +' + test_expect_success 'cone mode: match patterns' ' git -C repo config --worktree core.sparseCheckoutCone true && rm -rf repo/a repo/folder1 repo/folder2 && @@ -520,13 +535,13 @@ test_expect_success 'interaction with submodules' ' ' test_expect_success 'different sparse-checkouts with worktrees' ' + git -C repo sparse-checkout set --cone deep folder1 && git -C repo worktree add --detach ../worktree && - check_files worktree "a deep folder1 folder2" && - git -C worktree sparse-checkout init --cone && - git -C repo sparse-checkout set folder1 && - git -C worktree sparse-checkout set deep/deeper1 && - check_files repo a folder1 && - check_files worktree a deep + check_files worktree "a deep folder1" && + git -C repo sparse-checkout set --cone folder1 && + git -C worktree sparse-checkout set --cone deep/deeper1 && + check_files repo "a folder1" && + check_files worktree "a deep" ' test_expect_success 'set using filename keeps file on-disk' ' diff --git a/t/t2400-worktree-add.sh b/t/t2400-worktree-add.sh index 37ad79470f..43139af08f 100755 --- a/t/t2400-worktree-add.sh +++ b/t/t2400-worktree-add.sh @@ -165,8 +165,62 @@ test_expect_success '"add" default branch of a bare repo' ' ( git clone --bare . 
bare2 && cd bare2 && - git worktree add ../there3 main - ) + git worktree add ../there3 main && + cd ../there3 && + # Simple check that a Git command does not + # immediately fail with the current setup + git status + ) && + cat >expect <<-EOF && + init.t + EOF + ls there3 >actual && + test_cmp expect actual +' + +test_expect_success '"add" to bare repo with worktree config' ' + ( + git clone --bare . bare3 && + cd bare3 && + git config extensions.worktreeconfig true && + + # Add config values that are erroneous to have in + # a config.worktree file outside of the main + # working tree, to check that Git filters them out + # when copying config during "git worktree add". + git config --worktree core.bare true && + git config --worktree core.worktree "$(pwd)" && + + # We want to check that bogus.key is copied + git config --worktree bogus.key value && + git config --unset core.bare && + git worktree add ../there4 main && + cd ../there4 && + + # Simple check that a Git command does not + # immediately fail with the current setup + git status && + git worktree add --detach ../there5 && + cd ../there5 && + git status + ) && + + # the worktree has the arbitrary value copied. + test_cmp_config -C there4 value bogus.key && + test_cmp_config -C there5 value bogus.key && + + # however, core.bare and core.worktree were removed. 
+ test_must_fail git -C there4 config core.bare && + test_must_fail git -C there4 config core.worktree && + + cat >expect <<-EOF && + init.t + EOF + + ls there4 >actual && + test_cmp expect actual && + ls there5 >actual && + test_cmp expect actual ' test_expect_success 'checkout with grafts' ' From 3ce113827287079dced9aaf9c5d1e1734ecaa265 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Feb 2022 21:33:03 +0000 Subject: [PATCH 078/150] config: make git_configset_get_string_tmp() private This method was created in f1de981e8 (config: fix leaks from git_config_get_string_const(), 2020-08-14) but its only use was in the repo_config_get_string_tmp() method, also declared in config.h and implemented in config.c. Since this is otherwise unused and is a very similar implementation to git_configset_get_value(), let's remove this declaration. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- config.c | 4 ++-- config.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/config.c b/config.c index 1a03ced1a5..870b22dd2f 100644 --- a/config.c +++ b/config.c @@ -2179,8 +2179,8 @@ int git_configset_get_string(struct config_set *cs, const char *key, char **dest return 1; } -int git_configset_get_string_tmp(struct config_set *cs, const char *key, - const char **dest) +static int git_configset_get_string_tmp(struct config_set *cs, const char *key, + const char **dest) { const char *value; if (!git_configset_get_value(cs, key, &value)) { diff --git a/config.h b/config.h index 1d98ad269b..184aef1eca 100644 --- a/config.h +++ b/config.h @@ -494,7 +494,6 @@ void git_configset_clear(struct config_set *cs); int git_configset_get_value(struct config_set *cs, const char *key, const char **dest); int git_configset_get_string(struct config_set *cs, const char *key, char **dest); -int git_configset_get_string_tmp(struct config_set *cs, const 
char *key, const char **dest); int git_configset_get_int(struct config_set *cs, const char *key, int *dest); int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned long *dest); int git_configset_get_bool(struct config_set *cs, const char *key, int *dest); From 059fda190215d18e7aa23f825cd607b16a016b65 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 8 Feb 2022 11:21:53 +0000 Subject: [PATCH 079/150] checkout/fetch/pull/pack-objects: allow `-h` outside a repository When we taught these commands about the sparse index, we did not account for the fact that the `cmd_*()` functions _can_ be called without a gitdir, namely when `-h` is passed to show the usage. A plausible approach to address this is to move the `prepare_repo_settings()` calls right after the `parse_options()` calls: The latter will never return when it handles `-h`, and therefore it is safe to assume that we have a `gitdir` at that point, as long as the built-in is marked with the `RUN_SETUP` flag. However, it is unfortunately not that simple. In `cmd_pack_objects()`, for example, the repo settings need to be fully populated so that the command-line options `--sparse`/`--no-sparse` can override them, not the other way round. Therefore, we choose to imitate the strategy taken in `cmd_diff()`, where we simply do not bother to prepare and initialize the repo settings unless we have a `gitdir`. 
This fixes https://github.com/git-for-windows/git/issues/3688 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/checkout.c | 7 ++++--- builtin/fetch.c | 6 ++++-- builtin/pack-objects.c | 8 +++++--- builtin/pull.c | 6 ++++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index cc804ba8e1..1c13d7fedb 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1602,9 +1602,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix, opts->show_progress = -1; git_config(git_checkout_config, opts); - - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + if (the_repository->gitdir) { + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + } opts->track = BRANCH_TRACK_UNSPECIFIED; diff --git a/builtin/fetch.c b/builtin/fetch.c index 5f06b21f8e..c8ca4262de 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -2014,8 +2014,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } git_config(git_fetch_config, NULL); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + if (the_repository->gitdir) { + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + } argc = parse_options(argc, argv, prefix, builtin_fetch_options, builtin_fetch_usage, 0); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ba2006f221..87cb7b45c3 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3976,9 +3976,11 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_replace_refs = 0; sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1); - prepare_repo_settings(the_repository); - if (sparse < 0) - sparse = the_repository->settings.pack_use_sparse; + if (the_repository->gitdir) { + 
prepare_repo_settings(the_repository); + if (sparse < 0) + sparse = the_repository->settings.pack_use_sparse; + } reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); diff --git a/builtin/pull.c b/builtin/pull.c index 100cbf9fb8..d15007d93f 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -994,8 +994,10 @@ int cmd_pull(int argc, const char **argv, const char *prefix) set_reflog_message(argc, argv); git_config(git_pull_config, NULL); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + if (the_repository->gitdir) { + prepare_repo_settings(the_repository); + the_repository->settings.command_requires_full_index = 0; + } argc = parse_options(argc, argv, prefix, pull_options, pull_usage, 0); From 87ad07d735448a72d4e1fc4f3ce1e6b44bc613f5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 8 Feb 2022 11:21:54 +0000 Subject: [PATCH 080/150] t0012: verify that built-ins handle `-h` even without gitdir We just fixed a class of recently introduced bugs where calling, say, `git fetch -h` outside a repository would not show the usage but instead show an ugly `BUG` message. Let's verify that this does not regress anymore. 
 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0012-help.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/t/t0012-help.sh b/t/t0012-help.sh index 91b68c74a1..cbd725ccac 100755 --- a/t/t0012-help.sh +++ b/t/t0012-help.sh @@ -139,13 +139,18 @@ test_expect_success 'git help --config-sections-for-completion' ' ' test_expect_success 'generate builtin list' ' + mkdir -p sub && git --list-cmds=builtins >builtins ' while read builtin do test_expect_success "$builtin can handle -h" ' - test_expect_code 129 git $builtin -h >output 2>&1 && + ( + GIT_CEILING_DIRECTORIES=$(pwd) && + export GIT_CEILING_DIRECTORIES && + test_expect_code 129 git -C sub $builtin -h >output 2>&1 + ) && test_i18ngrep usage output ' done <builtins From eb57277ba3dcdf97a66dcd2c471e29bf64d3279f Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Wed, 9 Feb 2022 14:26:47 -0500 Subject: [PATCH 081/150] midx: prevent writing a .bitmap without any objects When trying to write a MIDX, we already prevent the case where there weren't any packs present, and thus we would have written an empty MIDX. But there is another "empty" case, which is more interesting, and we don't yet handle. If we try to write a MIDX which has at least one pack, but those packs together don't contain any objects, we will encounter a BUG() when trying to use the bitmap corresponding to that MIDX, like so: $ git rev-parse HEAD | git pack-objects --revs --use-bitmap-index --stdout >/dev/null BUG: pack-revindex.c:394: pack_pos_to_midx: out-of-bounds object at 0 (note that in the above reproduction, both `--use-bitmap-index` and `--stdout` are important, since without the former we won't even bother to load the .bitmap, and without the latter we won't attempt pack reuse). The problem occurs when we try to discover the identity of the preferred pack to determine which range if any of existing packs we can reuse verbatim. 
This path is: `reuse_packfile_objects()` -> `reuse_partial_packfile_from_bitmap()` -> `midx_preferred_pack()`. #4 0x000055555575401f in pack_pos_to_midx (m=0x555555997160, pos=0) at pack-revindex.c:394 #5 0x00005555557502c8 in midx_preferred_pack (bitmap_git=0x55555599c280) at pack-bitmap.c:1431 #6 0x000055555575036c in reuse_partial_packfile_from_bitmap (bitmap_git=0x55555599c280, packfile_out=0x5555559666b0 <reuse_packfile>, entries=0x5555559666b8 <reuse_packfile_objects>, reuse_out=0x5555559666c0 <reuse_packfile_bitmap>) at pack-bitmap.c:1452 #7 0x00005555556041f6 in get_object_list_from_bitmap (revs=0x7fffffffcbf0) at builtin/pack-objects.c:3658 #8 0x000055555560465c in get_object_list (ac=2, av=0x555555997050) at builtin/pack-objects.c:3765 #9 0x0000555555605e4e in cmd_pack_objects (argc=0, argv=0x7fffffffe920, prefix=0x0) at builtin/pack-objects.c:4154 Since neither the .bitmap or MIDX stores the identity of the preferred pack, we infer it by trying to load the first object in pseudo-pack order, and then asking the MIDX which pack was chosen to represent that object. But this fails our bounds check, since there are zero objects in the MIDX to begin with, which results in the BUG(). We could catch this more carefully in `midx_preferred_pack()`, but signaling the absence of a preferred pack out to all of its callers is somewhat awkward. Instead, let's avoid writing a MIDX .bitmap without any objects altogether. We catch this case in `write_midx_internal()`, and emit a warning if the caller indicated they wanted to write a bitmap before clearing out the relevant flags. If we somehow got to write_midx_bitmap(), then we will call BUG(), but this should now be an unreachable path. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- midx.c | 9 +++++++++ t/t5326-multi-pack-bitmaps.sh | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/midx.c b/midx.c index 837b46b2af..b40605c492 100644 --- a/midx.c +++ b/midx.c @@ -1061,6 +1061,9 @@ static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); int ret; + if (!ctx->entries_nr) + BUG("cannot write a bitmap without any objects"); + if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) options |= BITMAP_OPT_HASH_CACHE; @@ -1385,6 +1388,12 @@ static int write_midx_internal(const char *object_dir, goto cleanup; } + if (!ctx.entries_nr) { + if (flags & MIDX_WRITE_BITMAP) + warning(_("refusing to write multi-pack .bitmap without any objects")); + flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); + } + cf = init_chunkfile(f); add_chunk(cf, MIDX_CHUNKID_PACKNAMES, pack_name_concat_len, diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh index e187f90f29..89b5d45c48 100755 --- a/t/t5326-multi-pack-bitmaps.sh +++ b/t/t5326-multi-pack-bitmaps.sh @@ -395,4 +395,26 @@ test_expect_success 'hash-cache values are propagated from pack bitmaps' ' ) ' +test_expect_success 'no .bitmap is written without any objects' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + empty="$(git pack-objects $objdir/pack/pack </dev/null)" && + cat >packs <<-EOF && + pack-$empty.idx + EOF + + git multi-pack-index write --bitmap --stdin-packs \ + <packs 2>err && + + grep "bitmap without any objects" err && + + test_path_is_file $midx && + test_path_is_missing $midx-$(midx_checksum $objdir).bitmap + ) +' + test_done From f05da2b48b48a46db65fc768b3ffecaf996dd655 Mon Sep 17 00:00:00 2001 From: Josh Steadmon <steadmon@google.com> Date: Fri, 4 Feb 2022 21:00:49 -0800 Subject: [PATCH 082/150] clone, submodule: pass partial 
clone filters to submodules When cloning a repo with a --filter and with --recurse-submodules enabled, the partial clone filter only applies to the top-level repo. This can lead to unexpected bandwidth and disk usage for projects which include large submodules. For example, a user might wish to make a partial clone of Gerrit and would run: `git clone --recurse-submodules --filter=blob:5k https://gerrit.googlesource.com/gerrit`. However, only the superproject would be a partial clone; all the submodules would have all blobs downloaded regardless of their size. With this change, the same filter can also be applied to submodules, meaning the expected bandwidth and disk savings apply consistently. To avoid changing default behavior, add a new clone flag, `--also-filter-submodules`. When this is set along with `--filter` and `--recurse-submodules`, the filter spec is passed along to git-submodule and git-submodule--helper, such that submodule clones also have the filter applied. This applies the same filter to the superproject and all submodules. Users who need to customize the filter per-submodule would need to clone with `--no-recurse-submodules` and then manually initialize each submodule with the proper filter. Applying filters to submodules should be safe thanks to Jonathan Tan's recent work [1, 2, 3] eliminating the use of alternates as a method of accessing submodule objects, so any submodule object access now triggers a lazy fetch from the submodule's promisor remote if the accessed object is missing. This patch is a reworked version of [4], which was created prior to Jonathan Tan's work. 
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16) [2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate', 2021-09-20) [3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules', 2021-10-25) [4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/ Signed-off-by: Josh Steadmon <steadmon@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/clone.txt | 5 ++++ Documentation/git-clone.txt | 7 ++++- Documentation/git-submodule.txt | 6 ++++- builtin/clone.c | 36 ++++++++++++++++++++++++-- builtin/submodule--helper.c | 30 +++++++++++++++++++--- git-submodule.sh | 17 ++++++++++++- t/t5617-clone-submodules-remote.sh | 41 ++++++++++++++++++++++++++++++ t/t7814-grep-recurse-submodules.sh | 41 ++++++++++++++++++++++++++++++ 8 files changed, 175 insertions(+), 8 deletions(-) diff --git a/Documentation/config/clone.txt b/Documentation/config/clone.txt index 7bcfbd18a5..26f4fb137a 100644 --- a/Documentation/config/clone.txt +++ b/Documentation/config/clone.txt @@ -6,3 +6,8 @@ clone.defaultRemoteName:: clone.rejectShallow:: Reject to clone a repository if it is a shallow one, can be overridden by passing option `--reject-shallow` in command line. See linkgit:git-clone[1] + +clone.filterSubmodules:: + If a partial clone filter is provided (see `--filter` in + linkgit:git-rev-list[1]) and `--recurse-submodules` is used, also apply + the filter to submodules. 
diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 984d194934..632bd1348e 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -16,7 +16,7 @@ SYNOPSIS [--depth <depth>] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=<pathspec>]] [--[no-]shallow-submodules] [--[no-]remote-submodules] [--jobs <n>] [--sparse] [--[no-]reject-shallow] - [--filter=<filter>] [--] <repository> + [--filter=<filter> [--also-filter-submodules]] [--] <repository> [<directory>] DESCRIPTION @@ -182,6 +182,11 @@ objects from the source repository into a pack in the cloned repository. at least `<size>`. For more details on filter specifications, see the `--filter` option in linkgit:git-rev-list[1]. +--also-filter-submodules:: + Also apply the partial clone filter to any submodules in the repository. + Requires `--filter` and `--recurse-submodules`. This can be turned on by + default by setting the `clone.filterSubmodules` config option. + --mirror:: Set up a mirror of the source repository. This implies `--bare`. Compared to `--bare`, `--mirror` not only maps local branches of the diff --git a/Documentation/git-submodule.txt b/Documentation/git-submodule.txt index 7e5f995f77..4d3ab6b9f9 100644 --- a/Documentation/git-submodule.txt +++ b/Documentation/git-submodule.txt @@ -133,7 +133,7 @@ If you really want to remove a submodule from the repository and commit that use linkgit:git-rm[1] instead. See linkgit:gitsubmodules[7] for removal options. 
-update [--init] [--remote] [-N|--no-fetch] [--[no-]recommend-shallow] [-f|--force] [--checkout|--rebase|--merge] [--reference <repository>] [--depth <depth>] [--recursive] [--jobs <n>] [--[no-]single-branch] [--] [<path>...]:: +update [--init] [--remote] [-N|--no-fetch] [--[no-]recommend-shallow] [-f|--force] [--checkout|--rebase|--merge] [--reference <repository>] [--depth <depth>] [--recursive] [--jobs <n>] [--[no-]single-branch] [--filter <filter spec>] [--] [<path>...]:: + -- Update the registered submodules to match what the superproject @@ -177,6 +177,10 @@ submodule with the `--init` option. If `--recursive` is specified, this command will recurse into the registered submodules, and update any nested submodules within. + +If `--filter <filter spec>` is specified, the given partial clone filter will be +applied to the submodule. See linkgit:git-rev-list[1] for details on filter +specifications. -- set-branch (-b|--branch) <branch> [--] <path>:: set-branch (-d|--default) [--] <path>:: diff --git a/builtin/clone.c b/builtin/clone.c index 727e16e0ae..fb605e4c8d 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -71,6 +71,8 @@ static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; static struct list_objects_filter_options filter_options; +static int option_filter_submodules = -1; /* unspecified */ +static int config_filter_submodules = -1; /* unspecified */ static struct string_list server_options = STRING_LIST_INIT_NODUP; static int option_remote_submodules; @@ -150,6 +152,8 @@ static struct option builtin_clone_options[] = { OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_BOOL(0, "also-filter-submodules", &option_filter_submodules, + N_("apply partial clone filters to submodules")), OPT_BOOL(0, "remote-submodules", &option_remote_submodules, N_("any cloned submodules will use their 
remote-tracking branch")), OPT_BOOL(0, "sparse", &option_sparse_checkout, @@ -650,7 +654,7 @@ static int git_sparse_checkout_init(const char *repo) return result; } -static int checkout(int submodule_progress) +static int checkout(int submodule_progress, int filter_submodules) { struct object_id oid; char *head; @@ -729,6 +733,10 @@ static int checkout(int submodule_progress) strvec_push(&args, "--no-fetch"); } + if (filter_submodules && filter_options.choice) + strvec_pushf(&args, "--filter=%s", + expand_list_objects_filter_spec(&filter_options)); + if (option_single_branch >= 0) strvec_push(&args, option_single_branch ? "--single-branch" : @@ -749,6 +757,8 @@ static int git_clone_config(const char *k, const char *v, void *cb) } if (!strcmp(k, "clone.rejectshallow")) config_reject_shallow = git_config_bool(k, v); + if (!strcmp(k, "clone.filtersubmodules")) + config_filter_submodules = git_config_bool(k, v); return git_default_config(k, v, cb); } @@ -871,6 +881,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct remote *remote; int err = 0, complete_refs_before_fetch = 1; int submodule_progress; + int filter_submodules = 0; struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; @@ -1066,6 +1077,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_reject_shallow != -1) reject_shallow = option_reject_shallow; + /* + * If option_filter_submodules is specified from CLI option, + * ignore config_filter_submodules from git_clone_config. + */ + if (config_filter_submodules != -1) + filter_submodules = config_filter_submodules; + if (option_filter_submodules != -1) + filter_submodules = option_filter_submodules; + + /* + * Exit if the user seems to be doing something silly with submodule + * filter flags (but not with filter configs, as those should be + * set-and-forget). 
+ */ + if (option_filter_submodules > 0 && !filter_options.choice) + die(_("the option '%s' requires '%s'"), + "--also-filter-submodules", "--filter"); + if (option_filter_submodules > 0 && !option_recurse_submodules.nr) + die(_("the option '%s' requires '%s'"), + "--also-filter-submodules", "--recurse-submodules"); + /* * apply the remote name provided by --origin only after this second * call to git_config, to ensure it overrides all config-based values. @@ -1299,7 +1331,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; - err = checkout(submodule_progress); + err = checkout(submodule_progress, filter_submodules); free(remote_name); strbuf_release(&reflog_msg); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c5d3fc3817..11552970f2 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -20,6 +20,7 @@ #include "diff.h" #include "object-store.h" #include "advice.h" +#include "list-objects-filter-options.h" #define OPT_QUIET (1 << 0) #define OPT_CACHED (1 << 1) @@ -1630,6 +1631,7 @@ struct module_clone_data { const char *name; const char *url; const char *depth; + struct list_objects_filter_options *filter_options; struct string_list reference; unsigned int quiet: 1; unsigned int progress: 1; @@ -1796,6 +1798,10 @@ static int clone_submodule(struct module_clone_data *clone_data) strvec_push(&cp.args, "--dissociate"); if (sm_gitdir && *sm_gitdir) strvec_pushl(&cp.args, "--separate-git-dir", sm_gitdir, NULL); + if (clone_data->filter_options && clone_data->filter_options->choice) + strvec_pushf(&cp.args, "--filter=%s", + expand_list_objects_filter_spec( + clone_data->filter_options)); if (clone_data->single_branch >= 0) strvec_push(&cp.args, clone_data->single_branch ? 
"--single-branch" : @@ -1852,6 +1858,7 @@ static int module_clone(int argc, const char **argv, const char *prefix) { int dissociate = 0, quiet = 0, progress = 0, require_init = 0; struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; + struct list_objects_filter_options filter_options; struct option module_clone_options[] = { OPT_STRING(0, "prefix", &clone_data.prefix, @@ -1881,17 +1888,19 @@ static int module_clone(int argc, const char **argv, const char *prefix) N_("disallow cloning into non-empty directory")), OPT_BOOL(0, "single-branch", &clone_data.single_branch, N_("clone only one branch, HEAD or --branch")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; const char *const git_submodule_helper_usage[] = { N_("git submodule--helper clone [--prefix=<path>] [--quiet] " "[--reference <repository>] [--name <name>] [--depth <depth>] " - "[--single-branch] " + "[--single-branch] [--filter <filter-spec>]" "--url <url> --path <path>"), NULL }; + memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_clone_options, git_submodule_helper_usage, 0); @@ -1899,12 +1908,14 @@ static int module_clone(int argc, const char **argv, const char *prefix) clone_data.quiet = !!quiet; clone_data.progress = !!progress; clone_data.require_init = !!require_init; + clone_data.filter_options = &filter_options; if (argc || !clone_data.url || !clone_data.path || !*(clone_data.path)) usage_with_options(git_submodule_helper_usage, module_clone_options); clone_submodule(&clone_data); + list_objects_filter_release(&filter_options); return 0; } @@ -1994,6 +2005,7 @@ struct submodule_update_clone { const char *recursive_prefix; const char *prefix; int single_branch; + struct list_objects_filter_options *filter_options; /* to be consumed by git-submodule.sh */ struct update_clone_data *update_clone; @@ -2154,6 +2166,9 @@ static int prepare_to_clone_next_submodule(const struct cache_entry *ce, strvec_pushl(&child->args, "--prefix", 
suc->prefix, NULL); if (suc->recommend_shallow && sub->recommend_shallow == 1) strvec_push(&child->args, "--depth=1"); + if (suc->filter_options && suc->filter_options->choice) + strvec_pushf(&child->args, "--filter=%s", + expand_list_objects_filter_spec(suc->filter_options)); if (suc->require_init) strvec_push(&child->args, "--require-init"); strvec_pushl(&child->args, "--path", sub->path, NULL); @@ -2498,6 +2513,8 @@ static int update_clone(int argc, const char **argv, const char *prefix) const char *update = NULL; struct pathspec pathspec; struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT; + struct list_objects_filter_options filter_options; + int ret; struct option module_update_clone_options[] = { OPT_STRING(0, "prefix", &prefix, @@ -2528,6 +2545,7 @@ static int update_clone(int argc, const char **argv, const char *prefix) N_("disallow cloning into non-empty directory")), OPT_BOOL(0, "single-branch", &suc.single_branch, N_("clone only one branch, HEAD or --branch")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -2540,20 +2558,26 @@ static int update_clone(int argc, const char **argv, const char *prefix) update_clone_config_from_gitmodules(&suc.max_jobs); git_config(git_update_clone_config, &suc.max_jobs); + memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_update_clone_options, git_submodule_helper_usage, 0); + suc.filter_options = &filter_options; if (update) if (parse_submodule_update_strategy(update, &suc.update) < 0) die(_("bad value for update parameter")); - if (module_list_compute(argc, argv, prefix, &pathspec, &suc.list) < 0) + if (module_list_compute(argc, argv, prefix, &pathspec, &suc.list) < 0) { + list_objects_filter_release(&filter_options); return 1; + } if (pathspec.nr) suc.warn_if_uninitialized = 1; - return update_submodules(&suc); + ret = update_submodules(&suc); + list_objects_filter_release(&filter_options); + return ret; } static int 
run_update_procedure(int argc, const char **argv, const char *prefix) diff --git a/git-submodule.sh b/git-submodule.sh index 652861aa66..87772ac891 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -10,7 +10,7 @@ USAGE="[--quiet] [--cached] or: $dashless [--quiet] status [--cached] [--recursive] [--] [<path>...] or: $dashless [--quiet] init [--] [<path>...] or: $dashless [--quiet] deinit [-f|--force] (--all| [--] <path>...) - or: $dashless [--quiet] update [--init] [--remote] [-N|--no-fetch] [-f|--force] [--checkout|--merge|--rebase] [--[no-]recommend-shallow] [--reference <repository>] [--recursive] [--[no-]single-branch] [--] [<path>...] + or: $dashless [--quiet] update [--init [--filter=<filter-spec>]] [--remote] [-N|--no-fetch] [-f|--force] [--checkout|--merge|--rebase] [--[no-]recommend-shallow] [--reference <repository>] [--recursive] [--[no-]single-branch] [--] [<path>...] or: $dashless [--quiet] set-branch (--default|--branch <branch>) [--] <path> or: $dashless [--quiet] set-url [--] <path> <newurl> or: $dashless [--quiet] summary [--cached|--files] [--summary-limit <n>] [commit] [--] [<path>...] @@ -49,6 +49,7 @@ dissociate= single_branch= jobs= recommend_shallow= +filter= die_if_unmatched () { @@ -347,6 +348,14 @@ cmd_update() --no-single-branch) single_branch="--no-single-branch" ;; + --filter) + case "$2" in '') usage ;; esac + filter="--filter=$2" + shift + ;; + --filter=*) + filter="$1" + ;; --) shift break @@ -361,6 +370,11 @@ cmd_update() shift done + if test -n "$filter" && test "$init" != "1" + then + usage + fi + if test -n "$init" then cmd_init "--" "$@" || return @@ -379,6 +393,7 @@ cmd_update() $single_branch \ $recommend_shallow \ $jobs \ + $filter \ -- \ "$@" || echo "#unmatched" $? 
} | { diff --git a/t/t5617-clone-submodules-remote.sh b/t/t5617-clone-submodules-remote.sh index e2dbb4eaba..ca8f80083a 100755 --- a/t/t5617-clone-submodules-remote.sh +++ b/t/t5617-clone-submodules-remote.sh @@ -28,6 +28,13 @@ test_expect_success 'setup' ' ) ' +# bare clone giving "srv.bare" for use as our server. +test_expect_success 'setup bare clone for server' ' + git clone --bare "file://$(pwd)/." srv.bare && + git -C srv.bare config --local uploadpack.allowfilter 1 && + git -C srv.bare config --local uploadpack.allowanysha1inwant 1 +' + test_expect_success 'clone with --no-remote-submodules' ' test_when_finished "rm -rf super_clone" && git clone --recurse-submodules --no-remote-submodules "file://$pwd/." super_clone && @@ -65,4 +72,38 @@ test_expect_success 'clone with --single-branch' ' ) ' +# do basic partial clone from "srv.bare" +# confirm partial clone was registered in the local config for super and sub. +test_expect_success 'clone with --filter' ' + git clone --recurse-submodules \ + --filter blob:none --also-filter-submodules \ + "file://$pwd/srv.bare" super_clone && + test_cmp_config -C super_clone true remote.origin.promisor && + test_cmp_config -C super_clone blob:none remote.origin.partialclonefilter && + test_cmp_config -C super_clone/sub true remote.origin.promisor && + test_cmp_config -C super_clone/sub blob:none remote.origin.partialclonefilter +' + +# check that clone.filterSubmodules works (--also-filter-submodules can be +# omitted) +test_expect_success 'filters applied with clone.filterSubmodules' ' + test_config_global clone.filterSubmodules true && + git clone --recurse-submodules --filter blob:none \ + "file://$pwd/srv.bare" super_clone2 && + test_cmp_config -C super_clone2 true remote.origin.promisor && + test_cmp_config -C super_clone2 blob:none remote.origin.partialclonefilter && + test_cmp_config -C super_clone2/sub true remote.origin.promisor && + test_cmp_config -C super_clone2/sub blob:none remote.origin.partialclonefilter +' + 
+test_expect_success '--no-also-filter-submodules overrides clone.filterSubmodules=true' ' + test_config_global clone.filterSubmodules true && + git clone --recurse-submodules --filter blob:none \ + --no-also-filter-submodules \ + "file://$pwd/srv.bare" super_clone3 && + test_cmp_config -C super_clone3 true remote.origin.promisor && + test_cmp_config -C super_clone3 blob:none remote.origin.partialclonefilter && + test_cmp_config -C super_clone3/sub false --default false remote.origin.promisor +' + test_done diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index 058e5d0c96..a4476dc492 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -544,4 +544,45 @@ test_expect_failure 'grep saves textconv cache in the appropriate repository' ' test_path_is_file "$sub_textconv_cache" ' +test_expect_success 'grep partially-cloned submodule' ' + # Set up clean superproject and submodule for partial cloning. + git init super && + git init super/sub && + ( + cd super && + test_commit --no-tag "Add file in superproject" \ + super-file "Some content for super-file" && + test_commit -C sub --no-tag "Add file in submodule" \ + sub-file "Some content for sub-file" && + git submodule add ./sub && + git commit -m "Add other as submodule sub" && + test_tick && + test_commit -C sub --no-tag --append "Update file in submodule" \ + sub-file "Some more content for sub-file" && + git add sub && + git commit -m "Update submodule" && + test_tick && + git config --local uploadpack.allowfilter 1 && + git config --local uploadpack.allowanysha1inwant 1 && + git -C sub config --local uploadpack.allowfilter 1 && + git -C sub config --local uploadpack.allowanysha1inwant 1 + ) && + # Clone the superproject & submodule, then make sure we can lazy-fetch submodule objects. 
+ git clone --filter=blob:none --also-filter-submodules \ + --recurse-submodules "file://$(pwd)/super" partial && + ( + cd partial && + cat >expect <<-\EOF && + HEAD^:sub/sub-file:Some content for sub-file + HEAD^:super-file:Some content for super-file + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace2.log" git grep -e content \ + --recurse-submodules HEAD^ >actual && + test_cmp expect actual && + # Verify that we actually fetched data from the promisor remote: + grep \"category\":\"promisor\",\"key\":\"fetch_count\",\"value\":\"1\" trace2.log + ) +' + test_done From b8403129d35889980356ac0e1c8d6945910ad396 Mon Sep 17 00:00:00 2001 From: Jaydeep Das <jaydeepjd.8914@gmail.com> Date: Tue, 8 Feb 2022 14:53:39 +0530 Subject: [PATCH 083/150] t/t0015-hash.sh: remove unnecessary '\' at line end The `|` at line end already implies that the statement is not over. So a `\` after that is redundant. Signed-off-by: Jaydeep P Das <jaydeepjd.8914@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0015-hash.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t0015-hash.sh b/t/t0015-hash.sh index 291e9061f3..086822fc45 100755 --- a/t/t0015-hash.sh +++ b/t/t0015-hash.sh @@ -15,7 +15,7 @@ test_expect_success 'test basic SHA-1 hash values' ' grep c12252ceda8be8994d5fa0290a47231c1d16aae3 actual && printf "abcdefghijklmnopqrstuvwxyz" | test-tool sha1 >actual && grep 32d10c7b8cf96570ca04ce37f2a19d84240d3a89 actual && - perl -e "$| = 1; print q{aaaaaaaaaa} for 1..100000;" | \ + perl -e "$| = 1; print q{aaaaaaaaaa} for 1..100000;" | test-tool sha1 >actual && grep 34aa973cd4c4daa4f61eeb2bdbad27316534016f actual && printf "blob 0\0" | test-tool sha1 >actual && @@ -38,10 +38,10 @@ test_expect_success 'test basic SHA-256 hash values' ' printf "abcdefghijklmnopqrstuvwxyz" | test-tool sha256 >actual && grep 71c480df93d6ae2f1efad1447c66c9525e316218cf51fc8d9ed832f2daf18b73 actual && # Try to exercise the chunking code by turning autoflush on. 
- perl -e "$| = 1; print q{aaaaaaaaaa} for 1..100000;" | \ + perl -e "$| = 1; print q{aaaaaaaaaa} for 1..100000;" | test-tool sha256 >actual && grep cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 actual && - perl -e "$| = 1; print q{abcdefghijklmnopqrstuvwxyz} for 1..100000;" | \ + perl -e "$| = 1; print q{abcdefghijklmnopqrstuvwxyz} for 1..100000;" | test-tool sha256 >actual && grep e406ba321ca712ad35a698bf0af8d61fc4dc40eca6bdcea4697962724ccbde35 actual && printf "blob 0\0" | test-tool sha256 >actual && From 2df5387ed04159b188de65dff9654d4aae1062d2 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Wed, 9 Feb 2022 18:19:07 -0800 Subject: [PATCH 084/150] glossary: describe "worktree" We have description on "per worktree ref", but "worktree" is not described in the glossary. We do have "working tree", though. Casually put, a "working tree" is what your editor and compiler interacts with. "worktree" is a mechanism to allow one or more "working tree"s to be attached to a repository and used to check out different commits and branches independently, which includes not just a "working tree" but also repository metadata like HEAD, the index to support simultaneous use of them. Historically, we used these terms interchangeably but we have been trying to use "working tree" when we mean it, instead of "worktree". Most of the existing references to "working tree" in the glossary do refer primarily to the working tree portion, except for one that said refs like HEAD and refs/bisect/* are per "working tree", but it is more precise to say they are per "worktree". 
Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/glossary-content.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index c077971335..aa2f41f5e7 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -312,7 +312,7 @@ Pathspecs are used on the command line of "git ls-files", "git ls-tree", "git add", "git grep", "git diff", "git checkout", and many other commands to limit the scope of operations to some subset of the tree or -worktree. See the documentation of each command for whether +working tree. See the documentation of each command for whether paths are relative to the current directory or toplevel. The pathspec syntax is as follows: + @@ -446,7 +446,7 @@ exclude;; interface than the <<def_plumbing,plumbing>>. [[def_per_worktree_ref]]per-worktree ref:: - Refs that are per-<<def_working_tree,worktree>>, rather than + Refs that are per-<<def_worktree,worktree>>, rather than global. This is presently only <<def_HEAD,HEAD>> and any refs that start with `refs/bisect/`, but might later include other unusual refs. @@ -669,3 +669,12 @@ The most notable example is `HEAD`. The tree of actual checked out files. The working tree normally contains the contents of the <<def_HEAD,HEAD>> commit's tree, plus any local changes that you have made but not yet committed. + +[[def_worktree]]worktree:: + A repository can have zero (i.e. bare repository) or one or + more worktrees attached to it. One "worktree" consists of a + "working tree" and repository metadata, most of which are + shared among other worktrees of a single repository, and + some of which are maintained separately per worktree + (e.g. the index, HEAD and pseudorefs like MERGE_HEAD, + per-worktree refs and per-worktree configuration file). 
From 8c2d8d04f0154da31f3d64e52669fe835929273c Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Wed, 9 Feb 2022 11:29:39 +0000 Subject: [PATCH 085/150] t1410: use test-tool ref-store to inspect reflogs This makes the test compatible with reftable (it doesn't pass yet for other reasons, unfortunately) Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t1410-reflog.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index d7ddf7612d..36f6693d9d 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -418,7 +418,8 @@ test_expect_success 'expire with multiple worktrees' ' test_commit -C link-wt foobar && test_tick && git reflog expire --verbose --all --expire=$test_tick && - test_must_be_empty .git/worktrees/link-wt/logs/HEAD + test-tool ref-store worktree:link-wt for-each-reflog-ent HEAD >actual && + test_must_be_empty actual ) ' From bcdff626ee7ef7e5a643c381cf6f696b03e33649 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys <hanwen@google.com> Date: Wed, 9 Feb 2022 11:29:40 +0000 Subject: [PATCH 086/150] t1410: mark bufsize boundary test as REFFILES This test fiddles with files under .git/logs to recreate a condition that is unlikely to warrant special attention under reftable, as reflog blocks are zlib compressed. Signed-off-by: Han-Wen Nienhuys <hanwen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t1410-reflog.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index 36f6693d9d..68f69bb543 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -341,7 +341,7 @@ test_expect_success 'stale dirs do not cause d/f conflicts (reflogs off)' ' # Each line is 114 characters, so we need 75 to still have a few before the # last 8K. The 89-character padding on the final entry lines up our # newline exactly. 
-test_expect_success SHA1 'parsing reverse reflogs at BUFSIZ boundaries' ' +test_expect_success REFFILES,SHA1 'parsing reverse reflogs at BUFSIZ boundaries' ' git checkout -b reflogskip && zf=$(test_oid zero_2) && ident="abc <xyz> 0000000001 +0000" && From 6fd1cc8f985ccd8b014e945a819482b267dae21f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt <ps@pks.im> Date: Thu, 10 Feb 2022 13:28:09 +0100 Subject: [PATCH 087/150] fetch-pack: use commit-graph when computing cutoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During packfile negotiation we iterate over all refs announced by the remote side to check whether their IDs refer to commits already known to us. If a commit is known to us already, then its date is a potential cutoff point for commits we have in common with the remote side. There are potentially a lot of commits announced by the remote depending on how many refs there are in the remote repository, and for every one of them we need to search for it in our object database and, if found, parse the corresponding object to find out whether it is a candidate for the cutoff date. This can be sped up by trying to look up commits via the commit-graph first, which is a lot more efficient.
Benchmarks in a repository with about 2,1 million refs and an up-to-date commit-graph show an almost 20% speedup when mirror-fetching: Benchmark 1: git fetch +refs/*:refs/* (v2.35.0) Time (mean ± σ): 115.587 s ± 2.009 s [User: 109.874 s, System: 11.305 s] Range (min … max): 113.584 s … 118.820 s 5 runs Benchmark 2: git fetch +refs/*:refs/* (HEAD) Time (mean ± σ): 96.859 s ± 0.624 s [User: 91.948 s, System: 10.980 s] Range (min … max): 96.180 s … 97.875 s 5 runs Summary 'git fetch +refs/*:refs/* (HEAD)' ran 1.19 ± 0.02 times faster than 'git fetch +refs/*:refs/* (v2.35.0)' Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fetch-pack.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index dd6ec449f2..c5967e228e 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -696,26 +696,30 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); for (ref = *refs; ref; ref = ref->next) { - struct object *o; + struct commit *commit; - if (!has_object_file_with_flags(&ref->old_oid, + commit = lookup_commit_in_graph(the_repository, &ref->old_oid); + if (!commit) { + struct object *o; + + if (!has_object_file_with_flags(&ref->old_oid, OBJECT_INFO_QUICK | - OBJECT_INFO_SKIP_FETCH_OBJECT)) - continue; - o = parse_object(the_repository, &ref->old_oid); - if (!o) - continue; + OBJECT_INFO_SKIP_FETCH_OBJECT)) + continue; + o = parse_object(the_repository, &ref->old_oid); + if (!o || o->type != OBJ_COMMIT) + continue; + + commit = (struct commit *)o; + } /* * We already have it -- which may mean that we were * in sync with the other side at some time after * that (it is OK if we guess wrong here). 
*/ - if (o->type == OBJ_COMMIT) { - struct commit *commit = (struct commit *)o; - if (!cutoff || cutoff < commit->date) - cutoff = commit->date; - } + if (!cutoff || cutoff < commit->date) + cutoff = commit->date; } trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); From b18aaaa5e931d79d057f68ac0d7c3dd0377e5f03 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt <ps@pks.im> Date: Thu, 10 Feb 2022 13:28:16 +0100 Subject: [PATCH 088/150] fetch: skip computing output width when not printing anything MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When updating references via git-fetch(1), then by default we report to the user which references have been changed. This output is formatted in a nice table such that the different columns are aligned. Because the first column contains abbreviated object IDs we thus need to iterate over all refs which have changed and compute the minimum length for their respective abbreviated hashes. While this effort makes sense in most cases, it is wasteful when the user passes the `--quiet` flag: we don't print the summary, but still compute the length. Skip computing the summary width when the user asked for us to be quiet. 
This gives us a speedup of nearly 10% when doing a mirror-fetch in a repository with thousands of references being updated: Benchmark 1: git fetch --quiet +refs/*:refs/* (HEAD~) Time (mean ± σ): 96.078 s ± 0.508 s [User: 91.378 s, System: 10.870 s] Range (min … max): 95.449 s … 96.760 s 5 runs Benchmark 2: git fetch --quiet +refs/*:refs/* (HEAD) Time (mean ± σ): 88.214 s ± 0.192 s [User: 83.274 s, System: 10.978 s] Range (min … max): 87.998 s … 88.446 s 5 runs Summary 'git fetch --quiet +refs/*:refs/* (HEAD)' ran 1.09 ± 0.01 times faster than 'git fetch --quiet +refs/*:refs/* (HEAD~)' Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fetch.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 5b3b18a72f..7ef305c66d 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1094,12 +1094,15 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, struct ref *rm; char *url; int want_status; - int summary_width = transport_summary_width(ref_map); + int summary_width = 0; rc = open_fetch_head(&fetch_head); if (rc) return -1; + if (verbosity >= 0) + summary_width = transport_summary_width(ref_map); + if (raw_url) url = transport_anonymize_url(raw_url); else @@ -1345,7 +1348,6 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, int url_len, i, result = 0; struct ref *ref, *stale_refs = get_stale_heads(rs, ref_map); char *url; - int summary_width = transport_summary_width(stale_refs); const char *dangling_msg = dry_run ? 
_(" (%s will become dangling)") : _(" (%s has become dangling)"); @@ -1374,6 +1376,8 @@ static int prune_refs(struct refspec *rs, struct ref *ref_map, } if (verbosity >= 0) { + int summary_width = transport_summary_width(stale_refs); + for (ref = stale_refs; ref; ref = ref->next) { struct strbuf sb = STRBUF_INIT; if (!shown_url) { From dccf6c16f1cee485f05ef42ba67a9309c358a78d Mon Sep 17 00:00:00 2001 From: Alex Henrie <alexhenrie24@gmail.com> Date: Fri, 11 Feb 2022 09:36:24 -0700 Subject: [PATCH 089/150] log: fix memory leak if --graph is passed multiple times Signed-off-by: Alex Henrie <alexhenrie24@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- graph.c | 12 ++++++++++++ graph.h | 5 +++++ revision.c | 1 + 3 files changed, 18 insertions(+) diff --git a/graph.c b/graph.c index e3828eb8f2..568b6e7cd4 100644 --- a/graph.c +++ b/graph.c @@ -401,6 +401,18 @@ struct git_graph *graph_init(struct rev_info *opt) return graph; } +void graph_clear(struct git_graph *graph) +{ + if (!graph) + return; + + free(graph->columns); + free(graph->new_columns); + free(graph->mapping); + free(graph->old_mapping); + free(graph); +} + static void graph_update_state(struct git_graph *graph, enum graph_state s) { graph->prev_state = graph->state; diff --git a/graph.h b/graph.h index 8313e293c7..e88632a014 100644 --- a/graph.h +++ b/graph.h @@ -139,6 +139,11 @@ void graph_set_column_colors(const char **colors, unsigned short colors_max); */ struct git_graph *graph_init(struct rev_info *opt); +/* + * Free a struct git_graph. + */ +void graph_clear(struct git_graph *graph); + /* * Update a git_graph with a new commit. 
* This will cause the graph to begin outputting lines for the new commit diff --git a/revision.c b/revision.c index ad4286fbdd..816061f3d9 100644 --- a/revision.c +++ b/revision.c @@ -2426,6 +2426,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!strcmp(arg, "--graph")) { revs->topo_order = 1; revs->rewrite_parents = 1; + graph_clear(revs->graph); revs->graph = graph_init(revs); } else if (!strcmp(arg, "--encode-email-headers")) { revs->encode_email_headers = 1; From 087c745833be1edd3b3e4d8ea5d8b1a09fc6c245 Mon Sep 17 00:00:00 2001 From: Alex Henrie <alexhenrie24@gmail.com> Date: Fri, 11 Feb 2022 09:36:25 -0700 Subject: [PATCH 090/150] log: add a --no-graph option It's useful to be able to countermand a previous --graph option, for example if `git log --graph` is run via an alias. Signed-off-by: Alex Henrie <alexhenrie24@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/blame.c | 1 + builtin/shortlog.c | 1 + revision.c | 19 ++++++++++--- revision.h | 1 + t/t4202-log.sh | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 4 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 7fafeac408..ef831de5ac 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -934,6 +934,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix) parse_revision_opt(&revs, &ctx, options, blame_opt_usage); } parse_done: + revision_opts_finish(&revs); no_whole_file_rename = !revs.diffopt.flags.follow_renames; xdl_opts |= revs.diffopt.xdl_opts & XDF_INDENT_HEURISTIC; revs.diffopt.flags.follow_renames = 0; diff --git a/builtin/shortlog.c b/builtin/shortlog.c index e7f7af5de3..228d782754 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -388,6 +388,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) parse_revision_opt(&rev, &ctx, options, shortlog_usage); } parse_done: + revision_opts_finish(&rev); argc = parse_options_end(&ctx); if (nongit && 
argc > 1) { diff --git a/revision.c b/revision.c index 816061f3d9..a39fd1c278 100644 --- a/revision.c +++ b/revision.c @@ -2424,10 +2424,11 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->pretty_given = 1; revs->abbrev_commit = 1; } else if (!strcmp(arg, "--graph")) { - revs->topo_order = 1; - revs->rewrite_parents = 1; graph_clear(revs->graph); revs->graph = graph_init(revs); + } else if (!strcmp(arg, "--no-graph")) { + graph_clear(revs->graph); + revs->graph = NULL; } else if (!strcmp(arg, "--encode-email-headers")) { revs->encode_email_headers = 1; } else if (!strcmp(arg, "--no-encode-email-headers")) { @@ -2524,8 +2525,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg unkv[(*unkc)++] = arg; return opts; } - if (revs->graph && revs->track_linear) - die(_("options '%s' and '%s' cannot be used together"), "--show-linear-break", "--graph"); return 1; } @@ -2544,6 +2543,17 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx, ctx->argc -= n; } +void revision_opts_finish(struct rev_info *revs) +{ + if (revs->graph && revs->track_linear) + die(_("options '%s' and '%s' cannot be used together"), "--show-linear-break", "--graph"); + + if (revs->graph) { + revs->topo_order = 1; + revs->rewrite_parents = 1; + } +} + static int for_each_bisect_ref(struct ref_store *refs, each_ref_fn fn, void *cb_data, const char *term) { @@ -2786,6 +2796,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s break; } } + revision_opts_finish(revs); if (prune_data.nr) { /* diff --git a/revision.h b/revision.h index 3f66147bfd..5a507db202 100644 --- a/revision.h +++ b/revision.h @@ -372,6 +372,7 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx, #define REVARG_COMMITTISH 02 int handle_revision_arg(const char *arg, struct rev_info *revs, int flags, unsigned revarg_opt); +void revision_opts_finish(struct rev_info *revs); /** * Reset 
the flags used by the revision walking api. You can use this to do diff --git a/t/t4202-log.sh b/t/t4202-log.sh index dc884107de..a7d5edf720 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -1671,6 +1671,75 @@ test_expect_success 'log --graph with --name-only' ' test_cmp_graph --name-only tangle..reach ' +test_expect_success '--no-graph countermands --graph' ' + git log >expect && + git log --graph --no-graph >actual && + test_cmp expect actual +' + +test_expect_success '--graph countermands --no-graph' ' + git log --graph >expect && + git log --no-graph --graph >actual && + test_cmp expect actual +' + +test_expect_success '--no-graph does not unset --topo-order' ' + git log --topo-order >expect && + git log --topo-order --no-graph >actual && + test_cmp expect actual +' + +test_expect_success '--no-graph does not unset --parents' ' + git log --parents >expect && + git log --parents --no-graph >actual && + test_cmp expect actual +' + +test_expect_success '--reverse and --graph conflict' ' + test_must_fail git log --reverse --graph 2>stderr && + test_i18ngrep "cannot be used together" stderr +' + +test_expect_success '--reverse --graph --no-graph works' ' + git log --reverse >expect && + git log --reverse --graph --no-graph >actual && + test_cmp expect actual +' + +test_expect_success '--show-linear-break and --graph conflict' ' + test_must_fail git log --show-linear-break --graph 2>stderr && + test_i18ngrep "cannot be used together" stderr +' + +test_expect_success '--show-linear-break --graph --no-graph works' ' + git log --show-linear-break >expect && + git log --show-linear-break --graph --no-graph >actual && + test_cmp expect actual +' + +test_expect_success '--no-walk and --graph conflict' ' + test_must_fail git log --no-walk --graph 2>stderr && + test_i18ngrep "cannot be used together" stderr +' + +test_expect_success '--no-walk --graph --no-graph works' ' + git log --no-walk >expect && + git log --no-walk --graph --no-graph >actual && + test_cmp expect 
actual +' + +test_expect_success '--walk-reflogs and --graph conflict' ' + test_must_fail git log --walk-reflogs --graph 2>stderr && + (test_i18ngrep "cannot combine" stderr || + test_i18ngrep "cannot be used together" stderr) +' + +test_expect_success '--walk-reflogs --graph --no-graph works' ' + git log --walk-reflogs >expect && + git log --walk-reflogs --graph --no-graph >actual && + test_cmp expect actual +' + test_expect_success 'dotdot is a parent directory' ' mkdir -p a/b && ( echo sixth && echo fifth ) >expect && From 3d3c23b3a754cf5060a93d9f777e58662cdd5ffe Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya <bagasdotme@gmail.com> Date: Wed, 22 Dec 2021 14:58:06 +0700 Subject: [PATCH 091/150] fetch-pack: parameterize message containing 'ready' keyword The protocol keyword 'ready' isn't meant for translation. Pass it as a parameter instead of spelling it out in the die() message (and potentially confusing translators). Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fetch-pack.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fetch-pack.c b/fetch-pack.c index 34987a2c30..3a98d66e81 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1415,9 +1415,17 @@ static int process_ack(struct fetch_negotiator *negotiator, * otherwise. */ if (*received_ready && reader->status != PACKET_READ_DELIM) - die(_("expected packfile to be sent after 'ready'")); + /* + * TRANSLATORS: The parameter will be 'ready', a protocol + * keyword. + */ + die(_("expected packfile to be sent after '%s'"), "ready"); if (!*received_ready && reader->status != PACKET_READ_FLUSH) - die(_("expected no other sections to be sent after no 'ready'")); + /* + * TRANSLATORS: The parameter will be 'ready', a protocol + * keyword.
+ */ + die(_("expected no other sections to be sent after no '%s'"), "ready"); return 0; } From d4fe066e4ba577afe585a640e245ce12331f6286 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com> Date: Fri, 21 Jan 2022 18:21:09 +0800 Subject: [PATCH 092/150] t0001: replace "test [-d|-f]" with test_path_is_* functions Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t0001-init.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 3235ab4d53..d479303efa 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -6,7 +6,8 @@ TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh check_config () { - if test -d "$1" && test -f "$1/config" && test -d "$1/refs" + if test_path_is_dir "$1" && + test_path_is_file "$1/config" && test_path_is_dir "$1/refs" then : happy else From 6a5678f2576dba579e35c6d86b02584abee0ac37 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" <sandals@crustytoothpaste.net> Date: Mon, 14 Feb 2022 10:15:43 -0800 Subject: [PATCH 093/150] doc: clarify interaction between 'eol' and text=auto The `eol` takes effect on text files only when the index has the contents in LF line endings. Paths with contents in CRLF line endings in the index may become dirty unless text=auto. Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/gitattributes.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 60984a4682..a71dad2674 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -161,11 +161,12 @@ unspecified. This attribute sets a specific line-ending style to be used in the working directory. This attribute has effect only if the `text` -attribute is set or unspecified, or if it is set to `auto` and the file -is detected as text. 
Note that setting this attribute on paths which -are in the index with CRLF line endings may make the paths to be -considered dirty. Adding the path to the index again will normalize the -line endings in the index. +attribute is set or unspecified, or if it is set to `auto`, the file is +detected as text, and it is stored with LF endings in the index. Note +that setting this attribute on paths which are in the index with CRLF +line endings may make the paths to be considered dirty unless +`text=auto` is set. Adding the path to the index again will normalize +the line endings in the index. Set to string value "crlf":: From 332acc248dd5213cb1d61be5538bb47ad26770e5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <derrickstolee@github.com> Date: Mon, 14 Feb 2022 16:45:53 +0000 Subject: [PATCH 094/150] mailmap: change primary address for Derrick Stolee Stolee transitioned from Microsoft to GitHub in July 2020, but continued to use <dstolee@microsoft.com> because it was a valid address. He also used <stolee@gmail.com> to communicate with the mailing list since writing plaintext emails is difficult in Outlook. However, recent issues with GMail delaying mailing list messages created a need to change his primary email address. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- .mailmap | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 9c6a446bdf..07db36a9bb 100644 --- a/.mailmap +++ b/.mailmap @@ -59,8 +59,9 @@ David Reiss <dreiss@facebook.com> <dreiss@dreiss-vmware.(none)> David S. 
Miller <davem@davemloft.net> David Turner <novalis@novalis.org> <dturner@twopensource.com> David Turner <novalis@novalis.org> <dturner@twosigma.com> -Derrick Stolee <dstolee@microsoft.com> <stolee@gmail.com> -Derrick Stolee <dstolee@microsoft.com> Derrick Stolee via GitGitGadget <gitgitgadget@gmail.com> +Derrick Stolee <derrickstolee@github.com> <stolee@gmail.com> +Derrick Stolee <derrickstolee@github.com> Derrick Stolee via GitGitGadget <gitgitgadget@gmail.com> +Derrick Stolee <derrickstolee@github.com> <dstolee@microsoft.com> Deskin Miller <deskinm@umich.edu> Đoàn Trần Công Danh <congdanhqx@gmail.com> Doan Tran Cong Danh Dirk Süsserott <newsletter@dirk.my1.cc> From d271892fbc099d5afd98f48845e258ded28e7c89 Mon Sep 17 00:00:00 2001 From: John Cai <johncai86@gmail.com> Date: Tue, 15 Feb 2022 20:52:07 +0000 Subject: [PATCH 095/150] name-rev: replace --stdin with --annotate-stdin in synopsis 34ae3b70 (name-rev: deprecate --stdin in favor of --annotate-stdin, 2022-01-05) added --annotate-stdin to replace --stdin as a clearer flag name. Since --stdin is to be deprecated, we should replace --stdin in the output from "git name-rev -h". 
Signed-off-by: John Cai <johncai86@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/name-rev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 138e3c30a2..929591269d 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -473,7 +473,7 @@ static void show_name(const struct object *obj, static char const * const name_rev_usage[] = { N_("git name-rev [<options>] <commit>..."), N_("git name-rev [<options>] --all"), - N_("git name-rev [<options>] --stdin"), + N_("git name-rev [<options>] --annotate-stdin"), NULL }; From 77e3f931ef76444d6b28d50ac4fbdc933a15f358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:30 +0100 Subject: [PATCH 096/150] grep.h: remove unused "regex_t regexp" from grep_opt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This "regex_t" in grep_opt has not been used since f9b9faf6f8a (builtin-grep: allow more than one patterns., 2006-05-02). We still use a "regex_t" for compiling regexes, but that's in the "grep_pat" struct.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- grep.h | 1 - 1 file changed, 1 deletion(-) diff --git a/grep.h b/grep.h index 6a1f0ab017..400172676a 100644 --- a/grep.h +++ b/grep.h @@ -136,7 +136,6 @@ struct grep_opt { const char *prefix; int prefix_length; - regex_t regexp; int linenum; int columnnum; int invert; From ff37a60c36959cc1d774e1a9b266291962e8624d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:31 +0100 Subject: [PATCH 097/150] log tests: check if grep_config() is called by "log"-like cmds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the tests added in my 9df46763ef1 (log: add exhaustive tests for pattern style options & config, 2017-05-20) to check not only whether "git log" handles "grep.patternType", but also "git show" etc. It's sufficient to check whether we match a "fixed" or a "basic" regex here to see if these codepaths correctly invoked grep_config(). We don't need to check the details of their regular expression matching as the "log" test does. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t4202-log.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/t/t4202-log.sh b/t/t4202-log.sh index 5049559861..e775b378e4 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -449,6 +449,30 @@ test_expect_success !FAIL_PREREQS 'log with various grep.patternType configurati ) ' +for cmd in show whatchanged reflog format-patch +do + case "$cmd" in + format-patch) myarg="HEAD~.." ;; + *) myarg= ;; + esac + + test_expect_success "$cmd: understands grep.patternType, like 'log'" ' + git init "pattern-type-$cmd" && + ( + cd "pattern-type-$cmd" && + test_commit 1 file A && + test_commit "(1|2)" file B 2 && + + git -c grep.patternType=fixed $cmd --grep="..." 
$myarg >actual && + test_must_be_empty actual && + + git -c grep.patternType=basic $cmd --grep="..." $myarg >actual && + test_file_not_empty actual + ) + ' +done +test_done + test_expect_success 'log --author' ' cat >expect <<-\EOF && Author: <BOLD;RED>A U<RESET> Thor <author@example.com> From ccb1fccc21331122ad1c7c247122cb7c48787361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:32 +0100 Subject: [PATCH 098/150] grep tests: create a helper function for "BRE" or "ERE" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the repeated test code for finding out whether a given set of configuration will pick basic, extended or fixed into a new "test_pattern_type" helper function. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7810-grep.sh | 134 +++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 80 deletions(-) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 424c31c328..6f1103b54b 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -98,6 +98,37 @@ test_expect_success 'grep should not segfault with a bad input' ' test_invalid_grep_expression --and -e A +test_pattern_type () { + H=$1 && + HC=$2 && + L=$3 && + type=$4 && + shift 4 && + + expected_str= && + case "$type" in + BRE) + expected_str="${HC}ab:a+bc" + ;; + ERE) + expected_str="${HC}ab:abc" + ;; + FIX) + expected_str="${HC}ab:a+b*c" + ;; + *) + BUG "unknown pattern type '$type'" + ;; + esac && + config_str="$@" && + + test_expect_success "grep $L with '$config_str' interpreted as $type" ' + echo $expected_str >expected && + git $config_str grep "a+b*c" $H ab >actual && + test_cmp expected actual + ' +} + for H in HEAD '' do case "$H" in @@ -393,35 +424,13 @@ do git grep --no-recursive -n -e vvv $H -- t . 
>actual && test_cmp expected actual ' - test_expect_success "grep $L with grep.extendedRegexp=false" ' - echo "${HC}ab:a+bc" >expected && - git -c grep.extendedRegexp=false grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - test_expect_success "grep $L with grep.extendedRegexp=true" ' - echo "${HC}ab:abc" >expected && - git -c grep.extendedRegexp=true grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - test_expect_success "grep $L with grep.patterntype=basic" ' - echo "${HC}ab:a+bc" >expected && - git -c grep.patterntype=basic grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.patterntype=extended" ' - echo "${HC}ab:abc" >expected && - git -c grep.patterntype=extended grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.patterntype=fixed" ' - echo "${HC}ab:a+b*c" >expected && - git -c grep.patterntype=fixed grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' + test_pattern_type "$H" "$HC" "$L" BRE -c grep.extendedRegexp=false + test_pattern_type "$H" "$HC" "$L" ERE -c grep.extendedRegexp=true + test_pattern_type "$H" "$HC" "$L" BRE -c grep.patternType=basic + test_pattern_type "$H" "$HC" "$L" ERE -c grep.patternType=extended + test_pattern_type "$H" "$HC" "$L" FIX -c grep.patternType=fixed test_expect_success PCRE "grep $L with grep.patterntype=perl" ' echo "${HC}ab:a+b*c" >expected && @@ -433,59 +442,24 @@ do test_must_fail git -c grep.patterntype=perl grep "foo.*bar" ' - test_expect_success "grep $L with grep.patternType=default and grep.extendedRegexp=true" ' - echo "${HC}ab:abc" >expected && - git \ - -c grep.patternType=default \ - -c grep.extendedRegexp=true \ - grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.extendedRegexp=true and grep.patternType=default" ' - echo "${HC}ab:abc" >expected && - git \ - -c grep.extendedRegexp=true \ - -c grep.patternType=default \ - grep 
"a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.patternType=extended and grep.extendedRegexp=false" ' - echo "${HC}ab:abc" >expected && - git \ - -c grep.patternType=extended \ - -c grep.extendedRegexp=false \ - grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.patternType=basic and grep.extendedRegexp=true" ' - echo "${HC}ab:a+bc" >expected && - git \ - -c grep.patternType=basic \ - -c grep.extendedRegexp=true \ - grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.extendedRegexp=false and grep.patternType=extended" ' - echo "${HC}ab:abc" >expected && - git \ - -c grep.extendedRegexp=false \ - -c grep.patternType=extended \ - grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' - - test_expect_success "grep $L with grep.extendedRegexp=true and grep.patternType=basic" ' - echo "${HC}ab:a+bc" >expected && - git \ - -c grep.extendedRegexp=true \ - -c grep.patternType=basic \ - grep "a+b*c" $H ab >actual && - test_cmp expected actual - ' + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.patternType=default \ + -c grep.extendedRegexp=true + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.extendedRegexp=true \ + -c grep.patternType=default + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.patternType=extended \ + -c grep.extendedRegexp=false + test_pattern_type "$H" "$HC" "$L" BRE \ + -c grep.patternType=basic \ + -c grep.extendedRegexp=true + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.extendedRegexp=false \ + -c grep.patternType=extended + test_pattern_type "$H" "$HC" "$L" BRE \ + -c grep.extendedRegexp=true \ + -c grep.patternType=basic test_expect_success "grep --count $L" ' echo ${HC}ab:3 >expected && From a5c0ed3d83ee2111d5eb075d9b4bf752b16c0b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:33 +0100 
Subject: [PATCH 099/150] grep tests: add missing "grep.patternType" config tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the grep tests to assert that setting "grep.patternType=extended" followed by "grep.patternType=default" will behave as if "--basic-regexp" was provided, and not as "--extended-regexp". In a subsequent commit we'll need to treat "grep.patternType=default" as a special-case, but let's make sure we ignore it if it's being set to "default" following an earlier non-"default" "grep.patternType" setting. Let's also test what happens when we have a sequence of "extended" followed by "default" and "fixed". In that case the "fixed" should prevail, as well as tests to check that a "grep.extendedRegexp=true" followed by a "grep.extendedRegexp=false" behaves as though "grep.extendedRegexp" wasn't provided. See [1] for the source of some of these tests, and their initial (pseudocode) implementation, and [2] for a later discussion about a breakage due to missing testing (which had been noted in [1] all along). 1. https://lore.kernel.org/git/xmqqv8zf6j86.fsf@gitster.g/ 2. 
https://lore.kernel.org/git/xmqqpmoczwtu.fsf@gitster.g/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7810-grep.sh | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 6f1103b54b..6935601171 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -461,6 +461,58 @@ do -c grep.extendedRegexp=true \ -c grep.patternType=basic + # grep.extendedRegexp is last-one-wins + test_pattern_type "$H" "$HC" "$L" BRE \ + -c grep.extendedRegexp=true \ + -c grep.extendedRegexp=false + + # grep.patternType=basic pays no attention to grep.extendedRegexp + test_pattern_type "$H" "$HC" "$L" BRE \ + -c grep.extendedRegexp=true \ + -c grep.patternType=basic \ + -c grep.extendedRegexp=false + + # grep.patternType=extended pays no attention to grep.extendedRegexp + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.extendedRegexp=true \ + -c grep.patternType=extended \ + -c grep.extendedRegexp=false + + # grep.extendedRegexp is used with a last-one-wins grep.patternType=default + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.patternType=fixed \ + -c grep.extendedRegexp=true \ + -c grep.patternType=default + + # grep.extendedRegexp is used with earlier grep.patternType=default + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.extendedRegexp=false \ + -c grep.patternType=default \ + -c grep.extendedRegexp=true + + # grep.extendedRegexp is used with a last-one-loses grep.patternType=default + test_pattern_type "$H" "$HC" "$L" ERE \ + -c grep.extendedRegexp=false \ + -c grep.extendedRegexp=true \ + -c grep.patternType=default + + # grep.extendedRegexp and grep.patternType are both last-one-wins independently + test_pattern_type "$H" "$HC" "$L" BRE \ + -c grep.patternType=default \ + -c grep.extendedRegexp=true \ + -c grep.patternType=basic + + # grep.patternType=extended and grep.patternType=default + test_pattern_type "$H" "$HC" "$L" 
BRE \ + -c grep.patternType=extended \ + -c grep.patternType=default + + # grep.patternType=[extended -> default -> fixed] (BRE)" ' + test_pattern_type "$H" "$HC" "$L" FIX \ + -c grep.patternType=extended \ + -c grep.patternType=default \ + -c grep.patternType=fixed + test_expect_success "grep --count $L" ' echo ${HC}ab:3 >expected && git grep --count -e b $H -- ab >actual && From 9725c8dda20dc9bc02b552a2333963c8cb834d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:34 +0100 Subject: [PATCH 100/150] built-ins: trust the "prefix" from run_builtin() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change code in "builtin/grep.c" and "builtin/ls-tree.c" to trust the "prefix" passed from "run_builtin()". The "prefix" we get from setup.c is either going to be NULL or a string of length >0, never "". So we can drop the "prefix && *prefix" checks added for "builtin/grep.c" in 0d042fecf2f (git-grep: show pathnames relative to the current directory, 2006-08-11), and for "builtin/ls-tree.c" in a69dd585fca (ls-tree: chomp leading directories when run from a subdirectory, 2005-12-23). As seen in code in revision.c that was added in cd676a51367 (diff --relative: output paths as relative to the current subdirectory, 2008-02-12) we already have existing code that does away with this assertion. This makes it easier to reason about a subsequent change to the "prefix_length" code in grep.c in a subsequent commit, and since we're going to the trouble of doing that let's leave behind an assert() to promise this to any future callers. For "builtin/grep.c" it would be painful to pass the "prefix" down the callchain of: cmd_grep -> grep_tree -> grep_submodule -> grep_cache -> grep_oid -> grep_source_name So for the code that needs it in grep_source_name() let's add a "grep_prefix" variable similar to the existing "ls_tree_prefix". 
While at it let's move the code in cmd_ls_tree() around so that we assign to the "ls_tree_prefix" right after declaring the variables, and stop assigning to "prefix". We only subsequently used that variable later in the function after clobbering it. Let's just use our own "grep_prefix" instead. Let's also add an assert() in git.c, so that we'll make this promise about the "prefix" to any current and future callers, as well as to any readers of the code. Code history: * The strlen() in "grep.c" hasn't been used since 493b7a08d80 (grep: accept relative paths outside current working directory, 2009-09-05). When that code was added in 0d042fecf2f (git-grep: show pathnames relative to the current directory, 2006-08-11) we used the length. But since 493b7a08d80 we haven't used it for anything except a boolean check that we could have done on the "prefix" member itself. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 13 ++++++++----- builtin/ls-tree.c | 2 +- git.c | 1 + grep.c | 4 +--- grep.h | 4 +--- revision.c | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 9e34a820ad..d85cbabea6 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -26,6 +26,8 @@ #include "object-store.h" #include "packfile.h" +static const char *grep_prefix; + static char const * const grep_usage[] = { N_("git grep [<options>] [-e] <pattern> [<rev>...] 
[[--] <path>...]"), NULL @@ -315,11 +317,11 @@ static void grep_source_name(struct grep_opt *opt, const char *filename, strbuf_reset(out); if (opt->null_following_name) { - if (opt->relative && opt->prefix_length) { + if (opt->relative && grep_prefix) { struct strbuf rel_buf = STRBUF_INIT; const char *rel_name = relative_path(filename + tree_name_len, - opt->prefix, &rel_buf); + grep_prefix, &rel_buf); if (tree_name_len) strbuf_add(out, filename, tree_name_len); @@ -332,8 +334,8 @@ static void grep_source_name(struct grep_opt *opt, const char *filename, return; } - if (opt->relative && opt->prefix_length) - quote_path(filename + tree_name_len, opt->prefix, out, 0); + if (opt->relative && grep_prefix) + quote_path(filename + tree_name_len, grep_prefix, out, 0); else quote_c_style(filename + tree_name_len, out, NULL, 0); @@ -962,9 +964,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_NOCOMPLETE), OPT_END() }; + grep_prefix = prefix; git_config(grep_cmd_config, NULL); - grep_init(&opt, the_repository, prefix); + grep_init(&opt, the_repository); /* * If there is no -- then the paths must exist in the working diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 3a442631c7..6cb554cbb0 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -150,7 +150,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) git_config(git_default_config, NULL); ls_tree_prefix = prefix; - if (prefix && *prefix) + if (prefix) chomp_prefix = strlen(prefix); argc = parse_options(argc, argv, prefix, ls_tree_options, diff --git a/git.c b/git.c index edda922ce6..9d257e092d 100644 --- a/git.c +++ b/git.c @@ -436,6 +436,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) } else { prefix = NULL; } + assert(!prefix || *prefix); precompose_argv_prefix(argc, argv, NULL); if (use_pager == -1 && run_setup && !(p->option & DELAY_PAGER_CONFIG)) diff --git a/grep.c b/grep.c index 7bb0360869..8421dc5548 100644 --- a/grep.c +++ 
b/grep.c @@ -139,13 +139,11 @@ int grep_config(const char *var, const char *value, void *cb) * default values from the template we read the configuration * information in an earlier call to git_config(grep_config). */ -void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix) +void grep_init(struct grep_opt *opt, struct repository *repo) { *opt = grep_defaults; opt->repo = repo; - opt->prefix = prefix; - opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0; opt->pattern_tail = &opt->pattern_list; opt->header_tail = &opt->header_list; } diff --git a/grep.h b/grep.h index 400172676a..23a2a41d2c 100644 --- a/grep.h +++ b/grep.h @@ -134,8 +134,6 @@ struct grep_opt { */ struct repository *repo; - const char *prefix; - int prefix_length; int linenum; int columnnum; int invert; @@ -182,7 +180,7 @@ struct grep_opt { }; int grep_config(const char *var, const char *value, void *); -void grep_init(struct grep_opt *, struct repository *repo, const char *prefix); +void grep_init(struct grep_opt *, struct repository *repo); void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt); void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t); diff --git a/revision.c b/revision.c index ad4286fbdd..d6e0e2b23b 100644 --- a/revision.c +++ b/revision.c @@ -1838,7 +1838,7 @@ void repo_init_revisions(struct repository *r, revs->commit_format = CMIT_FMT_DEFAULT; revs->expand_tabs_in_log_default = 8; - grep_init(&revs->grep_filter, revs->repo, prefix); + grep_init(&revs->grep_filter, revs->repo); revs->grep_filter.status_only = 1; repo_diff_setup(revs->repo, &revs->diffopt); From b8db6ed8265fc44ebf7163a17600162f25df6de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:35 +0100 Subject: [PATCH 101/150] grep.c: don't pass along NULL callback value MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Change grep_cmd_config() to stop passing around the always-NULL "cb" value. When this code was added in 7e8f59d577e (grep: color patterns in output, 2009-03-07) it was non-NULL, but when that changed in 15fabd1bbd4 (builtin/grep.c: make configuration callback more reusable, 2012-10-09) this code was left behind. In a subsequent change I'll start using the "cb" value; this change makes it clear which of the functions we call need it, and which don't. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index d85cbabea6..5ec4cecae4 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -285,8 +285,8 @@ static int wait_all(void) static int grep_cmd_config(const char *var, const char *value, void *cb) { - int st = grep_config(var, value, cb); - if (git_color_default_config(var, value, cb) < 0) + int st = grep_config(var, value, NULL); + if (git_color_default_config(var, value, NULL) < 0) st = -1; if (!strcmp(var, "grep.threads")) { From 72365bb49923da065e7a43e61a912ef17f143c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:36 +0100 Subject: [PATCH 102/150] grep API: call grep_config() after grep_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The grep_init() function used the odd pattern of initializing the passed-in "struct grep_opt" with a statically defined "grep_defaults" struct, which would be modified in-place when we invoked grep_config(). So we effectively (b) initialized config, (a) then defaults, (c) followed by user options. Usually those are ordered as "a", "b" and "c" instead. 
As the comments being removed here show the previous behavior needed to be carefully explained as we'd potentially share the populated configuration among different instances of grep_init(). In practice we didn't do that, but now that it can't be a concern anymore let's remove those comments. This does not change the behavior of any of the configuration variables or options. That would have been the case if we didn't move around the grep_config() call in "builtin/log.c". But now that we call "grep_config" after "git_log_config" and "git_format_config" we'll need to pass in the already initialized "struct grep_opt *". See 6ba9bb76e02 (grep: copy struct in one fell swoop, 2020-11-29) and 7687a0541e0 (grep: move the configuration parsing logic to grep.[ch], 2012-10-09) for the commits that added the comments. The memcpy() pattern here will be optimized away and follows the convention of other *_init() functions. See 5726a6b4012 (*.c *_init(): define in terms of corresponding *_INIT macro, 2021-07-01). 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 4 ++-- builtin/log.c | 13 +++++++++++-- grep.c | 39 +++------------------------------------ grep.h | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 5ec4cecae4..0ea124321b 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -285,7 +285,7 @@ static int wait_all(void) static int grep_cmd_config(const char *var, const char *value, void *cb) { - int st = grep_config(var, value, NULL); + int st = grep_config(var, value, cb); if (git_color_default_config(var, value, NULL) < 0) st = -1; @@ -966,8 +966,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) }; grep_prefix = prefix; - git_config(grep_cmd_config, NULL); grep_init(&opt, the_repository); + git_config(grep_cmd_config, &opt); /* * If there is no -- then the paths must exist in the working diff --git a/builtin/log.c b/builtin/log.c index 4b493408cc..06283b37e7 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -520,8 +520,6 @@ static int git_log_config(const char *var, const char *value, void *cb) return 0; } - if (grep_config(var, value, cb) < 0) - return -1; if (git_gpg_config(var, value, cb) < 0) return -1; return git_diff_ui_config(var, value, cb); @@ -536,6 +534,8 @@ int cmd_whatchanged(int argc, const char **argv, const char *prefix) git_config(git_log_config, NULL); repo_init_revisions(the_repository, &rev, prefix); + git_config(grep_config, &rev.grep_filter); + rev.diff = 1; rev.simplify_history = 0; memset(&opt, 0, sizeof(opt)); @@ -650,6 +650,8 @@ int cmd_show(int argc, const char **argv, const char *prefix) memset(&match_all, 0, sizeof(match_all)); repo_init_revisions(the_repository, &rev, prefix); + git_config(grep_config, &rev.grep_filter); + rev.diff = 1; rev.always_show_header = 1; rev.no_walk = 1; @@ -733,6 +735,8 @@ int cmd_log_reflog(int argc, const char **argv, const char *prefix) 
repo_init_revisions(the_repository, &rev, prefix); init_reflog_walk(&rev.reflog_info); + git_config(grep_config, &rev.grep_filter); + rev.verbose_header = 1; memset(&opt, 0, sizeof(opt)); opt.def = "HEAD"; @@ -766,6 +770,8 @@ int cmd_log(int argc, const char **argv, const char *prefix) git_config(git_log_config, NULL); repo_init_revisions(the_repository, &rev, prefix); + git_config(grep_config, &rev.grep_filter); + rev.always_show_header = 1; memset(&opt, 0, sizeof(opt)); opt.def = "HEAD"; @@ -1848,10 +1854,13 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix) extra_hdr.strdup_strings = 1; extra_to.strdup_strings = 1; extra_cc.strdup_strings = 1; + init_log_defaults(); init_display_notes(&notes_opt); git_config(git_format_config, NULL); repo_init_revisions(the_repository, &rev, prefix); + git_config(grep_config, &rev.grep_filter); + rev.show_notes = show_notes; memcpy(&rev.notes_opt, &notes_opt, sizeof(notes_opt)); rev.commit_format = CMIT_FMT_EMAIL; diff --git a/grep.c b/grep.c index 8421dc5548..35e12e43c0 100644 --- a/grep.c +++ b/grep.c @@ -19,27 +19,6 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size) fwrite(buf, size, 1, stdout); } -static struct grep_opt grep_defaults = { - .relative = 1, - .pathname = 1, - .max_depth = -1, - .pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED, - .colors = { - [GREP_COLOR_CONTEXT] = "", - [GREP_COLOR_FILENAME] = GIT_COLOR_MAGENTA, - [GREP_COLOR_FUNCTION] = "", - [GREP_COLOR_LINENO] = GIT_COLOR_GREEN, - [GREP_COLOR_COLUMNNO] = GIT_COLOR_GREEN, - [GREP_COLOR_MATCH_CONTEXT] = GIT_COLOR_BOLD_RED, - [GREP_COLOR_MATCH_SELECTED] = GIT_COLOR_BOLD_RED, - [GREP_COLOR_SELECTED] = "", - [GREP_COLOR_SEP] = GIT_COLOR_CYAN, - }, - .only_matching = 0, - .color = -1, - .output = std_output, -}; - static const char *color_grep_slots[] = { [GREP_COLOR_CONTEXT] = "context", [GREP_COLOR_FILENAME] = "filename", @@ -75,20 +54,12 @@ define_list_config_array_extra(color_grep_slots, {"match"}); */ int 
grep_config(const char *var, const char *value, void *cb) { - struct grep_opt *opt = &grep_defaults; + struct grep_opt *opt = cb; const char *slot; if (userdiff_config(var, value) < 0) return -1; - /* - * The instance of grep_opt that we set up here is copied by - * grep_init() to be used by each individual invocation. - * When populating a new field of this structure here, be - * sure to think about ownership -- e.g., you might need to - * override the shallow copy in grep_init() with a deep copy. - */ - if (!strcmp(var, "grep.extendedregexp")) { opt->extended_regexp_option = git_config_bool(var, value); return 0; @@ -134,14 +105,10 @@ int grep_config(const char *var, const char *value, void *cb) return 0; } -/* - * Initialize one instance of grep_opt and copy the - * default values from the template we read the configuration - * information in an earlier call to git_config(grep_config). - */ void grep_init(struct grep_opt *opt, struct repository *repo) { - *opt = grep_defaults; + struct grep_opt blank = GREP_OPT_INIT; + memcpy(opt, &blank, sizeof(*opt)); opt->repo = repo; opt->pattern_tail = &opt->pattern_list; diff --git a/grep.h b/grep.h index 23a2a41d2c..3112d1c2a3 100644 --- a/grep.h +++ b/grep.h @@ -179,6 +179,27 @@ struct grep_opt { void *output_priv; }; +#define GREP_OPT_INIT { \ + .relative = 1, \ + .pathname = 1, \ + .max_depth = -1, \ + .pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED, \ + .colors = { \ + [GREP_COLOR_CONTEXT] = "", \ + [GREP_COLOR_FILENAME] = GIT_COLOR_MAGENTA, \ + [GREP_COLOR_FUNCTION] = "", \ + [GREP_COLOR_LINENO] = GIT_COLOR_GREEN, \ + [GREP_COLOR_COLUMNNO] = GIT_COLOR_GREEN, \ + [GREP_COLOR_MATCH_CONTEXT] = GIT_COLOR_BOLD_RED, \ + [GREP_COLOR_MATCH_SELECTED] = GIT_COLOR_BOLD_RED, \ + [GREP_COLOR_SELECTED] = "", \ + [GREP_COLOR_SEP] = GIT_COLOR_CYAN, \ + }, \ + .only_matching = 0, \ + .color = -1, \ + .output = std_output, \ +} + int grep_config(const char *var, const char *value, void *); void grep_init(struct grep_opt *, struct 
repository *repo); void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt); From 321ee43628c53d6050fb7fbc552332bab681f1a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:37 +0100 Subject: [PATCH 103/150] grep.h: make "grep_opt.pattern_type_option" use its enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the "pattern_type_option" member of "struct grep_opt" to use the enum type we use for it. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- grep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.h b/grep.h index 3112d1c2a3..89a2ce5113 100644 --- a/grep.h +++ b/grep.h @@ -164,7 +164,7 @@ struct grep_opt { int funcname; int funcbody; int extended_regexp_option; - int pattern_type_option; + enum grep_pattern_type pattern_type_option; int ignore_locale; char colors[NR_GREP_COLORS][COLOR_MAXLEN]; unsigned pre_context; From ae807d778f502c81ec55897ac6d1f42ef3a4e23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:38 +0100 Subject: [PATCH 104/150] grep.c: do "if (bool && memchr())" not "if (memchr() && bool)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change code in compile_regexp() to check the cheaper boolean "!opt->pcre2" condition before the "memchr()" search. This doesn't noticeably optimize anything, but makes the code more obvious and conventional. The line wrapping being added here also makes a subsequent commit smaller. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- grep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/grep.c b/grep.c index 35e12e43c0..60228a95a4 100644 --- a/grep.c +++ b/grep.c @@ -492,7 +492,8 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->ignore_case = opt->ignore_case; p->fixed = opt->fixed; - if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) + if (!opt->pcre2 && + memchr(p->pattern, 0, p->patternlen)) die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2")); p->is_fixed = is_fixed(p->pattern, p->patternlen); From 04bf052eef53c6be04d313d8ce11690beaf890b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 01:00:39 +0100 Subject: [PATCH 105/150] grep: simplify config parsing and option parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the parsing of "grep.patternType" and "grep.extendedRegexp". This changes no behavior, but gets rid of complex parsing logic that isn't needed anymore. When "grep.patternType" was introduced in 84befcd0a4a (grep: add a grep.patternType configuration setting, 2012-08-03) we promised that: 1. You can set "grep.patternType", and "[setting it to] 'default' will return to the default matching behavior". In that context "the default" meant whatever the configuration system specified before that change, i.e. via grep.extendedRegexp. 2. We'd support the existing "grep.extendedRegexp" option, but ignore it when the new "grep.patternType" option is set. We said we'd only ignore the older "grep.extendedRegexp" option "when the `grep.patternType` option is set to a value other than 'default'". In a preceding commit we changed grep_config() to be called after grep_init(), which means that much of the complexity here can go away. 
As before both "grep.patternType" and "grep.extendedRegexp" are last-one-wins variables, with "grep.extendedRegexp" yielding to "grep.patternType", except when "grep.patternType=default". Note that as the previously added tests indicate this cannot be done on-the-fly as we see the config variables, without introducing more state keeping. I.e. if we see: -c grep.extendedRegexp=false -c grep.patternType=default -c grep.extendedRegexp=true We need to select ERE, since grep.patternType=default unselects that variable, which normally has higher precedence, but we also need to select BRE in cases of: -c grep.extendedRegexp=true \ -c grep.extendedRegexp=false Which would not be the case for this, which selects ERE: -c grep.patternType=extended \ -c grep.extendedRegexp=false Therefore we cannot do this on-the-fly in grep_config without also introducing tracking variables for not only the pattern type, but what the source of that pattern type was. So we need to decide on the pattern after our config was fully parsed. Let's do that by deferring the decision on the pattern type until it's time to compile it in compile_regexp(). By that time we've not only parsed the config, but also handled the command-line options. Those will set "opt.pattern_type_option" (*not* "opt.extended_regexp_option"!). At that point all we need to do is see if "grep.patternType" was UNSPECIFIED in the end (including an explicit "=default"), if so we'll use the "grep.extendedRegexp" configuration, if any. See my 07a3d411739 (grep: remove regflags from the public grep_opt API, 2017-06-29) for addition of the two comments being removed here, i.e. the complexity noted in that commit is now going away. 1. 
https://lore.kernel.org/git/patch-v8-09.10-c211bb0c69d-20220118T155211Z-avarab@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/grep.c | 10 +++----- grep.c | 69 +++++++------------------------------------------- grep.h | 3 --- revision.c | 2 -- 4 files changed, 13 insertions(+), 71 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 0ea124321b..942c4b2507 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -845,7 +845,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int i; int dummy; int use_index = 1; - int pattern_type_arg = GREP_PATTERN_TYPE_UNSPECIFIED; int allow_revs; struct option options[] = { @@ -879,16 +878,16 @@ int cmd_grep(int argc, const char **argv, const char *prefix) N_("descend at most <depth> levels"), PARSE_OPT_NONEG, NULL, 1 }, OPT_GROUP(""), - OPT_SET_INT('E', "extended-regexp", &pattern_type_arg, + OPT_SET_INT('E', "extended-regexp", &opt.pattern_type_option, N_("use extended POSIX regular expressions"), GREP_PATTERN_TYPE_ERE), - OPT_SET_INT('G', "basic-regexp", &pattern_type_arg, + OPT_SET_INT('G', "basic-regexp", &opt.pattern_type_option, N_("use basic POSIX regular expressions (default)"), GREP_PATTERN_TYPE_BRE), - OPT_SET_INT('F', "fixed-strings", &pattern_type_arg, + OPT_SET_INT('F', "fixed-strings", &opt.pattern_type_option, N_("interpret patterns as fixed strings"), GREP_PATTERN_TYPE_FIXED), - OPT_SET_INT('P', "perl-regexp", &pattern_type_arg, + OPT_SET_INT('P', "perl-regexp", &opt.pattern_type_option, N_("use Perl-compatible regular expressions"), GREP_PATTERN_TYPE_PCRE), OPT_GROUP(""), @@ -982,7 +981,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, options, grep_usage, PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_STOP_AT_NON_OPTION); - grep_commit_pattern_type(pattern_type_arg, &opt); if (use_index && !startup_info->have_repository) { int fallback = 0; diff --git a/grep.c 
b/grep.c index 60228a95a4..a8f503f55c 100644 --- a/grep.c +++ b/grep.c @@ -115,62 +115,6 @@ void grep_init(struct grep_opt *opt, struct repository *repo) opt->header_tail = &opt->header_list; } -static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - /* - * When committing to the pattern type by setting the relevant - * fields in grep_opt it's generally not necessary to zero out - * the fields we're not choosing, since they won't have been - * set by anything. The extended_regexp_option field is the - * only exception to this. - * - * This is because in the process of parsing grep.patternType - * & grep.extendedRegexp we set opt->pattern_type_option and - * opt->extended_regexp_option, respectively. We then - * internally use opt->extended_regexp_option to see if we're - * compiling an ERE. It must be unset if that's not actually - * the case. - */ - if (pattern_type != GREP_PATTERN_TYPE_ERE && - opt->extended_regexp_option) - opt->extended_regexp_option = 0; - - switch (pattern_type) { - case GREP_PATTERN_TYPE_UNSPECIFIED: - /* fall through */ - - case GREP_PATTERN_TYPE_BRE: - break; - - case GREP_PATTERN_TYPE_ERE: - opt->extended_regexp_option = 1; - break; - - case GREP_PATTERN_TYPE_FIXED: - opt->fixed = 1; - break; - - case GREP_PATTERN_TYPE_PCRE: - opt->pcre2 = 1; - break; - } -} - -void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(pattern_type, opt); - else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(opt->pattern_type_option, opt); - else if (opt->extended_regexp_option) - /* - * This branch *must* happen after setting from the - * opt->pattern_type_option above, we don't want - * grep.extendedRegexp to override grep.patternType! 
- */ - grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); -} - static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t, @@ -488,11 +432,16 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) int err; int regflags = REG_NEWLINE; + if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED) + opt->pattern_type_option = (opt->extended_regexp_option + ? GREP_PATTERN_TYPE_ERE + : GREP_PATTERN_TYPE_BRE); + p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - p->fixed = opt->fixed; + p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED; - if (!opt->pcre2 && + if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE && memchr(p->pattern, 0, p->patternlen)) die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2")); @@ -544,14 +493,14 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - if (opt->pcre2) { + if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) { compile_pcre2_pattern(p, opt); return; } if (p->ignore_case) regflags |= REG_ICASE; - if (opt->extended_regexp_option) + if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE) regflags |= REG_EXTENDED; err = regcomp(&p->regexp, p->pattern, regflags); if (err) { diff --git a/grep.h b/grep.h index 89a2ce5113..c722d25ed9 100644 --- a/grep.h +++ b/grep.h @@ -143,7 +143,6 @@ struct grep_opt { int unmatch_name_only; int count; int word_regexp; - int fixed; int all_match; int no_body_match; int body_hit; @@ -154,7 +153,6 @@ struct grep_opt { int allow_textconv; int extended; int use_reflog_filter; - int pcre2; int relative; int pathname; int null_following_name; @@ -202,7 +200,6 @@ struct grep_opt { int grep_config(const char *var, const char *value, void *); void grep_init(struct grep_opt *, struct repository *repo); -void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt); void append_grep_pat(struct 
grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t); void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t); diff --git a/revision.c b/revision.c index d6e0e2b23b..dd301e3014 100644 --- a/revision.c +++ b/revision.c @@ -2860,8 +2860,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s diff_setup_done(&revs->diffopt); - grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED, - &revs->grep_filter); if (!is_encoding_utf8(get_log_output_encoding())) revs->grep_filter.ignore_locale = 1; compile_grep_patterns(&revs->grep_filter); From f6c71f81f9f597d94b95828b5f70f01283a56202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 09:14:01 +0100 Subject: [PATCH 106/150] cache.h: remove always unused show_date_human() declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There has never been a show_date_human() function on the "master" branch in git.git. This declaration was added in b841d4ff438 (Add `human` format to test-tool, 2019-01-28). A look at the ML history reveals that it was leftover cruft from an earlier version of that commit[1]. 1. 
https://lore.kernel.org/git/20190118061805.19086-5-ischis2@cox.net/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- cache.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/cache.h b/cache.h index 4148b6322d..703a474e5a 100644 --- a/cache.h +++ b/cache.h @@ -1588,8 +1588,6 @@ struct date_mode *date_mode_from_type(enum date_mode_type type); const char *show_date(timestamp_t time, int timezone, const struct date_mode *mode); void show_date_relative(timestamp_t time, struct strbuf *timebuf); -void show_date_human(timestamp_t time, int tz, const struct timeval *now, - struct strbuf *timebuf); int parse_date(const char *date, struct strbuf *out); int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset); int parse_expiry_date(const char *date, timestamp_t *timestamp); From 88c7b4c3c8d51510d20ebb9990750ad0e97afbfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 09:14:02 +0100 Subject: [PATCH 107/150] date API: create a date.h, split from cache.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the declaration of the date.c functions from cache.h, and adjust the relevant users to include the new date.h header. The show_ident_date() function belonged in pretty.h (it's defined in pretty.c), its two users outside of pretty.c didn't strictly need to include pretty.h, as they get it indirectly, but let's add it to them anyway. Similarly, the change to "builtin/{fast-import,show-branch,tag}.c" isn't needed as far as the compiler is concerned, but since they all use the "DATE_MODE()" macro we now define in date.h, let's have them include it. We could simply include this new header in "cache.h", but as this change shows these functions weren't common enough to warrant including in it in the first place. 
By moving them out of cache.h changes to this API will no longer cause a (mostly) full re-build of the project when "make" is run. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- archive-zip.c | 1 + builtin/am.c | 1 + builtin/commit.c | 1 + builtin/fast-import.c | 1 + builtin/show-branch.c | 1 + builtin/tag.c | 1 + cache.h | 48 ------------------------------------------- config.c | 1 + date.c | 1 + date.h | 43 ++++++++++++++++++++++++++++++++++++++ http-backend.c | 1 + ident.c | 1 + object-name.c | 1 + pretty.h | 10 +++++++++ reflog-walk.h | 1 + refs.c | 1 + strbuf.c | 1 + t/helper/test-date.c | 1 + 18 files changed, 68 insertions(+), 48 deletions(-) create mode 100644 date.h diff --git a/archive-zip.c b/archive-zip.c index 2961e01c75..8ea9d1a5da 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -9,6 +9,7 @@ #include "object-store.h" #include "userdiff.h" #include "xdiff-interface.h" +#include "date.h" static int zip_date; static int zip_time; diff --git a/builtin/am.c b/builtin/am.c index 7de2c89ef2..eb24bc89bb 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -34,6 +34,7 @@ #include "string-list.h" #include "packfile.h" #include "repository.h" +#include "pretty.h" /** * Returns the length of the first line of msg. 
diff --git a/builtin/commit.c b/builtin/commit.c index b9ed0374e3..6b99ac276d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -37,6 +37,7 @@ #include "help.h" #include "commit-reach.h" #include "commit-graph.h" +#include "pretty.h" static const char * const builtin_commit_usage[] = { N_("git commit [<options>] [--] <pathspec>..."), diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 2b2e28bad7..28f2b9cc91 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -19,6 +19,7 @@ #include "mem-pool.h" #include "commit-reach.h" #include "khash.h" +#include "date.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) diff --git a/builtin/show-branch.c b/builtin/show-branch.c index e12c5e80e3..330b0553b9 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -8,6 +8,7 @@ #include "parse-options.h" #include "dir.h" #include "commit-slab.h" +#include "date.h" static const char* show_branch_usage[] = { N_("git show-branch [-a | --all] [-r | --remotes] [--topo-order | --date-order]\n" diff --git a/builtin/tag.c b/builtin/tag.c index 134b3f1edf..2479da0704 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -20,6 +20,7 @@ #include "oid-array.h" #include "column.h" #include "ref-filter.h" +#include "date.h" static const char * const git_tag_usage[] = { N_("git tag [-a | -s | -u <key-id>] [-f] [-m <msg> | -F <file>]\n" diff --git a/cache.h b/cache.h index 703a474e5a..48e77aa069 100644 --- a/cache.h +++ b/cache.h @@ -1559,46 +1559,6 @@ struct object *repo_peel_to_type(struct repository *r, #define peel_to_type(name, namelen, obj, type) \ repo_peel_to_type(the_repository, name, namelen, obj, type) -enum date_mode_type { - DATE_NORMAL = 0, - DATE_HUMAN, - DATE_RELATIVE, - DATE_SHORT, - DATE_ISO8601, - DATE_ISO8601_STRICT, - DATE_RFC2822, - DATE_STRFTIME, - DATE_RAW, - DATE_UNIX -}; - -struct date_mode { - enum date_mode_type type; - const char *strftime_fmt; - int local; -}; - -/* - * Convenience helper for passing a constant 
type, like: - * - * show_date(t, tz, DATE_MODE(NORMAL)); - */ -#define DATE_MODE(t) date_mode_from_type(DATE_##t) -struct date_mode *date_mode_from_type(enum date_mode_type type); - -const char *show_date(timestamp_t time, int timezone, const struct date_mode *mode); -void show_date_relative(timestamp_t time, struct strbuf *timebuf); -int parse_date(const char *date, struct strbuf *out); -int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset); -int parse_expiry_date(const char *date, timestamp_t *timestamp); -void datestamp(struct strbuf *out); -#define approxidate(s) approxidate_careful((s), NULL) -timestamp_t approxidate_careful(const char *, int *); -timestamp_t approxidate_relative(const char *date); -void parse_date_format(const char *format, struct date_mode *mode); -int date_overflows(timestamp_t date); -time_t tm_to_time_t(const struct tm *tm); - #define IDENT_STRICT 1 #define IDENT_NO_DATE 2 #define IDENT_NO_NAME 4 @@ -1644,14 +1604,6 @@ struct ident_split { */ int split_ident_line(struct ident_split *, const char *, int); -/* - * Like show_date, but pull the timestamp and tz parameters from - * the ident_split. It will also sanity-check the values and produce - * a well-known sentinel date if they appear bogus. - */ -const char *show_ident_date(const struct ident_split *id, - const struct date_mode *mode); - /* * Compare split idents for equality or strict ordering. Note that we * compare only the ident part of the line, ignoring any timestamp. diff --git a/config.c b/config.c index e0c03d154c..430868f1ec 100644 --- a/config.c +++ b/config.c @@ -6,6 +6,7 @@ * */ #include "cache.h" +#include "date.h" #include "branch.h" #include "config.h" #include "environment.h" diff --git a/date.c b/date.c index 84bb4451c1..863b07e9e6 100644 --- a/date.c +++ b/date.c @@ -5,6 +5,7 @@ */ #include "cache.h" +#include "date.h" /* * This is like mktime, but without normalization of tm_wday and tm_yday. 
diff --git a/date.h b/date.h new file mode 100644 index 0000000000..5db9ec8dd2 --- /dev/null +++ b/date.h @@ -0,0 +1,43 @@ +#ifndef DATE_H +#define DATE_H + +enum date_mode_type { + DATE_NORMAL = 0, + DATE_HUMAN, + DATE_RELATIVE, + DATE_SHORT, + DATE_ISO8601, + DATE_ISO8601_STRICT, + DATE_RFC2822, + DATE_STRFTIME, + DATE_RAW, + DATE_UNIX +}; + +struct date_mode { + enum date_mode_type type; + const char *strftime_fmt; + int local; +}; + +/* + * Convenience helper for passing a constant type, like: + * + * show_date(t, tz, DATE_MODE(NORMAL)); + */ +#define DATE_MODE(t) date_mode_from_type(DATE_##t) +struct date_mode *date_mode_from_type(enum date_mode_type type); + +const char *show_date(timestamp_t time, int timezone, const struct date_mode *mode); +void show_date_relative(timestamp_t time, struct strbuf *timebuf); +int parse_date(const char *date, struct strbuf *out); +int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset); +int parse_expiry_date(const char *date, timestamp_t *timestamp); +void datestamp(struct strbuf *out); +#define approxidate(s) approxidate_careful((s), NULL) +timestamp_t approxidate_careful(const char *, int *); +timestamp_t approxidate_relative(const char *date); +void parse_date_format(const char *format, struct date_mode *mode); +int date_overflows(timestamp_t date); +time_t tm_to_time_t(const struct tm *tm); +#endif diff --git a/http-backend.c b/http-backend.c index 807fb8839e..81a7229ece 100644 --- a/http-backend.c +++ b/http-backend.c @@ -13,6 +13,7 @@ #include "packfile.h" #include "object-store.h" #include "protocol.h" +#include "date.h" static const char content_type[] = "Content-Type"; static const char content_length[] = "Content-Length"; diff --git a/ident.c b/ident.c index 6aba4b5cb6..89ca5b4700 100644 --- a/ident.c +++ b/ident.c @@ -7,6 +7,7 @@ */ #include "cache.h" #include "config.h" +#include "date.h" static struct strbuf git_default_name = STRBUF_INIT; static struct strbuf git_default_email = STRBUF_INIT; 
diff --git a/object-name.c b/object-name.c index 92862eeb1a..060d892a97 100644 --- a/object-name.c +++ b/object-name.c @@ -15,6 +15,7 @@ #include "submodule.h" #include "midx.h" #include "commit-reach.h" +#include "date.h" static int get_oid_oneline(struct repository *r, const char *, struct object_id *, struct commit_list *); diff --git a/pretty.h b/pretty.h index 2f16acd213..f34e24c53a 100644 --- a/pretty.h +++ b/pretty.h @@ -2,6 +2,7 @@ #define PRETTY_H #include "cache.h" +#include "date.h" #include "string-list.h" struct commit; @@ -163,4 +164,13 @@ int format_set_trailers_options(struct process_trailer_options *opts, const char **arg, char **invalid_arg); +/* + * Like show_date, but pull the timestamp and tz parameters from + * the ident_split. It will also sanity-check the values and produce + * a well-known sentinel date if they appear bogus. + */ +const char *show_ident_date(const struct ident_split *id, + const struct date_mode *mode); + + #endif /* PRETTY_H */ diff --git a/reflog-walk.h b/reflog-walk.h index f26408f6cc..e9e00ffd47 100644 --- a/reflog-walk.h +++ b/reflog-walk.h @@ -5,6 +5,7 @@ struct commit; struct reflog_walk_info; +struct date_mode; void init_reflog_walk(struct reflog_walk_info **info); int add_reflog_for_walk(struct reflog_walk_info *info, diff --git a/refs.c b/refs.c index 7017ae5980..b74f3815a5 100644 --- a/refs.c +++ b/refs.c @@ -19,6 +19,7 @@ #include "strvec.h" #include "repository.h" #include "sigchain.h" +#include "date.h" /* * List of all available backends diff --git a/strbuf.c b/strbuf.c index 613fee8c82..00abeb55af 100644 --- a/strbuf.c +++ b/strbuf.c @@ -2,6 +2,7 @@ #include "refs.h" #include "string-list.h" #include "utf8.h" +#include "date.h" int starts_with(const char *str, const char *prefix) { diff --git a/t/helper/test-date.c b/t/helper/test-date.c index 099eff4f0f..ded3d059f5 100644 --- a/t/helper/test-date.c +++ b/t/helper/test-date.c @@ -1,5 +1,6 @@ #include "test-tool.h" #include "cache.h" +#include "date.h" static 
const char *usage_msg = "\n" " test-tool date relative [time_t]...\n" From f1842898324330dcf7a3b30ea08d18a68bd19ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 09:14:03 +0100 Subject: [PATCH 108/150] date API: provide and use a DATE_MODE_INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide and use a DATE_MODE_INIT macro. Most of the users of "struct date_mode" use it via pretty.h's "struct pretty_print_context" which doesn't have an initialization macro, so we're still bound to being initialized to "{ 0 }" by default. But we can change the couple of callers that directly declared a variable on the stack to instead use the initializer, and thus do away with the "mode.local = 0" added in add00ba2de9 (date: make "local" orthogonal to date format, 2015-09-03). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- date.c | 3 +-- date.h | 4 ++++ ref-filter.c | 2 +- t/helper/test-date.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/date.c b/date.c index 863b07e9e6..54c709e4a0 100644 --- a/date.c +++ b/date.c @@ -206,11 +206,10 @@ void show_date_relative(timestamp_t time, struct strbuf *timebuf) struct date_mode *date_mode_from_type(enum date_mode_type type) { - static struct date_mode mode; + static struct date_mode mode = DATE_MODE_INIT; if (type == DATE_STRFTIME) BUG("cannot create anonymous strftime date_mode struct"); mode.type = type; - mode.local = 0; return &mode; } diff --git a/date.h b/date.h index 5db9ec8dd2..c3a00d08ed 100644 --- a/date.h +++ b/date.h @@ -20,6 +20,10 @@ struct date_mode { int local; }; +#define DATE_MODE_INIT { \ + .type = DATE_NORMAL, \ +} + /* * Convenience helper for passing a constant type, like: * diff --git a/ref-filter.c b/ref-filter.c index f7a2f17bfd..3399bde932 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1251,7 +1251,7
@@ static void grab_date(const char *buf, struct atom_value *v, const char *atomnam char *zone; timestamp_t timestamp; long tz; - struct date_mode date_mode = { DATE_NORMAL }; + struct date_mode date_mode = DATE_MODE_INIT; const char *formatp; /* diff --git a/t/helper/test-date.c b/t/helper/test-date.c index ded3d059f5..111071e1dd 100644 --- a/t/helper/test-date.c +++ b/t/helper/test-date.c @@ -35,7 +35,7 @@ static void show_human_dates(const char **argv) static void show_dates(const char **argv, const char *format) { - struct date_mode mode; + struct date_mode mode = DATE_MODE_INIT; parse_date_format(format, &mode); for (; *argv; argv++) { From 2bacb8346660b54dc4440da44103a7cd3d469009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 09:14:04 +0100 Subject: [PATCH 109/150] date API: add basic API docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add basic API doc comments to date.h, and while doing so move the parse_date_format() function adjacent to show_date(). This way all the "struct date_mode" functions are grouped together. Documenting the rest is one of our #leftoverbits. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- date.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/date.h b/date.h index c3a00d08ed..bbd6a6477b 100644 --- a/date.h +++ b/date.h @@ -1,6 +1,12 @@ #ifndef DATE_H #define DATE_H +/** + * The date mode type. This has DATE_NORMAL at an explicit "= 0" to + * accommodate a memset([...], 0, [...]) initialization when "struct + * date_mode" is used as an embedded struct member, as in the case of + * e.g. "struct pretty_print_context" and "struct rev_info".
+ */ enum date_mode_type { DATE_NORMAL = 0, DATE_HUMAN, @@ -24,7 +30,7 @@ struct date_mode { .type = DATE_NORMAL, \ } -/* +/** * Convenience helper for passing a constant type, like: * * show_date(t, tz, DATE_MODE(NORMAL)); @@ -32,7 +38,22 @@ struct date_mode { #define DATE_MODE(t) date_mode_from_type(DATE_##t) struct date_mode *date_mode_from_type(enum date_mode_type type); +/** + * Format <'time', 'timezone'> into static memory according to 'mode' + * and return it. The mode is an initialized "struct date_mode" + * (usually from the DATE_MODE() macro). + */ const char *show_date(timestamp_t time, int timezone, const struct date_mode *mode); + +/** + * Parse a date format for later use with show_date(). + * + * When the "date_mode_type" is DATE_STRFTIME the "strftime_fmt" + * member of "struct date_mode" will be a malloc()'d format string to + * be used with strbuf_addftime(). + */ +void parse_date_format(const char *format, struct date_mode *mode); + void show_date_relative(timestamp_t time, struct strbuf *timebuf); int parse_date(const char *date, struct strbuf *out); int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset); @@ -41,7 +62,6 @@ void datestamp(struct strbuf *out); #define approxidate(s) approxidate_careful((s), NULL) timestamp_t approxidate_careful(const char *, int *); timestamp_t approxidate_relative(const char *date); -void parse_date_format(const char *format, struct date_mode *mode); int date_overflows(timestamp_t date); time_t tm_to_time_t(const struct tm *tm); #endif From 974c919d36d944e9005def346fb363d8a83399f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 09:14:05 +0100 Subject: [PATCH 110/150] date API: add and use a date_mode_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory leak in the parse_date_format() function by providing a new date_mode_release() companion function. 
By using this in "t/helper/test-date.c" we can mark the "t0006-date.sh" test as passing when git is compiled with SANITIZE=leak, and whitelist it to run under "GIT_TEST_PASSING_SANITIZE_LEAK=true" by adding "TEST_PASSES_SANITIZE_LEAK=true" to the test itself. The other tests that expose this memory leak (i.e. take the "mode->type == DATE_STRFTIME" branch in parse_date_format()) are "t6300-for-each-ref.sh" and "t7004-tag.sh". The former is due to an easily fixed leak in "ref-filter.c", and brings the failures in "t6300-for-each-ref.sh" down from 51 to 48. Fixing the remaining leaks will have to wait until there's a release_revisions() in "revision.c", as they have to do with leaks via "struct rev_info". There is also a leak in "builtin/blame.c" due to its call to parse_date_format() to parse the "blame.date" configuration. However as it declares a file-level "static struct date_mode blame_date_mode" to track the data, LSAN will not report it as a leak. It's possible to get valgrind(1) to complain about it with e.g.: valgrind --leak-check=full --show-leak-kinds=all ./git -P -c blame.date=format:%Y blame README.md But let's focus on things LSAN complains about, and are thus observable with "TEST_PASSES_SANITIZE_LEAK=true". We should get to fixing memory leaks in "builtin/blame.c", but as doing so would require some re-arrangement of cmd_blame() let's leave it for some other time. 
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- date.c | 5 +++++ date.h | 9 ++++++++- ref-filter.c | 1 + t/helper/test-date.c | 2 ++ t/t0006-date.sh | 2 ++ 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/date.c b/date.c index 54c709e4a0..68a260c214 100644 --- a/date.c +++ b/date.c @@ -993,6 +993,11 @@ void parse_date_format(const char *format, struct date_mode *mode) die("unknown date format %s", format); } +void date_mode_release(struct date_mode *mode) +{ + free((char *)mode->strftime_fmt); +} + void datestamp(struct strbuf *out) { time_t now; diff --git a/date.h b/date.h index bbd6a6477b..5d4eaba0a9 100644 --- a/date.h +++ b/date.h @@ -50,10 +50,17 @@ const char *show_date(timestamp_t time, int timezone, const struct date_mode *mo * * When the "date_mode_type" is DATE_STRFTIME the "strftime_fmt" * member of "struct date_mode" will be a malloc()'d format string to - * be used with strbuf_addftime(). + * be used with strbuf_addftime(), in which case you'll need to call + * date_mode_release() later. */ void parse_date_format(const char *format, struct date_mode *mode); +/** + * Release a "struct date_mode", currently only required if + * parse_date_format() has parsed a "DATE_STRFTIME" format. 
+ */ +void date_mode_release(struct date_mode *mode); + void show_date_relative(timestamp_t time, struct strbuf *timebuf); int parse_date(const char *date, struct strbuf *out); int parse_date_basic(const char *date, timestamp_t *timestamp, int *offset); diff --git a/ref-filter.c b/ref-filter.c index 3399bde932..7838bd22b8 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1276,6 +1276,7 @@ static void grab_date(const char *buf, struct atom_value *v, const char *atomnam goto bad; v->s = xstrdup(show_date(timestamp, tz, &date_mode)); v->value = timestamp; + date_mode_release(&date_mode); return; bad: v->s = xstrdup(""); diff --git a/t/helper/test-date.c b/t/helper/test-date.c index 111071e1dd..45951b1df8 100644 --- a/t/helper/test-date.c +++ b/t/helper/test-date.c @@ -54,6 +54,8 @@ static void show_dates(const char **argv, const char *format) printf("%s -> %s\n", *argv, show_date(t, tz, &mode)); } + + date_mode_release(&mode); } static void parse_dates(const char **argv) diff --git a/t/t0006-date.sh b/t/t0006-date.sh index 794186961e..2490162071 100755 --- a/t/t0006-date.sh +++ b/t/t0006-date.sh @@ -1,6 +1,8 @@ #!/bin/sh test_description='test date parsing and printing' + +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh # arbitrary reference time: 2009-08-30 19:20:00 From 244c27242f44e6b88e3a381c90bde08d134c274b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 11:56:28 +0100 Subject: [PATCH 111/150] diff.[ch]: have diff_free() call clear_pathspec(opts.pathspec) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have the diff_free() function call clear_pathspec(). Since the diff_flush() function calls this all its callers can be simplified to rely on it instead. When I added the diff_free() function in e900d494dcf (diff: add an API for deferred freeing, 2021-02-11) I simply missed this, or wasn't interested in it. Let's consolidate this now. 
This means that any future callers (and I've got revision.c in mind) that embed a "struct diff_options" can simply call diff_free() instead of needing to know that it has an embedded pathspec. This does fix a bunch of leaks, but I can't mark any test here as passing under the SANITIZE=leak testing mode because in 886e1084d78 (builtin/: add UNLEAKs, 2017-10-01) an UNLEAK(rev) was added, which plasters over the memory leak. E.g. "t4011-diff-symlink.sh" would report fewer leaks with this fix, but because of the UNLEAK() reports none. I'll eventually loop around to removing that UNLEAK(rev) annotation as I'll fix deeper issues with the revisions API leaking. This is one small step on the way there, a new freeing function in revision.c will want to call this diff_free(). Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- add-interactive.c | 6 +++--- blame.c | 3 --- builtin/reset.c | 1 - diff.c | 1 + notes-merge.c | 2 -- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git a/add-interactive.c b/add-interactive.c index 6498ae196f..e1ab39cce3 100644 --- a/add-interactive.c +++ b/add-interactive.c @@ -797,14 +797,14 @@ static int run_revert(struct add_i_state *s, const struct pathspec *ps, diffopt.flags.override_submodule_config = 1; diffopt.repo = s->r; - if (do_diff_cache(&oid, &diffopt)) + if (do_diff_cache(&oid, &diffopt)) { + diff_free(&diffopt); res = -1; - else { + } else { diffcore_std(&diffopt); diff_flush(&diffopt); } free(paths); - clear_pathspec(&diffopt.pathspec); if (!res && write_locked_index(s->r->index, &index_lock, COMMIT_LOCK) < 0) diff --git a/blame.c b/blame.c index 206c295660..401990726e 100644 --- a/blame.c +++ b/blame.c @@ -1403,7 +1403,6 @@ static struct blame_origin *find_origin(struct repository *r, } } diff_flush(&diff_opts); - clear_pathspec(&diff_opts.pathspec); return porigin; } @@ -1447,7 +1446,6 @@ static struct blame_origin
*find_rename(struct repository *r, } } diff_flush(&diff_opts); - clear_pathspec(&diff_opts.pathspec); return porigin; } @@ -2328,7 +2326,6 @@ static void find_copy_in_parent(struct blame_scoreboard *sb, } while (unblamed); target->suspects = reverse_blame(leftover, NULL); diff_flush(&diff_opts); - clear_pathspec(&diff_opts.pathspec); } /* diff --git a/builtin/reset.c b/builtin/reset.c index b97745ee94..24968dd628 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -274,7 +274,6 @@ static int read_from_tree(const struct pathspec *pathspec, return 1; diffcore_std(&opt); diff_flush(&opt); - clear_pathspec(&opt.pathspec); return 0; } diff --git a/diff.c b/diff.c index c862771a58..0aef3db6e1 100644 --- a/diff.c +++ b/diff.c @@ -6345,6 +6345,7 @@ void diff_free(struct diff_options *options) diff_free_file(options); diff_free_ignore_regex(options); + clear_pathspec(&options->pathspec); } void diff_flush(struct diff_options *options) diff --git a/notes-merge.c b/notes-merge.c index b4a3a903e8..7ba40cfb08 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -175,7 +175,6 @@ static struct notes_merge_pair *diff_tree_remote(struct notes_merge_options *o, oid_to_hex(&mp->remote)); } diff_flush(&opt); - clear_pathspec(&opt.pathspec); *num_changes = len; return changes; @@ -261,7 +260,6 @@ static void diff_tree_local(struct notes_merge_options *o, oid_to_hex(&mp->local)); } diff_flush(&opt); - clear_pathspec(&opt.pathspec); } static void check_notes_merge_worktree(struct notes_merge_options *o) From 6ee36364eb32287f071878a91d3bbcd86313754a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= <avarab@gmail.com> Date: Wed, 16 Feb 2022 11:56:29 +0100 Subject: [PATCH 112/150] diff.[ch]: have diff_free() free options->parseopts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "struct option" added in 4a288478394 (diff.c: prepare to use parse_options() for parsing, 2019-01-27) would be free'd in the case of 
diff_setup_done() being called. But not all codepaths that allocate it reach that, e.g. "t6427-diff3-conflict-markers.sh" will now free memory that it didn't free before. By using FREE_AND_NULL() here (which diff_setup_done() also does) we ensure that we free the memory, and that we won't have double-free's. Before this running: ./t6427-diff3-conflict-markers.sh -vixd --run=7 Would report: SUMMARY: LeakSanitizer: 7823 byte(s) leaked in 6 allocation(s). But now we'll report: SUMMARY: LeakSanitizer: 703 byte(s) leaked in 5 allocation(s). I.e. the largest leak in that particular test has now been addressed. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Reviewed-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- diff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/diff.c b/diff.c index 0aef3db6e1..fb8bc8aadb 100644 --- a/diff.c +++ b/diff.c @@ -6346,6 +6346,7 @@ void diff_free(struct diff_options *options) diff_free_file(options); diff_free_ignore_regex(options); clear_pathspec(&options->pathspec); + FREE_AND_NULL(options->parseopts); } void diff_flush(struct diff_options *options) From 26b89464219d3cfb0af7dc2274751eff641dea8d Mon Sep 17 00:00:00 2001 From: Derrick Stolee <derrickstolee@github.com> Date: Thu, 17 Feb 2022 21:00:29 +0000 Subject: [PATCH 113/150] dir: force untracked cache with core.untrackedCache The GIT_FORCE_UNTRACKED_CACHE environment variable writes the untracked cache more frequently than the core.untrackedCache config variable. This is due to how read_directory() handles the creation of an untracked cache. Before this change, Git would not create the untracked cache extension for an index that did not already have one. Users would need to run a command such as 'git update-index --untracked-cache' before the index would actually contain an untracked cache. In particular, users noticed that the untracked cache would not appear even with core.untrackedCache=true. 
Some users reported setting GIT_FORCE_UNTRACKED_CACHE=1 in their engineering system environment to ensure the untracked cache would be created. The decision to not write the untracked cache without an environment variable tracks back to fc9ecbeb9 (dir.c: don't flag the index as dirty for changes to the untracked cache, 2018-02-05). The motivation of that change is that writing the index is expensive, and if the untracked cache is the only thing that needs to be written, then it is more expensive than the benefit of the cache. However, this also means that the untracked cache never gets populated, so the user who enabled it via config does not actually get the extension until running 'git update-index --untracked-cache' manually or using the environment variable. We have had a version of this change in the microsoft/git fork for a few major releases now. It has been working well to get users into a good state. Yes, that first index write is slow, but the remaining index writes are much faster than they would be without this change. 
Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index d91295f2bc..79a5f6918c 100644 --- a/dir.c +++ b/dir.c @@ -2936,7 +2936,9 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, if (force_untracked_cache < 0) force_untracked_cache = - git_env_bool("GIT_FORCE_UNTRACKED_CACHE", 0); + git_env_bool("GIT_FORCE_UNTRACKED_CACHE", -1); + if (force_untracked_cache < 0) + force_untracked_cache = (istate->repo->settings.core_untracked_cache == UNTRACKED_CACHE_WRITE); if (force_untracked_cache && dir->untracked == istate->untracked && (dir->untracked->dir_opened || From 97169fc361fd3ab2eedbfedd6c13806433be4036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de> Date: Thu, 17 Feb 2022 22:14:29 +0100 Subject: [PATCH 114/150] grep: fix triggering PCRE2_NO_START_OPTIMIZE workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCRE2 bug 2642 was fixed in version 10.36. Our 95ca1f987e (grep/pcre2: better support invalid UTF-8 haystacks, 2021-01-24) worked around it on older versions by setting the flag PCRE2_NO_START_OPTIMIZE. 797c359978 (grep/pcre2: use compile-time PCREv2 version test, 2021-02-18) switched it around to set the flag on 10.36 and higher instead, while it claimed to use "the same test done at compile-time". Switch the condition back to apply the workaround on PCRE2 versions _before_ 10.36. 
Signed-off-by: René Scharfe <l.s.r@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.c b/grep.c index 636ac48bf0..29deb85904 100644 --- a/grep.c +++ b/grep.c @@ -386,7 +386,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt !(!opt->ignore_case && (p->fixed || p->is_fixed))) options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF); -#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER +#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */ if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) options |= PCRE2_NO_START_OPTIMIZE; From 4a3d86e1bb67bea71a481b4cb1860295b97d044a Mon Sep 17 00:00:00 2001 From: Elijah Newren <newren@gmail.com> Date: Thu, 17 Feb 2022 06:38:42 +0000 Subject: [PATCH 115/150] merge-ort: make informational messages from recursive merges clearer This is another simple change with a long explanation... merge-recursive and merge-ort are both based on the same recursive idea: if there is more than one merge base, merge the merge bases (which may require first merging the merge bases of the merge bases, etc.). The depth of the inner merge is recorded via a variable called "call_depth", which we'll bring up again later. Naturally, the inner merges themselves can have conflicts and various messages generated about those files. merge-recursive immediately prints to stdout as it goes, at the risk of printing multiple conflict notices for the same path separated far apart from each other with many intervening conflict notices for other paths between them. And this is true even if there are no inner merges involved. An example of this was given in [1] and apparently caused some confusion: CONFLICT (rename/add): Rename A->B in HEAD. B added in otherbranch ...dozens of conflicts for OTHER paths...
CONFLICT (content): Merge conflicts in B In contrast, merge-ort collects messages and stores them by path so that it can print them grouped by path. Thus, the same case handled by merge-ort would have output of the form: CONFLICT (rename/add): Rename A->B in HEAD. B added in otherbranch CONFLICT (content): Merge conflicts in B ...dozens of conflicts for OTHER paths... This is generally helpful, but does make a separate bug more problematic. In particular, while merge-recursive might report the following for a recursive merge: Auto-merging dir.c Auto-merging midx.c CONFLICT (content): Merge conflict in midx.c Auto-merging diff.c Auto-merging dir.c CONFLICT (content): Merge conflict in dir.c merge-ort would instead report: Auto-merging diff.c Auto-merging dir.c Auto-merging dir.c CONFLICT (content): Merge conflict in dir.c Auto-merging midx.c CONFLICT (content): Merge conflict in midx.c The fact that messages for the same file are together is probably helpful in general, but with the indentation missing for the inner merge it unfortunately serves to confuse. This probably would lead users to wonder: * Why is Git reporting that "dir.c" is being merged twice? * If midx.c has conflicts, why do I not see any when I open up the file and why are no conflicts shown in the index? Fix this output confusion by changing the output to clearly differentiate the messages for outer merges from the ones for inner merges, changing the above output from merge-ort to: Auto-merging diff.c From inner merge: Auto-merging dir.c Auto-merging dir.c CONFLICT (content): Merge conflict in dir.c From inner merge: Auto-merging midx.c From inner merge: CONFLICT (content): Merge conflict in midx.c (Note: the number of spaces after the 'From inner merge:' is 2*call_depth). One other thing to note here, that I didn't notice until typing up this commit message, is that merge-recursive does not print any messages from the inner merges by default; the extra verbosity has to be requested. 
merge-ort currently has no verbosity controls and always prints these. We may also want to change that, but for now, just make the output clearer with these extra markings and indentation. [1] https://lore.kernel.org/git/CAGyf7-He4in8JWUh9dpAwvoPkQz9hr8nCBpxOxhZEd8+jtqTpg@mail.gmail.com/ Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- merge-ort.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index d85b1cd99e..55decb2587 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -651,6 +651,11 @@ static void path_msg(struct merge_options *opt, dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb); va_start(ap, fmt); + if (opt->priv->call_depth) { + strbuf_addchars(dest, ' ', 2); + strbuf_addstr(dest, "From inner merge:"); + strbuf_addchars(dest, ' ', opt->priv->call_depth * 2); + } strbuf_vaddf(dest, fmt, ap); va_end(ap); From e2ac9141e64e2cd3e690d1b5fc848949827c09b4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Thu, 17 Feb 2022 16:24:23 -0800 Subject: [PATCH 116/150] The fifth batch Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/RelNotes/2.36.0.txt | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/RelNotes/2.36.0.txt b/Documentation/RelNotes/2.36.0.txt index 3dfa5e409f..3e1261c584 100644 --- a/Documentation/RelNotes/2.36.0.txt +++ b/Documentation/RelNotes/2.36.0.txt @@ -25,6 +25,15 @@ UI, Workflows & Features * "git log --remerge-diff" shows the difference from mechanical merge result and the result that is actually recorded in a merge commit. + * "git log" and friends learned an option --exclude-first-parent-only + to propagate UNINTERESTING bit down only along the first-parent + chain, just like --first-parent option shows commits that lack the + UNINTERESTING bit only along the first-parent chain. 
+ + * The command line completion script (in contrib/) learned to + complete all Git subcommands, including the ones that are normally + hidden, when GIT_COMPLETION_SHOW_ALL_COMMANDS is used. + Performance, Internal Implementation, Development Support etc. @@ -47,6 +56,9 @@ Performance, Internal Implementation, Development Support etc. all. Start the process of renaming it to "--annotate-stdin". (merge a2585719b3 jc/name-rev-stdin later to maint). + * "git update-index", "git checkout-index", and "git clean" are + taught to work better with the sparse checkout feature. + Fixes since v2.35 ----------------- @@ -140,6 +152,16 @@ Fixes since v2.35 * "git diff --diff-filter=aR" is now parsed correctly. (merge 75408ca949 js/diff-filter-negation-fix later to maint). + * When "git subtree" wants to create a merge, it used "git merge" and + let it be affected by end-user's "merge.ff" configuration, which + has been corrected. + (merge 9158a3564a tk/subtree-merge-not-ff-only later to maint). + + * Unlike "git apply", "git patch-id" did not handle patches with + hunks that have only 1 line in either preimage or postimage, which + has been corrected. + (merge 757e75c81e jz/patch-id-hunk-header-parsing-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge cfc5cf428b jc/find-header later to maint). (merge 40e7cfdd46 jh/p4-fix-use-of-process-error-exception later to maint). @@ -157,3 +179,5 @@ Fixes since v2.35 (merge 45d0212a71 ll/doc-mktree-typofix later to maint). (merge e9b272e4c1 js/no-more-legacy-stash later to maint). (merge 6798b08e84 ab/do-not-hide-failures-in-git-dot-pm later to maint). + (merge 9325285df4 po/doc-check-ignore-markup-fix later to maint). + (merge cd26cd6c7c sy/modernize-t-lib-read-tree-m-3way later to maint).
From e8d56ca863b146720bdf05b54c7c1e379d3d57b7 Mon Sep 17 00:00:00 2001 From: Taylor Blau <me@ttaylorr.com> Date: Fri, 18 Feb 2022 14:07:50 -0500 Subject: [PATCH 117/150] CODE_OF_CONDUCT.md: update PLC members list As part of our code of conduct, we maintain a list of active members on the Project Leadership Committee, which serves a couple of purposes. The details are in 3f9ef874a7 (CODE_OF_CONDUCT: mention individual project-leader emails, 2019-09-26), but the gist is as follows: - It makes it clear that people with a CoC complaint may contact members individually as opposed to the general PLC list (in case the subject of their complaint has to do with one of the committee members). - It also serves as the de-facto list of people on the PLC, which isn't committed anywhere else in the tree. As of [1], Peff is no longer a member of Git's Project Leadership Committee. Let's update the list of active members accordingly [2]. This also gives us a convenient opportunity to thank Peff for his many years of service on the PLC, during which he helped the Git community in more ways than we can easily list here. [1]: https://lore.kernel.org/git/YboaAe4LWySOoAe7@coredump.intra.peff.net/ [2]: https://lore.kernel.org/git/CAP8UFD2XxP9r3PJ4GQjxUbV=E1ASDq1NDgB-h+S=v-bZQ7DYwQ@mail.gmail.com/ Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- CODE_OF_CONDUCT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 65651beada..0215b1fd4c 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -70,8 +70,8 @@ git@sfconservancy.org, or individually: - Ævar Arnfjörð Bjarmason <avarab@gmail.com> - Christian Couder <christian.couder@gmail.com> - - Jeff King <peff@peff.net> - Junio C Hamano <gitster@pobox.com> + - Taylor Blau <me@ttaylorr.com> All complaints will be reviewed and investigated promptly and fairly. 
From e6ebfd0e8cbbd10878070c8a356b5ad1b3ca464e Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Fri, 18 Feb 2022 09:53:31 -0800 Subject: [PATCH 118/150] The sixth batch Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/RelNotes/2.36.0.txt | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Documentation/RelNotes/2.36.0.txt b/Documentation/RelNotes/2.36.0.txt index 3e1261c584..4e8309701b 100644 --- a/Documentation/RelNotes/2.36.0.txt +++ b/Documentation/RelNotes/2.36.0.txt @@ -34,6 +34,8 @@ UI, Workflows & Features complete all Git subcommands, including the ones that are normally hidden, when GIT_COMPLETION_SHOW_ALL_COMMANDS is used. + * "git branch" learned the "--recurse-submodules" option. + Performance, Internal Implementation, Development Support etc. @@ -59,6 +61,10 @@ Performance, Internal Implementation, Development Support etc. * "git update-index", "git checkout-index", and "git clean" are taught to work better with the sparse checkout feature. + * Use an internal call to reset_head() helper function instead of + spawning "git checkout" in "rebase", and update code paths that are + involved in the change. + Fixes since v2.35 ----------------- @@ -162,6 +168,32 @@ Fixes since v2.35 has been corrected. (merge 757e75c81e jz/patch-id-hunk-header-parsing-fix later to maint). + * "receive-pack" checks if it will do any ref updates (various + conditions could reject a push) before received objects are taken + out of the temporary directory used for quarantine purposes, so + that a push that is known-to-fail will not leave crufts that a + future "gc" needs to clean up. + (merge 5407764069 cb/clear-quarantine-early-on-all-ref-update-errors later to maint). 
+ + * Because a deletion of ref would need to remove it from both the + loose ref store and the packed ref store, a delete-ref operation + that logically removes one ref may end up invoking ref-transaction + hook twice, which has been corrected. + (merge 2ed1b64ebd ps/avoid-unnecessary-hook-invocation-with-packed-refs later to maint). + + * When there is no object to write .bitmap file for, "git + multi-pack-index" triggered an error, instead of just skipping, + which has been corrected. + (merge eb57277ba3 tb/midx-no-bitmap-for-no-objects later to maint). + + * "git cmd -h" outside a repository should error out cleanly for many + commands, but instead it hit a BUG(), which has been corrected. + (merge 87ad07d735 js/short-help-outside-repo-fix later to maint). + + * "working tree" and "per-worktree ref" were in glossary, but + "worktree" itself wasn't, which has been corrected. + (merge 2df5387ed0 jc/glossary-worktree later to maint). + * Other code cleanup, docfix, build fix, etc. (merge cfc5cf428b jc/find-header later to maint). (merge 40e7cfdd46 jh/p4-fix-use-of-process-error-exception later to maint). @@ -181,3 +213,5 @@ Fixes since v2.35 (merge 6798b08e84 ab/do-not-hide-failures-in-git-dot-pm later to maint). (merge 9325285df4 po/doc-check-ignore-markup-fix later to maint). (merge cd26cd6c7c sy/modernize-t-lib-read-tree-m-3way later to maint). + (merge d17294a05e ab/hash-object-leakfix later to maint). + (merge b8403129d3 jd/t0015-modernize later to maint). 
From dab1b7905d0b295f1acef9785bb2b9cbb0fdec84 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Wed, 23 Feb 2022 16:58:13 -0800 Subject: [PATCH 119/150] The seventh batch Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/RelNotes/2.36.0.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/RelNotes/2.36.0.txt b/Documentation/RelNotes/2.36.0.txt index 4e8309701b..78e037b865 100644 --- a/Documentation/RelNotes/2.36.0.txt +++ b/Documentation/RelNotes/2.36.0.txt @@ -194,6 +194,17 @@ Fixes since v2.35 "worktree" itself wasn't, which has been corrected. (merge 2df5387ed0 jc/glossary-worktree later to maint). + * L10n support for a few error messages. + (merge 3d3c23b3a7 bs/forbid-i18n-of-protocol-token-in-fetch-pack later to maint). + + * Test modernization. + (merge d4fe066e4b sy/t0001-use-path-is-helper later to maint). + + * "git log --graph --graph" used to leak a graph structure, and there + was no way to countermand "--graph" that appears earlier on the + command line. A "--no-graph" option has been added and resource + leakage has been plugged. + * Other code cleanup, docfix, build fix, etc. (merge cfc5cf428b jc/find-header later to maint). (merge 40e7cfdd46 jh/p4-fix-use-of-process-error-exception later to maint). @@ -215,3 +226,4 @@ Fixes since v2.35 (merge cd26cd6c7c sy/modernize-t-lib-read-tree-m-3way later to maint). (merge d17294a05e ab/hash-object-leakfix later to maint). (merge b8403129d3 jd/t0015-modernize later to maint). + (merge 332acc248d ds/mailmap later to maint).
From 715d08a9e51251ad8290b181b6ac3b9e1f9719d7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano <gitster@pobox.com> Date: Fri, 25 Feb 2022 15:47:20 -0800 Subject: [PATCH 120/150] The eighth batch Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/RelNotes/2.36.0.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/RelNotes/2.36.0.txt b/Documentation/RelNotes/2.36.0.txt index 78e037b865..de1e11e25a 100644 --- a/Documentation/RelNotes/2.36.0.txt +++ b/Documentation/RelNotes/2.36.0.txt @@ -9,6 +9,10 @@ Backward compatibility warts * "git name-rev --stdin" has been deprecated and issues a warning when used; use "git name-rev --annotate-stdin" instead. + * "git clone --filter=... --recurse-submodules" only makes the + top-level a partial clone, while submodules are fully cloned. This + behaviour is changed to pass the same filter down to the submodules. + Note to those who build from the source @@ -65,6 +69,10 @@ Performance, Internal Implementation, Development Support etc. spawning "git checkout" in "rebase", and update code paths that are involved in the change. + * Messages "ort" merge backend prepares while dealing with conflicted + paths were unnecessarily confusing since it did not differentiate + inner merges and outer merges. + Fixes since v2.35 ----------------- @@ -205,6 +213,25 @@ Fixes since v2.35 command line. A "--no-graph" option has been added and resource leakage has been plugged. + * Error output given in response to an ambiguous object name has been + improved. + (merge 3a73c1dfaf ab/ambiguous-object-name later to maint). + + * "git sparse-checkout" wants to work with per-worktree configuration, + but did not work well in a worktree attached to a bare repository. + (merge 3ce1138272 ds/sparse-checkout-requires-per-worktree-config later to maint). + + * Setting core.untrackedCache to true failed to add the untracked + cache extension to the index. 
+ + * Workaround we have for versions of PCRE2 before their version 10.36 + were in effect only for their versions newer than 10.36 by mistake, + which has been corrected. + (merge 97169fc361 rs/pcre-invalid-utf8-fix-fix later to maint). + + * Document Taylor as a new member of Git PLC at SFC. Welcome. + (merge e8d56ca863 tb/coc-plc-update later to maint). + * Other code cleanup, docfix, build fix, etc. (merge cfc5cf428b jc/find-header later to maint). (merge 40e7cfdd46 jh/p4-fix-use-of-process-error-exception later to maint). @@ -227,3 +254,5 @@ Fixes since v2.35 (merge d17294a05e ab/hash-object-leakfix later to maint). (merge b8403129d3 jd/t0015-modernize later to maint). (merge 332acc248d ds/mailmap later to maint). + (merge 04bf052eef ab/grep-patterntype later to maint). + (merge 6ee36364eb ab/diff-free-more later to maint). From 974c1b398711b8782c01bbd3aec959a458296da8 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:44 +0000 Subject: [PATCH 121/150] fsmonitor: enhance existing comments, clarify trivial response handling Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fsmonitor.c | 64 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/fsmonitor.c b/fsmonitor.c index ab9bfc60b3..448d0ee33f 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -168,29 +168,15 @@ static int query_fsmonitor(int version, const char *last_update, struct strbuf * if (result) trace2_data_intmax("fsm_hook", NULL, "query/failed", result); - else { + else trace2_data_intmax("fsm_hook", NULL, "query/response-length", query_result->len); - if (fsmonitor_is_trivial_response(query_result)) - trace2_data_intmax("fsm_hook", NULL, - "query/trivial-response", 1); - } - trace2_region_leave("fsm_hook", "query", NULL); return result; } -int fsmonitor_is_trivial_response(const struct strbuf *query_result) -{ - static char 
trivial_response[3] = { '\0', '/', '\0' }; - - return query_result->len >= 3 && - !memcmp(trivial_response, - &query_result->buf[query_result->len - 3], 3); -} - static void fsmonitor_refresh_callback(struct index_state *istate, char *name) { int i, len = strlen(name); @@ -238,6 +224,7 @@ void refresh_fsmonitor(struct index_state *istate) struct strbuf last_update_token = STRBUF_INIT; char *buf; unsigned int i; + int is_trivial = 0; if (!core_fsmonitor || istate->fsmonitor_has_run_once) return; @@ -283,6 +270,7 @@ void refresh_fsmonitor(struct index_state *istate) query_success = 0; } else { bol = last_update_token.len + 1; + is_trivial = query_result.buf[bol] == '/'; } } else if (hook_version < 0) { hook_version = HOOK_INTERFACE_VERSION1; @@ -294,16 +282,38 @@ void refresh_fsmonitor(struct index_state *istate) if (hook_version == HOOK_INTERFACE_VERSION1) { query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1, istate->fsmonitor_last_update, &query_result); + if (query_success) + is_trivial = query_result.buf[0] == '/'; } + if (is_trivial) + trace2_data_intmax("fsm_hook", NULL, + "query/trivial-response", 1); + trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor); trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s", core_fsmonitor, query_success ? "success" : "failure"); } - /* a fsmonitor process can return '/' to indicate all entries are invalid */ - if (query_success && query_result.buf[bol] != '/') { - /* Mark all entries returned by the monitor as dirty */ + /* + * The response from FSMonitor (excluding the header token) is + * either: + * + * [a] a (possibly empty) list of NUL delimited relative + * pathnames of changed paths. This list can contain + * files and directories. Directories have a trailing + * slash. + * + * [b] a single '/' to indicate the provider had no + * information and that we should consider everything + * invalid. We call this a trivial response. 
+ */ + if (query_success && !is_trivial) { + /* + * Mark all pathnames returned by the monitor as dirty. + * + * This updates both the cache-entries and the untracked-cache. + */ buf = query_result.buf; for (i = bol; i < query_result.len; i++) { if (buf[i] != '\0') @@ -318,11 +328,16 @@ void refresh_fsmonitor(struct index_state *istate) if (istate->untracked) istate->untracked->use_fsmonitor = 1; } else { - - /* We only want to run the post index changed hook if we've actually changed entries, so keep track - * if we actually changed entries or not */ + /* + * We failed to get a response or received a trivial response, + * so invalidate everything. + * + * We only want to run the post index changed hook if + * we've actually changed entries, so keep track if we + * actually changed entries or not. + */ int is_cache_changed = 0; - /* Mark all entries invalid */ + for (i = 0; i < istate->cache_nr; i++) { if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) { is_cache_changed = 1; @@ -330,7 +345,10 @@ void refresh_fsmonitor(struct index_state *istate) } } - /* If we're going to check every file, ensure we save the results */ + /* + * If we're going to check every file, ensure we save + * the results. + */ if (is_cache_changed) istate->cache_changed |= FSMONITOR_CHANGED; From d2bd862e7a4a791d88740cc81aad14cdcd90dd2c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:45 +0000 Subject: [PATCH 122/150] fsmonitor-ipc: create client routines for git-fsmonitor--daemon Create fsmonitor_ipc__*() client routines to spawn the built-in file system monitor daemon and send it an IPC request using the `Simple IPC` API. Stub in empty fsmonitor_ipc__*() functions for unsupported platforms. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 1 + fsmonitor-ipc.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++ fsmonitor-ipc.h | 48 ++++++++++++++ 3 files changed, 220 insertions(+) create mode 100644 fsmonitor-ipc.c create mode 100644 fsmonitor-ipc.h diff --git a/Makefile b/Makefile index 6f0b4b775f..a19d850e71 100644 --- a/Makefile +++ b/Makefile @@ -907,6 +907,7 @@ LIB_OBJS += fetch-pack.o LIB_OBJS += fmt-merge-msg.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o +LIB_OBJS += fsmonitor-ipc.o LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o diff --git a/fsmonitor-ipc.c b/fsmonitor-ipc.c new file mode 100644 index 0000000000..789e7397ba --- /dev/null +++ b/fsmonitor-ipc.c @@ -0,0 +1,171 @@ +#include "cache.h" +#include "fsmonitor.h" +#include "simple-ipc.h" +#include "fsmonitor-ipc.h" +#include "run-command.h" +#include "strbuf.h" +#include "trace2.h" + +#ifndef HAVE_FSMONITOR_DAEMON_BACKEND + +/* + * A trivial implementation of the fsmonitor_ipc__ API for unsupported + * platforms. 
+ */ + +int fsmonitor_ipc__is_supported(void) +{ + return 0; +} + +const char *fsmonitor_ipc__get_path(void) +{ + return NULL; +} + +enum ipc_active_state fsmonitor_ipc__get_state(void) +{ + return IPC_STATE__OTHER_ERROR; +} + +int fsmonitor_ipc__send_query(const char *since_token, + struct strbuf *answer) +{ + return -1; +} + +int fsmonitor_ipc__send_command(const char *command, + struct strbuf *answer) +{ + return -1; +} + +#else + +int fsmonitor_ipc__is_supported(void) +{ + return 1; +} + +GIT_PATH_FUNC(fsmonitor_ipc__get_path, "fsmonitor--daemon.ipc") + +enum ipc_active_state fsmonitor_ipc__get_state(void) +{ + return ipc_get_active_state(fsmonitor_ipc__get_path()); +} + +static int spawn_daemon(void) +{ + const char *args[] = { "fsmonitor--daemon", "start", NULL }; + + return run_command_v_opt_tr2(args, RUN_COMMAND_NO_STDIN | RUN_GIT_CMD, + "fsmonitor"); +} + +int fsmonitor_ipc__send_query(const char *since_token, + struct strbuf *answer) +{ + int ret = -1; + int tried_to_spawn = 0; + enum ipc_active_state state = IPC_STATE__OTHER_ERROR; + struct ipc_client_connection *connection = NULL; + struct ipc_client_connect_options options + = IPC_CLIENT_CONNECT_OPTIONS_INIT; + const char *tok = since_token ? since_token : ""; + size_t tok_len = since_token ? 
strlen(since_token) : 0; + + options.wait_if_busy = 1; + options.wait_if_not_found = 0; + + trace2_region_enter("fsm_client", "query", NULL); + trace2_data_string("fsm_client", NULL, "query/command", tok); + +try_again: + state = ipc_client_try_connect(fsmonitor_ipc__get_path(), &options, + &connection); + + switch (state) { + case IPC_STATE__LISTENING: + ret = ipc_client_send_command_to_connection( + connection, tok, tok_len, answer); + ipc_client_close_connection(connection); + + trace2_data_intmax("fsm_client", NULL, + "query/response-length", answer->len); + goto done; + + case IPC_STATE__NOT_LISTENING: + case IPC_STATE__PATH_NOT_FOUND: + if (tried_to_spawn) + goto done; + + tried_to_spawn++; + if (spawn_daemon()) + goto done; + + /* + * Try again, but this time give the daemon a chance to + * actually create the pipe/socket. + * + * Granted, the daemon just started so it can't possibly have + * any FS cached yet, so we'll always get a trivial answer. + * BUT the answer should include a new token that can serve + * as the basis for subsequent requests. + */ + options.wait_if_not_found = 1; + goto try_again; + + case IPC_STATE__INVALID_PATH: + ret = error(_("fsmonitor_ipc__send_query: invalid path '%s'"), + fsmonitor_ipc__get_path()); + goto done; + + case IPC_STATE__OTHER_ERROR: + default: + ret = error(_("fsmonitor_ipc__send_query: unspecified error on '%s'"), + fsmonitor_ipc__get_path()); + goto done; + } + +done: + trace2_region_leave("fsm_client", "query", NULL); + + return ret; +} + +int fsmonitor_ipc__send_command(const char *command, + struct strbuf *answer) +{ + struct ipc_client_connection *connection = NULL; + struct ipc_client_connect_options options + = IPC_CLIENT_CONNECT_OPTIONS_INIT; + int ret; + enum ipc_active_state state; + const char *c = command ? command : ""; + size_t c_len = command ? 
strlen(command) : 0; + + strbuf_reset(answer); + + options.wait_if_busy = 1; + options.wait_if_not_found = 0; + + state = ipc_client_try_connect(fsmonitor_ipc__get_path(), &options, + &connection); + if (state != IPC_STATE__LISTENING) { + die(_("fsmonitor--daemon is not running")); + return -1; + } + + ret = ipc_client_send_command_to_connection(connection, c, c_len, + answer); + ipc_client_close_connection(connection); + + if (ret == -1) { + die(_("could not send '%s' command to fsmonitor--daemon"), c); + return -1; + } + + return 0; +} + +#endif diff --git a/fsmonitor-ipc.h b/fsmonitor-ipc.h new file mode 100644 index 0000000000..b6a7067c3a --- /dev/null +++ b/fsmonitor-ipc.h @@ -0,0 +1,48 @@ +#ifndef FSMONITOR_IPC_H +#define FSMONITOR_IPC_H + +#include "simple-ipc.h" + +/* + * Returns true if built-in file system monitor daemon is defined + * for this platform. + */ +int fsmonitor_ipc__is_supported(void); + +/* + * Returns the pathname to the IPC named pipe or Unix domain socket + * where a `git-fsmonitor--daemon` process will listen. This is a + * per-worktree value. + * + * Returns NULL if the daemon is not supported on this platform. + */ +const char *fsmonitor_ipc__get_path(void); + +/* + * Try to determine whether there is a `git-fsmonitor--daemon` process + * listening on the IPC pipe/socket. + */ +enum ipc_active_state fsmonitor_ipc__get_state(void); + +/* + * Connect to a `git-fsmonitor--daemon` process via simple-ipc + * and ask for the set of changed files since the given token. + * + * Spawn a daemon process in the background if necessary. + * + * Returns -1 on error; 0 on success. + */ +int fsmonitor_ipc__send_query(const char *since_token, + struct strbuf *answer); + +/* + * Connect to a `git-fsmonitor--daemon` process via simple-ipc and + * send a command verb. If no daemon is available, we DO NOT try to + * start one. + * + * Returns -1 on error; 0 on success. 
+ */ +int fsmonitor_ipc__send_command(const char *command, + struct strbuf *answer); + +#endif /* FSMONITOR_IPC_H */ From 1e0ea5c4316d2241dd76ef430a2779db9a097dfb Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:46 +0000 Subject: [PATCH 123/150] fsmonitor: config settings are repository-specific Move fsmonitor config settings to a new and opaque `struct fsmonitor_settings` structure. Add a lazily-loaded pointer to this into `struct repo_settings`. Create an `enum fsmonitor_mode` type in `struct fsmonitor_settings` to represent the state of fsmonitor. This lets us represent which, if any, fsmonitor provider (hook or IPC) is enabled. Create `fsm_settings__get_*()` getters to lazily look up fsmonitor-related config settings. Get rid of the `core_fsmonitor` global variable. Move the code to look up the existing `core.fsmonitor` config value into the fsmonitor settings. Create a hook pathname variable in `struct fsmonitor_settings` and only set it when in hook mode. Extend the definition of `core.fsmonitor` to be either a boolean or a hook pathname. When true, the builtin FSMonitor is used. When false or unset, no FSMonitor (neither builtin nor hook) is used. The existing `core_fsmonitor` global variable was used to store the pathname to the fsmonitor hook *and* it was used as a boolean to see if fsmonitor was enabled. This dual usage and global visibility lead to confusion when we add the IPC-based provider. So let's hide the details in fsmonitor-settings.c and let it decide which provider to use in the case of multiple settings. This avoids cluttering up repo-settings.c with these private details. A future commit in builtin-fsmonitor series will add the ability to disqualify worktrees for various reasons, such as being mounted from a remote volume, where fsmonitor should not be started. Having the config settings hidden in fsmonitor-settings.c allows such worktree restrictions to override the config values used.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 1 + builtin/update-index.c | 7 ++- cache.h | 1 - config.c | 14 ----- config.h | 1 - environment.c | 1 - fsmonitor-settings.c | 114 +++++++++++++++++++++++++++++++++++++++++ fsmonitor-settings.h | 21 ++++++++ fsmonitor.c | 63 ++++++++++++++--------- fsmonitor.h | 15 ++++-- repository.h | 3 ++ t/README | 4 +- 12 files changed, 196 insertions(+), 49 deletions(-) create mode 100644 fsmonitor-settings.c create mode 100644 fsmonitor-settings.h diff --git a/Makefile b/Makefile index a19d850e71..707a56d4c1 100644 --- a/Makefile +++ b/Makefile @@ -908,6 +908,7 @@ LIB_OBJS += fmt-merge-msg.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o LIB_OBJS += fsmonitor-ipc.o +LIB_OBJS += fsmonitor-settings.o LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o diff --git a/builtin/update-index.c b/builtin/update-index.c index aafe7eeac2..876112abb2 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -1236,14 +1236,17 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) } if (fsmonitor > 0) { - if (git_config_get_fsmonitor() == 0) + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r); + if (fsm_mode == FSMONITOR_MODE_DISABLED) { warning(_("core.fsmonitor is unset; " "set it if you really want to " "enable fsmonitor")); + } add_fsmonitor(&the_index); report(_("fsmonitor enabled")); } else if (!fsmonitor) { - if (git_config_get_fsmonitor() == 1) + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r); + if (fsm_mode > FSMONITOR_MODE_DISABLED) warning(_("core.fsmonitor is set; " "remove it if you really want to " "disable fsmonitor")); diff --git a/cache.h b/cache.h index 04d4d2db25..aaf334e2aa 100644 --- a/cache.h +++ b/cache.h @@ -999,7 +999,6 @@ extern int core_preload_index; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; -extern const char *core_fsmonitor; extern int 
core_apply_sparse_checkout; extern int core_sparse_checkout_cone; diff --git a/config.c b/config.c index 383b1a4885..3f9b0739a7 100644 --- a/config.c +++ b/config.c @@ -2626,20 +2626,6 @@ int git_config_get_max_percent_split_change(void) return -1; /* default value */ } -int git_config_get_fsmonitor(void) -{ - if (git_config_get_pathname("core.fsmonitor", &core_fsmonitor)) - core_fsmonitor = getenv("GIT_TEST_FSMONITOR"); - - if (core_fsmonitor && !*core_fsmonitor) - core_fsmonitor = NULL; - - if (core_fsmonitor) - return 1; - - return 0; -} - int git_config_get_index_threads(int *dest) { int is_bool, val; diff --git a/config.h b/config.h index bb49baf1ee..7654f61c63 100644 --- a/config.h +++ b/config.h @@ -597,7 +597,6 @@ int git_config_get_pathname(const char *key, const char **dest); int git_config_get_index_threads(int *dest); int git_config_get_split_index(void); int git_config_get_max_percent_split_change(void); -int git_config_get_fsmonitor(void); /* This dies if the configured or default date is in the future */ int git_config_get_expiry(const char *key, const char **output); diff --git a/environment.c b/environment.c index fd0501e77a..00682e638d 100644 --- a/environment.c +++ b/environment.c @@ -84,7 +84,6 @@ int protect_hfs = PROTECT_HFS_DEFAULT; #define PROTECT_NTFS_DEFAULT 1 #endif int protect_ntfs = PROTECT_NTFS_DEFAULT; -const char *core_fsmonitor; /* * The character that begins a commented line in user-editable file diff --git a/fsmonitor-settings.c b/fsmonitor-settings.c new file mode 100644 index 0000000000..757d230d53 --- /dev/null +++ b/fsmonitor-settings.c @@ -0,0 +1,114 @@ +#include "cache.h" +#include "config.h" +#include "repository.h" +#include "fsmonitor-settings.h" + +/* + * We keep this structure definition private and have getters + * for all fields so that we can lazy load it as needed.
+ */ +struct fsmonitor_settings { + enum fsmonitor_mode mode; + char *hook_path; +}; + +static void lookup_fsmonitor_settings(struct repository *r) +{ + struct fsmonitor_settings *s; + const char *const_str; + int bool_value; + + if (r->settings.fsmonitor) + return; + + CALLOC_ARRAY(s, 1); + s->mode = FSMONITOR_MODE_DISABLED; + + r->settings.fsmonitor = s; + + /* + * Overload the existing "core.fsmonitor" config setting (which + * has historically been either unset or a hook pathname) to + * now allow a boolean value to enable the builtin FSMonitor + * or to turn everything off. (This does imply that you can't + * use a hook script named "true" or "false", but that's OK.) + */ + switch (repo_config_get_maybe_bool(r, "core.fsmonitor", &bool_value)) { + + case 0: /* config value was set to <bool> */ + if (bool_value) + fsm_settings__set_ipc(r); + return; + + case 1: /* config value was unset */ + const_str = getenv("GIT_TEST_FSMONITOR"); + break; + + case -1: /* config value set to an arbitrary string */ + if (repo_config_get_pathname(r, "core.fsmonitor", &const_str)) + return; /* should not happen */ + break; + + default: /* should not happen */ + return; + } + + if (!const_str || !*const_str) + return; + + fsm_settings__set_hook(r, const_str); +} + +enum fsmonitor_mode fsm_settings__get_mode(struct repository *r) +{ + if (!r) + r = the_repository; + + lookup_fsmonitor_settings(r); + + return r->settings.fsmonitor->mode; +} + +const char *fsm_settings__get_hook_path(struct repository *r) +{ + if (!r) + r = the_repository; + + lookup_fsmonitor_settings(r); + + return r->settings.fsmonitor->hook_path; +} + +void fsm_settings__set_ipc(struct repository *r) +{ + if (!r) + r = the_repository; + + lookup_fsmonitor_settings(r); + + r->settings.fsmonitor->mode = FSMONITOR_MODE_IPC; + FREE_AND_NULL(r->settings.fsmonitor->hook_path); +} + +void fsm_settings__set_hook(struct repository *r, const char *path) +{ + if (!r) + r = the_repository; + + lookup_fsmonitor_settings(r); + 
+ r->settings.fsmonitor->mode = FSMONITOR_MODE_HOOK; + FREE_AND_NULL(r->settings.fsmonitor->hook_path); + r->settings.fsmonitor->hook_path = strdup(path); +} + +void fsm_settings__set_disabled(struct repository *r) +{ + if (!r) + r = the_repository; + + lookup_fsmonitor_settings(r); + + r->settings.fsmonitor->mode = FSMONITOR_MODE_DISABLED; + FREE_AND_NULL(r->settings.fsmonitor->hook_path); +} diff --git a/fsmonitor-settings.h b/fsmonitor-settings.h new file mode 100644 index 0000000000..a4c5d7b488 --- /dev/null +++ b/fsmonitor-settings.h @@ -0,0 +1,21 @@ +#ifndef FSMONITOR_SETTINGS_H +#define FSMONITOR_SETTINGS_H + +struct repository; + +enum fsmonitor_mode { + FSMONITOR_MODE_DISABLED = 0, + FSMONITOR_MODE_HOOK = 1, /* core.fsmonitor=<hook_path> */ + FSMONITOR_MODE_IPC = 2, /* core.fsmonitor=<true> */ +}; + +void fsm_settings__set_ipc(struct repository *r); +void fsm_settings__set_hook(struct repository *r, const char *path); +void fsm_settings__set_disabled(struct repository *r); + +enum fsmonitor_mode fsm_settings__get_mode(struct repository *r); +const char *fsm_settings__get_hook_path(struct repository *r); + +struct fsmonitor_settings; + +#endif /* FSMONITOR_SETTINGS_H */ diff --git a/fsmonitor.c b/fsmonitor.c index 448d0ee33f..0e961b74d8 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -3,6 +3,7 @@ #include "dir.h" #include "ewah/ewok.h" #include "fsmonitor.h" +#include "fsmonitor-ipc.h" #include "run-command.h" #include "strbuf.h" @@ -148,15 +149,18 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) /* * Call the query-fsmonitor hook passing the last update token of the saved results. 
*/ -static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result) +static int query_fsmonitor_hook(struct repository *r, + int version, + const char *last_update, + struct strbuf *query_result) { struct child_process cp = CHILD_PROCESS_INIT; int result; - if (!core_fsmonitor) + if (fsm_settings__get_mode(r) != FSMONITOR_MODE_HOOK) return -1; - strvec_push(&cp.args, core_fsmonitor); + strvec_push(&cp.args, fsm_settings__get_hook_path(r)); strvec_pushf(&cp.args, "%d", version); strvec_pushf(&cp.args, "%s", last_update); cp.use_shell = 1; @@ -225,17 +229,28 @@ void refresh_fsmonitor(struct index_state *istate) char *buf; unsigned int i; int is_trivial = 0; + struct repository *r = istate->repo ? istate->repo : the_repository; + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(r); - if (!core_fsmonitor || istate->fsmonitor_has_run_once) + if (fsm_mode <= FSMONITOR_MODE_DISABLED || + istate->fsmonitor_has_run_once) return; - hook_version = fsmonitor_hook_version(); - istate->fsmonitor_has_run_once = 1; trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); + + if (fsm_mode == FSMONITOR_MODE_IPC) { + /* TODO */ + return; + } + + assert(fsm_mode == FSMONITOR_MODE_HOOK); + + hook_version = fsmonitor_hook_version(); + /* - * This could be racy so save the date/time now and query_fsmonitor + * This could be racy so save the date/time now and query_fsmonitor_hook * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); @@ -243,13 +258,14 @@ void refresh_fsmonitor(struct index_state *istate) strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* - * If we have a last update token, call query_fsmonitor for the set of + * If we have a last update token, call query_fsmonitor_hook for the set of * changes since that token, else assume everything is possibly dirty * and check it all. 
*/ if (istate->fsmonitor_last_update) { if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2, + query_success = !query_fsmonitor_hook( + r, HOOK_INTERFACE_VERSION2, istate->fsmonitor_last_update, &query_result); if (query_success) { @@ -280,7 +296,8 @@ void refresh_fsmonitor(struct index_state *istate) } if (hook_version == HOOK_INTERFACE_VERSION1) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1, + query_success = !query_fsmonitor_hook( + r, HOOK_INTERFACE_VERSION1, istate->fsmonitor_last_update, &query_result); if (query_success) is_trivial = query_result.buf[0] == '/'; @@ -290,9 +307,12 @@ void refresh_fsmonitor(struct index_state *istate) trace2_data_intmax("fsm_hook", NULL, "query/trivial-response", 1); - trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor); - trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s", - core_fsmonitor, query_success ? "success" : "failure"); + trace_performance_since(last_update, "fsmonitor process '%s'", + fsm_settings__get_hook_path(r)); + trace_printf_key(&trace_fsmonitor, + "fsmonitor process '%s' returned %s", + fsm_settings__get_hook_path(r), + query_success ? 
"success" : "failure"); } /* @@ -429,7 +449,8 @@ void remove_fsmonitor(struct index_state *istate) void tweak_fsmonitor(struct index_state *istate) { unsigned int i; - int fsmonitor_enabled = git_config_get_fsmonitor(); + int fsmonitor_enabled = (fsm_settings__get_mode(istate->repo) + > FSMONITOR_MODE_DISABLED); if (istate->fsmonitor_dirty) { if (fsmonitor_enabled) { @@ -449,16 +470,8 @@ void tweak_fsmonitor(struct index_state *istate) istate->fsmonitor_dirty = NULL; } - switch (fsmonitor_enabled) { - case -1: /* keep: do nothing */ - break; - case 0: /* false */ - remove_fsmonitor(istate); - break; - case 1: /* true */ + if (fsmonitor_enabled) add_fsmonitor(istate); - break; - default: /* unknown value: do nothing */ - break; - } + else + remove_fsmonitor(istate); } diff --git a/fsmonitor.h b/fsmonitor.h index f20d72631d..3f41f65369 100644 --- a/fsmonitor.h +++ b/fsmonitor.h @@ -3,6 +3,7 @@ #include "cache.h" #include "dir.h" +#include "fsmonitor-settings.h" extern struct trace_key trace_fsmonitor; @@ -57,7 +58,10 @@ int fsmonitor_is_trivial_response(const struct strbuf *query_result); */ static inline int is_fsmonitor_refreshed(const struct index_state *istate) { - return !core_fsmonitor || istate->fsmonitor_has_run_once; + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(istate->repo); + + return fsm_mode <= FSMONITOR_MODE_DISABLED || + istate->fsmonitor_has_run_once; } /* @@ -67,7 +71,10 @@ static inline int is_fsmonitor_refreshed(const struct index_state *istate) */ static inline void mark_fsmonitor_valid(struct index_state *istate, struct cache_entry *ce) { - if (core_fsmonitor && !(ce->ce_flags & CE_FSMONITOR_VALID)) { + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(istate->repo); + + if (fsm_mode > FSMONITOR_MODE_DISABLED && + !(ce->ce_flags & CE_FSMONITOR_VALID)) { istate->cache_changed = 1; ce->ce_flags |= CE_FSMONITOR_VALID; trace_printf_key(&trace_fsmonitor, "mark_fsmonitor_clean '%s'", ce->name); @@ -83,7 +90,9 @@ static inline void 
mark_fsmonitor_valid(struct index_state *istate, struct cache */ static inline void mark_fsmonitor_invalid(struct index_state *istate, struct cache_entry *ce) { - if (core_fsmonitor) { + enum fsmonitor_mode fsm_mode = fsm_settings__get_mode(istate->repo); + + if (fsm_mode > FSMONITOR_MODE_DISABLED) { ce->ce_flags &= ~CE_FSMONITOR_VALID; untracked_cache_invalidate_path(istate, ce->name, 1); trace_printf_key(&trace_fsmonitor, "mark_fsmonitor_invalid '%s'", ce->name); diff --git a/repository.h b/repository.h index ca837cb9e9..9bbb4659cc 100644 --- a/repository.h +++ b/repository.h @@ -4,6 +4,7 @@ #include "path.h" struct config_set; +struct fsmonitor_settings; struct git_hash_algo; struct index_state; struct lock_file; @@ -35,6 +36,8 @@ struct repo_settings { int command_requires_full_index; int sparse_index; + struct fsmonitor_settings *fsmonitor; /* lazily loaded */ + int index_version; enum untracked_cache_setting core_untracked_cache; diff --git a/t/README b/t/README index f48e0542cd..9ffea1d314 100644 --- a/t/README +++ b/t/README @@ -405,8 +405,8 @@ every 'git commit-graph write', as if the `--changed-paths` option was passed in. GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor -code path for utilizing a file system monitor to speed up detecting -new or changed files. +code paths for utilizing a (hook based) file system monitor to speed up +detecting new or changed files. GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path for the index version specified. Can be set to any valid version From 9c307e8afd930485a8409cadf8cc7065512d9845 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:47 +0000 Subject: [PATCH 124/150] fsmonitor: use IPC to query the builtin FSMonitor daemon Use simple IPC to directly communicate with the new builtin file system monitor daemon when `core.fsmonitor` is set to true. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fsmonitor.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/fsmonitor.c b/fsmonitor.c index 0e961b74d8..a38b5710eb 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -241,8 +241,41 @@ void refresh_fsmonitor(struct index_state *istate) trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); if (fsm_mode == FSMONITOR_MODE_IPC) { - /* TODO */ - return; + query_success = !fsmonitor_ipc__send_query( + istate->fsmonitor_last_update ? + istate->fsmonitor_last_update : "builtin:fake", + &query_result); + if (query_success) { + /* + * The response contains a series of nul terminated + * strings. The first is the new token. + * + * Use `char *buf` as an interlude to trick the CI + * static analysis to let us use `strbuf_addstr()` + * here (and only copy the token) rather than + * `strbuf_addbuf()`. + */ + buf = query_result.buf; + strbuf_addstr(&last_update_token, buf); + bol = last_update_token.len + 1; + is_trivial = query_result.buf[bol] == '/'; + if (is_trivial) + trace2_data_intmax("fsm_client", NULL, + "query/trivial-response", 1); + } else { + /* + * The builtin daemon is not available on this + * platform -OR- we failed to get a response. + * + * Generate a fake token (rather than a V1 + * timestamp) for the index extension. (If + * they switch back to the hook API, we don't + * want ambiguous state.) + */ + strbuf_addstr(&last_update_token, "builtin:fake"); + } + + goto apply_results; } assert(fsm_mode == FSMONITOR_MODE_HOOK); @@ -315,6 +348,7 @@ void refresh_fsmonitor(struct index_state *istate) query_success ? 
"success" : "failure"); } +apply_results: /* * The response from FSMonitor (excluding the header token) is * either: From 3248486920d0bfc5584747dc7af8414d05282191 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:48 +0000 Subject: [PATCH 125/150] fsmonitor: document builtin fsmonitor Document how `core.fsmonitor` can be set to a boolean to enable or disable the builtin FSMonitor. Update references to `core.fsmonitor` and `core.fsmonitorHookVersion` and pointers to `Watchman` to refer to it. Create `git-fsmonitor--daemon` manual page and describe its features. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/core.txt | 60 +++++++++++++++----- Documentation/git-fsmonitor--daemon.txt | 75 +++++++++++++++++++++++++ Documentation/git-update-index.txt | 8 ++- 3 files changed, 126 insertions(+), 17 deletions(-) create mode 100644 Documentation/git-fsmonitor--daemon.txt diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index c04f62a54a..6303c36c7e 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -62,22 +62,54 @@ core.protectNTFS:: Defaults to `true` on Windows, and `false` elsewhere. core.fsmonitor:: - If set, the value of this variable is used as a command which - will identify all files that may have changed since the - requested date/time. This information is used to speed up git by - avoiding unnecessary processing of files that have not changed. - See the "fsmonitor-watchman" section of linkgit:githooks[5]. + If set to true, enable the built-in file system monitor + daemon for this working directory (linkgit:git-fsmonitor--daemon[1]). ++ +Like hook-based file system monitors, the built-in file system monitor +can speed up Git commands that need to refresh the Git index +(e.g. `git status`) in a working directory with many files. 
The +built-in monitor eliminates the need to install and maintain an +external third-party tool. ++ +The built-in file system monitor is currently available only on a +limited set of supported platforms. Currently, this includes Windows +and MacOS. ++ + Otherwise, this variable contains the pathname of the "fsmonitor" + hook command. ++ +This hook command is used to identify all files that may have changed +since the requested date/time. This information is used to speed up +git by avoiding unnecessary scanning of files that have not changed. ++ +See the "fsmonitor-watchman" section of linkgit:githooks[5]. ++ +Note that if you concurrently use multiple versions of Git, such +as one version on the command line and another version in an IDE +tool, that the definition of `core.fsmonitor` was extended to +allow boolean values in addition to hook pathnames. Git versions +2.35.1 and prior will not understand the boolean values and will +consider the "true" or "false" values as hook pathnames to be +invoked. Git versions 2.26 thru 2.35.1 default to hook protocol +V2 and will fall back to no fsmonitor (full scan). Git versions +prior to 2.26 default to hook protocol V1 and will silently +assume there were no changes to report (no scan), so status +commands may report incomplete results. For this reason, it is +best to upgrade all of your Git versions before using the built-in +file system monitor. core.fsmonitorHookVersion:: - Sets the version of hook that is to be used when calling fsmonitor. - There are currently versions 1 and 2. When this is not set, - version 2 will be tried first and if it fails then version 1 - will be tried. Version 1 uses a timestamp as input to determine - which files have changes since that time but some monitors - like watchman have race conditions when used with a timestamp. - Version 2 uses an opaque string so that the monitor can return - something that can be used to determine what files have changed - without race conditions. 
+ Sets the protocol version to be used when invoking the + "fsmonitor" hook. ++ +There are currently versions 1 and 2. When this is not set, +version 2 will be tried first and if it fails then version 1 +will be tried. Version 1 uses a timestamp as input to determine +which files have changes since that time but some monitors +like Watchman have race conditions when used with a timestamp. +Version 2 uses an opaque string so that the monitor can return +something that can be used to determine what files have changed +without race conditions. core.trustctime:: If false, the ctime differences between the index and the diff --git a/Documentation/git-fsmonitor--daemon.txt b/Documentation/git-fsmonitor--daemon.txt new file mode 100644 index 0000000000..0fedf5a456 --- /dev/null +++ b/Documentation/git-fsmonitor--daemon.txt @@ -0,0 +1,75 @@ +git-fsmonitor--daemon(1) +======================== + +NAME +---- +git-fsmonitor--daemon - A Built-in File System Monitor + +SYNOPSIS +-------- +[verse] +'git fsmonitor--daemon' start +'git fsmonitor--daemon' run +'git fsmonitor--daemon' stop +'git fsmonitor--daemon' status + +DESCRIPTION +----------- + +A daemon to watch the working directory for file and directory +changes using platform-specific file system notification facilities. + +This daemon communicates directly with commands like `git status` +using the link:technical/api-simple-ipc.html[simple IPC] interface +instead of the slower linkgit:githooks[5] interface. + +This daemon is built into Git so that no third-party tools are +required. + +OPTIONS +------- + +start:: + Starts a daemon in the background. + +run:: + Runs a daemon in the foreground. + +stop:: + Stops the daemon running in the current working + directory, if present. + +status:: + Exits with zero status if a daemon is watching the + current working directory. 
+ +REMARKS +------- + +This daemon is a long running process used to watch a single working +directory and maintain a list of the recently changed files and +directories. Performance of commands such as `git status` can be +increased if they just ask for a summary of changes to the working +directory and can avoid scanning the disk. + +When `core.fsmonitor` is set to `true` (see linkgit:git-config[1]) +commands, such as `git status`, will ask the daemon for changes and +automatically start it (if necessary). + +For more information see the "File System Monitor" section in +linkgit:git-update-index[1]. + +CAVEATS +------- + +The fsmonitor daemon does not currently know about submodules and does +not know to filter out file system events that happen within a +submodule. If fsmonitor daemon is watching a super repo and a file is +modified within the working directory of a submodule, it will report +the change (as happening against the super repo). However, the client +will properly ignore these extra events, so performance may be affected +but it will not cause an incorrect result. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index 2853f168d9..53ea48a04e 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -498,7 +498,9 @@ FILE SYSTEM MONITOR This feature is intended to speed up git operations for repos that have large working directories. -It enables git to work together with a file system monitor (see the +It enables git to work together with a file system monitor (see +linkgit:git-fsmonitor--daemon[1] +and the "fsmonitor-watchman" section of linkgit:githooks[5]) that can inform it as to what files have been modified. This enables git to avoid having to lstat() every file to find modified files. @@ -509,8 +511,8 @@ looking for new files. 
If you want to enable (or disable) this feature, it is easier to use the `core.fsmonitor` configuration variable (see -linkgit:git-config[1]) than using the `--fsmonitor` option to -`git update-index` in each repository, especially if you want to do so +linkgit:git-config[1]) than using the `--fsmonitor` option to `git +update-index` in each repository, especially if you want to do so across all repositories you use, because you can set the configuration variable in your `$HOME/.gitconfig` just once and have it affect all repositories you touch. From 16d9d6175b53ca6197831da78ca8fb0dedb961b6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:49 +0000 Subject: [PATCH 126/150] fsmonitor--daemon: add a built-in fsmonitor daemon Create a built-in file system monitoring daemon that can be used by the existing `fsmonitor` feature (protocol API and index extension) to improve the performance of various Git commands, such as `status`. The `fsmonitor--daemon` feature builds upon the `Simple IPC` API and provides an alternative to hook access to existing fsmonitors such as `watchman`. This commit merely adds the new command without any functionality. 
Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- .gitignore | 1 + Makefile | 1 + builtin.h | 1 + builtin/fsmonitor--daemon.c | 46 +++++++++++++++++++++++++++++++++++++ git.c | 1 + 5 files changed, 50 insertions(+) create mode 100644 builtin/fsmonitor--daemon.c diff --git a/.gitignore b/.gitignore index f817c509ec..e81de1063a 100644 --- a/.gitignore +++ b/.gitignore @@ -72,6 +72,7 @@ /git-format-patch /git-fsck /git-fsck-objects +/git-fsmonitor--daemon /git-gc /git-get-tar-commit-id /git-grep diff --git a/Makefile b/Makefile index 707a56d4c1..5af1d5b112 100644 --- a/Makefile +++ b/Makefile @@ -1114,6 +1114,7 @@ BUILTIN_OBJS += builtin/fmt-merge-msg.o BUILTIN_OBJS += builtin/for-each-ref.o BUILTIN_OBJS += builtin/for-each-repo.o BUILTIN_OBJS += builtin/fsck.o +BUILTIN_OBJS += builtin/fsmonitor--daemon.o BUILTIN_OBJS += builtin/gc.o BUILTIN_OBJS += builtin/get-tar-commit-id.o BUILTIN_OBJS += builtin/grep.o diff --git a/builtin.h b/builtin.h index 83379f3832..40e9ecc848 100644 --- a/builtin.h +++ b/builtin.h @@ -159,6 +159,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix); int cmd_for_each_repo(int argc, const char **argv, const char *prefix); int cmd_format_patch(int argc, const char **argv, const char *prefix); int cmd_fsck(int argc, const char **argv, const char *prefix); +int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix); int cmd_gc(int argc, const char **argv, const char *prefix); int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix); int cmd_grep(int argc, const char **argv, const char *prefix); diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c new file mode 100644 index 0000000000..f049879337 --- /dev/null +++ b/builtin/fsmonitor--daemon.c @@ -0,0 +1,46 @@ +#include "builtin.h" +#include "config.h" +#include "parse-options.h" +#include "fsmonitor.h" 
+#include "fsmonitor-ipc.h" +#include "simple-ipc.h" +#include "khash.h" + +static const char * const builtin_fsmonitor__daemon_usage[] = { + NULL +}; + +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND + +int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) +{ + const char *subcmd; + + struct option options[] = { + OPT_END() + }; + + git_config(git_default_config, NULL); + + argc = parse_options(argc, argv, prefix, options, + builtin_fsmonitor__daemon_usage, 0); + if (argc != 1) + usage_with_options(builtin_fsmonitor__daemon_usage, options); + subcmd = argv[0]; + + die(_("Unhandled subcommand '%s'"), subcmd); +} + +#else +int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) +{ + struct option options[] = { + OPT_END() + }; + + if (argc == 2 && !strcmp(argv[1], "-h")) + usage_with_options(builtin_fsmonitor__daemon_usage, options); + + die(_("fsmonitor--daemon not supported on this platform")); +} +#endif diff --git a/git.c b/git.c index a25940d72e..3d8e48cf55 100644 --- a/git.c +++ b/git.c @@ -537,6 +537,7 @@ static struct cmd_struct commands[] = { { "format-patch", cmd_format_patch, RUN_SETUP }, { "fsck", cmd_fsck, RUN_SETUP }, { "fsck-objects", cmd_fsck, RUN_SETUP }, + { "fsmonitor--daemon", cmd_fsmonitor__daemon, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id, NO_PARSEOPT }, { "grep", cmd_grep, RUN_SETUP_GENTLY }, From abc9dbc0c17fb8ad894162517296c1177a7eb7ff Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:50 +0000 Subject: [PATCH 127/150] fsmonitor--daemon: implement 'stop' and 'status' commands Implement `stop` and `status` client commands to control and query the status of a `fsmonitor--daemon` server process (and implicitly start a server process if necessary). Later commits will implement the actual server and monitor the file system. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index f049879337..5e3178b8bd 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -7,10 +7,55 @@ #include "khash.h" static const char * const builtin_fsmonitor__daemon_usage[] = { + N_("git fsmonitor--daemon stop"), + N_("git fsmonitor--daemon status"), NULL }; #ifdef HAVE_FSMONITOR_DAEMON_BACKEND +/* + * Acting as a CLIENT. + * + * Send a "quit" command to the `git-fsmonitor--daemon` (if running) + * and wait for it to shutdown. + */ +static int do_as_client__send_stop(void) +{ + struct strbuf answer = STRBUF_INIT; + int ret; + + ret = fsmonitor_ipc__send_command("quit", &answer); + + /* The quit command does not return any response data. */ + strbuf_release(&answer); + + if (ret) + return ret; + + trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL); + while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) + sleep_millisec(50); + trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL); + + return 0; +} + +static int do_as_client__status(void) +{ + enum ipc_active_state state = fsmonitor_ipc__get_state(); + + switch (state) { + case IPC_STATE__LISTENING: + printf(_("fsmonitor-daemon is watching '%s'\n"), + the_repository->worktree); + return 0; + + default: + printf(_("fsmonitor-daemon is not watching '%s'\n"), + the_repository->worktree); + return 1; + } +} int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) { @@ -28,6 +73,12 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) usage_with_options(builtin_fsmonitor__daemon_usage, options); subcmd = argv[0]; + if (!strcmp(subcmd, "stop")) + return !!do_as_client__send_stop(); + + if (!strcmp(subcmd, "status")) + return 
!!do_as_client__status(); + die(_("Unhandled subcommand '%s'"), subcmd); } From 62c7367133e081973e27b7d53e812103fc7ad6d9 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:51 +0000 Subject: [PATCH 128/150] compat/fsmonitor/fsm-listen-win32: stub in backend for Windows Stub in empty filesystem listener backend for fsmonitor--daemon on Windows. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 13 ++++++++ compat/fsmonitor/fsm-listen-win32.c | 21 +++++++++++++ compat/fsmonitor/fsm-listen.h | 49 +++++++++++++++++++++++++++++ config.mak.uname | 10 ++++++ contrib/buildsystems/CMakeLists.txt | 7 +++++ repo-settings.c | 1 + 6 files changed, 101 insertions(+) create mode 100644 compat/fsmonitor/fsm-listen-win32.c create mode 100644 compat/fsmonitor/fsm-listen.h diff --git a/Makefile b/Makefile index 5af1d5b112..26567d4f77 100644 --- a/Makefile +++ b/Makefile @@ -470,6 +470,11 @@ all:: # directory, and the JSON compilation database 'compile_commands.json' will be # created at the root of the repository. # +# If your platform supports a built-in fsmonitor backend, set +# FSMONITOR_DAEMON_BACKEND to the "<name>" of the corresponding +# `compat/fsmonitor/fsm-listen-<name>.c` that implements the +# `fsm_listen__*()` routines. +# # Define DEVELOPER to enable more compiler warnings. Compiler version # and family are auto detected, but could be overridden by defining # COMPILER_FEATURES (see config.mak.dev). 
You can still set @@ -1968,6 +1973,11 @@ ifdef NEED_ACCESS_ROOT_HANDLER COMPAT_OBJS += compat/access.o endif +ifdef FSMONITOR_DAEMON_BACKEND + COMPAT_CFLAGS += -DHAVE_FSMONITOR_DAEMON_BACKEND + COMPAT_OBJS += compat/fsmonitor/fsm-listen-$(FSMONITOR_DAEMON_BACKEND).o +endif + ifeq ($(TCLTK_PATH),) NO_TCLTK = NoThanks endif @@ -2887,6 +2897,9 @@ GIT-BUILD-OPTIONS: FORCE @echo DC_SHA1=\''$(subst ','\'',$(subst ','\'',$(DC_SHA1)))'\' >>$@+ @echo SANITIZE_LEAK=\''$(subst ','\'',$(subst ','\'',$(SANITIZE_LEAK)))'\' >>$@+ @echo X=\'$(X)\' >>$@+ +ifdef FSMONITOR_DAEMON_BACKEND + @echo FSMONITOR_DAEMON_BACKEND=\''$(subst ','\'',$(subst ','\'',$(FSMONITOR_DAEMON_BACKEND)))'\' >>$@+ +endif ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+ endif diff --git a/compat/fsmonitor/fsm-listen-win32.c b/compat/fsmonitor/fsm-listen-win32.c new file mode 100644 index 0000000000..916cbea254 --- /dev/null +++ b/compat/fsmonitor/fsm-listen-win32.c @@ -0,0 +1,21 @@ +#include "cache.h" +#include "config.h" +#include "fsmonitor.h" +#include "fsm-listen.h" + +void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) +{ +} + +void fsm_listen__loop(struct fsmonitor_daemon_state *state) +{ +} + +int fsm_listen__ctor(struct fsmonitor_daemon_state *state) +{ + return -1; +} + +void fsm_listen__dtor(struct fsmonitor_daemon_state *state) +{ +} diff --git a/compat/fsmonitor/fsm-listen.h b/compat/fsmonitor/fsm-listen.h new file mode 100644 index 0000000000..f0539349ba --- /dev/null +++ b/compat/fsmonitor/fsm-listen.h @@ -0,0 +1,49 @@ +#ifndef FSM_LISTEN_H +#define FSM_LISTEN_H + +/* This needs to be implemented by each backend */ + +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND + +struct fsmonitor_daemon_state; + +/* + * Initialize platform-specific data for the fsmonitor listener thread. + * This will be called from the main thread PRIOR to starting the + * fsmonitor_fs_listener thread. + * + * Returns 0 if successful.
+ * Returns -1 otherwise. + */ +int fsm_listen__ctor(struct fsmonitor_daemon_state *state); + +/* + * Cleanup platform-specific data for the fsmonitor listener thread. + * This will be called from the main thread AFTER joining the listener. + */ +void fsm_listen__dtor(struct fsmonitor_daemon_state *state); + +/* + * The main body of the platform-specific event loop to watch for + * filesystem events. This will run in the fsmonitor_fs_listen thread. + * + * It should call `ipc_server_stop_async()` if the listener thread + * prematurely terminates (because of a filesystem error or if it + * detects that the .git directory has been deleted). (It should NOT + * do so if the listener thread receives a normal shutdown signal from + * the IPC layer.) + * + * It should set `state->error_code` to -1 if the daemon should exit + * with an error. + */ +void fsm_listen__loop(struct fsmonitor_daemon_state *state); + +/* + * Gently request that the fsmonitor listener thread shutdown. + * It does not wait for it to stop. The caller should do a JOIN + * to wait for it. + */ +void fsm_listen__stop_async(struct fsmonitor_daemon_state *state); + +#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */ +#endif /* FSM_LISTEN_H */ diff --git a/config.mak.uname b/config.mak.uname index 4352ea39e9..26074f56be 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -435,6 +435,11 @@ ifeq ($(uname_S),Windows) # so we don't need this: # # SNPRINTF_RETURNS_BOGUS = YesPlease + + # The builtin FSMonitor requires Named Pipes and Threads on Windows. + # These are always available, so we do not have to conditionally + # support it. + FSMONITOR_DAEMON_BACKEND = win32 NO_SVN_TESTS = YesPlease RUNTIME_PREFIX = YesPlease HAVE_WPGMPTR = YesWeDo @@ -619,6 +624,11 @@ ifeq ($(uname_S),MINGW) NO_STRTOUMAX = YesPlease NO_MKDTEMP = YesPlease NO_SVN_TESTS = YesPlease + + # The builtin FSMonitor requires Named Pipes and Threads on Windows. + # These are always available, so we do not have to conditionally + # support it. 
+ FSMONITOR_DAEMON_BACKEND = win32 RUNTIME_PREFIX = YesPlease HAVE_WPGMPTR = YesWeDo NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index e44232f85d..0963629db7 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -285,6 +285,13 @@ else() endif() endif() +if(SUPPORTS_SIMPLE_IPC) + if(CMAKE_SYSTEM_NAME STREQUAL "Windows") + add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-win32.c) + endif() +endif() + set(EXE_EXTENSION ${CMAKE_EXECUTABLE_SUFFIX}) #header checks diff --git a/repo-settings.c b/repo-settings.c index b4fbd16cdc..2dfcb2b654 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -2,6 +2,7 @@ #include "config.h" #include "repository.h" #include "midx.h" +#include "compat/fsmonitor/fsm-listen.h" static void repo_cfg_bool(struct repository *r, const char *key, int *dest, int def) From f67df2556f372f6095270bf870f32ff84def2e4b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:52 +0000 Subject: [PATCH 129/150] compat/fsmonitor/fsm-listen-darwin: stub in backend for Darwin Stub in empty implementation of fsmonitor--daemon backend for Darwin (aka MacOS). 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- compat/fsmonitor/fsm-listen-darwin.c | 20 ++++++++++++++++++++ config.mak.uname | 10 ++++++++++ contrib/buildsystems/CMakeLists.txt | 3 +++ 3 files changed, 33 insertions(+) create mode 100644 compat/fsmonitor/fsm-listen-darwin.c diff --git a/compat/fsmonitor/fsm-listen-darwin.c b/compat/fsmonitor/fsm-listen-darwin.c new file mode 100644 index 0000000000..c84e3344ab --- /dev/null +++ b/compat/fsmonitor/fsm-listen-darwin.c @@ -0,0 +1,20 @@ +#include "cache.h" +#include "fsmonitor.h" +#include "fsm-listen.h" + +int fsm_listen__ctor(struct fsmonitor_daemon_state *state) +{ + return -1; +} + +void fsm_listen__dtor(struct fsmonitor_daemon_state *state) +{ +} + +void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) +{ +} + +void fsm_listen__loop(struct fsmonitor_daemon_state *state) +{ +} diff --git a/config.mak.uname b/config.mak.uname index 26074f56be..501970902d 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -157,6 +157,16 @@ ifeq ($(uname_S),Darwin) MSGFMT = /usr/local/opt/gettext/bin/msgfmt endif endif + + # The builtin FSMonitor on MacOS builds upon Simple-IPC. Both require + # Unix domain sockets and PThreads. 
+ ifndef NO_PTHREADS + ifndef NO_UNIX_SOCKETS + FSMONITOR_DAEMON_BACKEND = darwin + endif + endif + + BASIC_LDFLAGS += -framework CoreServices endif ifeq ($(uname_S),SunOS) NEEDS_SOCKET = YesPlease diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 0963629db7..ee0d7257b7 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -289,6 +289,9 @@ if(SUPPORTS_SIMPLE_IPC) if(CMAKE_SYSTEM_NAME STREQUAL "Windows") add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-win32.c) + elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-darwin.c) endif() endif() From 9dcba0ba08d0e843eda1f141c99c72e0aa67811f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:53 +0000 Subject: [PATCH 130/150] fsmonitor--daemon: implement 'run' command Implement `run` command to try to begin listening for file system events. This version defines the thread structure with a single fsmonitor_fs_listen thread to watch for file system events and a simple IPC thread pool to watch for connection from Git clients over a well-known named pipe or Unix domain socket. This commit does not actually do anything yet because the platform backends are still just stubs. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 228 +++++++++++++++++++++++++++++++++++- fsmonitor--daemon.h | 34 ++++++ 2 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 fsmonitor--daemon.h diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 5e3178b8bd..5591339399 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -3,16 +3,52 @@ #include "parse-options.h" #include "fsmonitor.h" #include "fsmonitor-ipc.h" +#include "compat/fsmonitor/fsm-listen.h" +#include "fsmonitor--daemon.h" #include "simple-ipc.h" #include "khash.h" static const char * const builtin_fsmonitor__daemon_usage[] = { + N_("git fsmonitor--daemon run [<options>]"), N_("git fsmonitor--daemon stop"), N_("git fsmonitor--daemon status"), NULL }; #ifdef HAVE_FSMONITOR_DAEMON_BACKEND +/* + * Global state loaded from config. + */ +#define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads" +static int fsmonitor__ipc_threads = 8; + +#define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup" +static int fsmonitor__announce_startup = 0; + +static int fsmonitor_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, FSMONITOR__IPC_THREADS)) { + int i = git_config_int(var, value); + if (i < 1) + return error(_("value of '%s' out of range: %d"), + FSMONITOR__IPC_THREADS, i); + fsmonitor__ipc_threads = i; + return 0; + } + + if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) { + int is_bool; + int i = git_config_bool_or_int(var, value, &is_bool); + if (i < 0) + return error(_("value of '%s' not bool or int: %d"), + var, i); + fsmonitor__announce_startup = i; + return 0; + } + + return git_default_config(var, value, cb); +} + /* * Acting as a CLIENT. 
* @@ -57,15 +93,198 @@ static int do_as_client__status(void) } } +static ipc_server_application_cb handle_client; + +static int handle_client(void *data, + const char *command, size_t command_len, + ipc_server_reply_cb *reply, + struct ipc_server_reply_data *reply_data) +{ + /* struct fsmonitor_daemon_state *state = data; */ + int result; + + /* + * The Simple IPC API now supports {char*, len} arguments, but + * FSMonitor always uses proper null-terminated strings, so + * we can ignore the command_len argument. (Trust, but verify.) + */ + if (command_len != strlen(command)) + BUG("FSMonitor assumes text messages"); + + trace2_region_enter("fsmonitor", "handle_client", the_repository); + trace2_data_string("fsmonitor", the_repository, "request", command); + + result = 0; /* TODO Do something here. */ + + trace2_region_leave("fsmonitor", "handle_client", the_repository); + + return result; +} + +static void *fsm_listen__thread_proc(void *_state) +{ + struct fsmonitor_daemon_state *state = _state; + + trace2_thread_start("fsm-listen"); + + trace_printf_key(&trace_fsmonitor, "Watching: worktree '%s'", + state->path_worktree_watch.buf); + if (state->nr_paths_watching > 1) + trace_printf_key(&trace_fsmonitor, "Watching: gitdir '%s'", + state->path_gitdir_watch.buf); + + fsm_listen__loop(state); + + trace2_thread_exit(); + return NULL; +} + +static int fsmonitor_run_daemon_1(struct fsmonitor_daemon_state *state) +{ + struct ipc_server_opts ipc_opts = { + .nr_threads = fsmonitor__ipc_threads, + + /* + * We know that there are no other active threads yet, + * so we can let the IPC layer temporarily chdir() if + * it needs to when creating the server side of the + * Unix domain socket. + */ + .uds_disallow_chdir = 0 + }; + + /* + * Start the IPC thread pool before we've started the file + * system event listener thread so that we have the IPC handle + * before we need it.
+ */ + if (ipc_server_run_async(&state->ipc_server_data, + fsmonitor_ipc__get_path(), &ipc_opts, + handle_client, state)) + return error_errno( + _("could not start IPC thread pool on '%s'"), + fsmonitor_ipc__get_path()); + + /* + * Start the fsmonitor listener thread to collect filesystem + * events. + */ + if (pthread_create(&state->listener_thread, NULL, + fsm_listen__thread_proc, state) < 0) { + ipc_server_stop_async(state->ipc_server_data); + ipc_server_await(state->ipc_server_data); + + return error(_("could not start fsmonitor listener thread")); + } + + /* + * The daemon is now fully functional in background threads. + * Wait for the IPC thread pool to shutdown (whether by client + * request or from filesystem activity). + */ + ipc_server_await(state->ipc_server_data); + + /* + * The fsmonitor listener thread may have received a shutdown + * event from the IPC thread pool, but it doesn't hurt to tell + * it again. And wait for it to shutdown. + */ + fsm_listen__stop_async(state); + pthread_join(state->listener_thread, NULL); + + return state->error_code; +} + +static int fsmonitor_run_daemon(void) +{ + struct fsmonitor_daemon_state state; + int err; + + memset(&state, 0, sizeof(state)); + + pthread_mutex_init(&state.main_lock, NULL); + state.error_code = 0; + state.current_token_data = NULL; + + /* Prepare to (recursively) watch the <worktree-root> directory. */ + strbuf_init(&state.path_worktree_watch, 0); + strbuf_addstr(&state.path_worktree_watch, absolute_path(get_git_work_tree())); + state.nr_paths_watching = 1; + + /* + * We create and delete cookie files somewhere inside the .git + * directory to help us keep sync with the file system. If + * ".git" is not a directory, then <gitdir> is not inside the + * cone of <worktree-root>, so set up a second watch to watch + * the <gitdir> so that we get events for the cookie files. 
+ */ + strbuf_init(&state.path_gitdir_watch, 0); + strbuf_addbuf(&state.path_gitdir_watch, &state.path_worktree_watch); + strbuf_addstr(&state.path_gitdir_watch, "/.git"); + if (!is_directory(state.path_gitdir_watch.buf)) { + strbuf_reset(&state.path_gitdir_watch); + strbuf_addstr(&state.path_gitdir_watch, absolute_path(get_git_dir())); + state.nr_paths_watching = 2; + } + + /* + * Confirm that we can create platform-specific resources for the + * filesystem listener before we bother starting all the threads. + */ + if (fsm_listen__ctor(&state)) { + err = error(_("could not initialize listener thread")); + goto done; + } + + err = fsmonitor_run_daemon_1(&state); + +done: + pthread_mutex_destroy(&state.main_lock); + fsm_listen__dtor(&state); + + ipc_server_free(state.ipc_server_data); + + strbuf_release(&state.path_worktree_watch); + strbuf_release(&state.path_gitdir_watch); + + return err; +} + +static int try_to_run_foreground_daemon(void) +{ + /* + * Technically, we don't need to probe for an existing daemon + * process, since we could just call `fsmonitor_run_daemon()` + * and let it fail if the pipe/socket is busy. + * + * However, this method gives us a nicer error message for a + * common error case. 
+ */ + if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) + die(_("fsmonitor--daemon is already running '%s'"), + the_repository->worktree); + + if (fsmonitor__announce_startup) { + fprintf(stderr, _("running fsmonitor-daemon in '%s'\n"), + the_repository->worktree); + fflush(stderr); + } + + return !!fsmonitor_run_daemon(); +} + int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) { const char *subcmd; struct option options[] = { + OPT_INTEGER(0, "ipc-threads", + &fsmonitor__ipc_threads, + N_("use <n> ipc worker threads")), OPT_END() }; - git_config(git_default_config, NULL); + git_config(fsmonitor_config, NULL); argc = parse_options(argc, argv, prefix, options, builtin_fsmonitor__daemon_usage, 0); @@ -73,6 +292,13 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) usage_with_options(builtin_fsmonitor__daemon_usage, options); subcmd = argv[0]; + if (fsmonitor__ipc_threads < 1) + die(_("invalid 'ipc-threads' value (%d)"), + fsmonitor__ipc_threads); + + if (!strcmp(subcmd, "run")) + return !!try_to_run_foreground_daemon(); + if (!strcmp(subcmd, "stop")) return !!do_as_client__send_stop(); diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h new file mode 100644 index 0000000000..3009c1a83d --- /dev/null +++ b/fsmonitor--daemon.h @@ -0,0 +1,34 @@ +#ifndef FSMONITOR_DAEMON_H +#define FSMONITOR_DAEMON_H + +#ifdef HAVE_FSMONITOR_DAEMON_BACKEND + +#include "cache.h" +#include "dir.h" +#include "run-command.h" +#include "simple-ipc.h" +#include "thread-utils.h" + +struct fsmonitor_batch; +struct fsmonitor_token_data; + +struct fsmonitor_daemon_backend_data; /* opaque platform-specific data */ + +struct fsmonitor_daemon_state { + pthread_t listener_thread; + pthread_mutex_t main_lock; + + struct strbuf path_worktree_watch; + struct strbuf path_gitdir_watch; + int nr_paths_watching; + + struct fsmonitor_token_data *current_token_data; + + int error_code; + struct fsmonitor_daemon_backend_data *backend_data; + + struct 
ipc_server_data *ipc_server_data; +}; + +#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */ +#endif /* FSMONITOR_DAEMON_H */ From c284e27ba77ee385d322bb90aeb2284bf52c014b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:54 +0000 Subject: [PATCH 131/150] fsmonitor--daemon: implement 'start' command Implement 'git fsmonitor--daemon start' command. This command starts an instance of 'git fsmonitor--daemon run' in the background using the new 'start_bg_command()' function. We avoid the fork-and-call technique on Unix systems in favor of a fork-and-exec technique. This gives us more uniform Trace2 child-* events. It also makes our usage more consistent with Windows usage. On Windows, teach 'git fsmonitor--daemon run' to optionally call 'FreeConsole()' to release handles to the inherited Win32 console (despite being passed invalid handles for stdin/out/err). Without this, command prompts and powershell terminal windows could hang in "exit" until the last background child process exited or released their Win32 console handle. (This was not seen with git-bash shells because they don't have a Win32 console attached to them.) 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 109 +++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 5591339399..69dd39121a 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -9,6 +9,7 @@ #include "khash.h" static const char * const builtin_fsmonitor__daemon_usage[] = { + N_("git fsmonitor--daemon start [<options>]"), N_("git fsmonitor--daemon run [<options>]"), N_("git fsmonitor--daemon stop"), N_("git fsmonitor--daemon status"), @@ -22,6 +23,9 @@ static const char * const builtin_fsmonitor__daemon_usage[] = { #define FSMONITOR__IPC_THREADS "fsmonitor.ipcthreads" static int fsmonitor__ipc_threads = 8; +#define FSMONITOR__START_TIMEOUT "fsmonitor.starttimeout" +static int fsmonitor__start_timeout_sec = 60; + #define FSMONITOR__ANNOUNCE_STARTUP "fsmonitor.announcestartup" static int fsmonitor__announce_startup = 0; @@ -36,6 +40,15 @@ static int fsmonitor_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, FSMONITOR__START_TIMEOUT)) { + int i = git_config_int(var, value); + if (i < 0) + return error(_("value of '%s' out of range: %d"), + FSMONITOR__START_TIMEOUT, i); + fsmonitor__start_timeout_sec = i; + return 0; + } + if (!strcmp(var, FSMONITOR__ANNOUNCE_STARTUP)) { int is_bool; int i = git_config_bool_or_int(var, value, &is_bool); @@ -250,7 +263,7 @@ done: return err; } -static int try_to_run_foreground_daemon(void) +static int try_to_run_foreground_daemon(int detach_console) { /* * Technically, we don't need to probe for an existing daemon @@ -270,17 +283,106 @@ static int try_to_run_foreground_daemon(void) fflush(stderr); } +#ifdef GIT_WINDOWS_NATIVE + if (detach_console) + FreeConsole(); +#endif + return !!fsmonitor_run_daemon(); } +static start_bg_wait_cb bg_wait_cb; + +static int bg_wait_cb(const 
struct child_process *cp, void *cb_data) +{ + enum ipc_active_state s = fsmonitor_ipc__get_state(); + + switch (s) { + case IPC_STATE__LISTENING: + /* child is "ready" */ + return 0; + + case IPC_STATE__NOT_LISTENING: + case IPC_STATE__PATH_NOT_FOUND: + /* give child more time */ + return 1; + + default: + case IPC_STATE__INVALID_PATH: + case IPC_STATE__OTHER_ERROR: + /* all the time in world won't help */ + return -1; + } +} + +static int try_to_start_background_daemon(void) +{ + struct child_process cp = CHILD_PROCESS_INIT; + enum start_bg_result sbgr; + + /* + * Before we try to create a background daemon process, see + * if a daemon process is already listening. This makes it + * easier for us to report an already-listening error to the + * console, since our spawn/daemon can only report the success + * of creating the background process (and not whether it + * immediately exited). + */ + if (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) + die(_("fsmonitor--daemon is already running '%s'"), + the_repository->worktree); + + if (fsmonitor__announce_startup) { + fprintf(stderr, _("starting fsmonitor-daemon in '%s'\n"), + the_repository->worktree); + fflush(stderr); + } + + cp.git_cmd = 1; + + strvec_push(&cp.args, "fsmonitor--daemon"); + strvec_push(&cp.args, "run"); + strvec_push(&cp.args, "--detach"); + strvec_pushf(&cp.args, "--ipc-threads=%d", fsmonitor__ipc_threads); + + cp.no_stdin = 1; + cp.no_stdout = 1; + cp.no_stderr = 1; + + sbgr = start_bg_command(&cp, bg_wait_cb, NULL, + fsmonitor__start_timeout_sec); + + switch (sbgr) { + case SBGR_READY: + return 0; + + default: + case SBGR_ERROR: + case SBGR_CB_ERROR: + return error(_("daemon failed to start")); + + case SBGR_TIMEOUT: + return error(_("daemon not online yet")); + + case SBGR_DIED: + return error(_("daemon terminated")); + } +} + int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) { const char *subcmd; + int detach_console = 0; struct option options[] = { + OPT_BOOL(0, 
"detach", &detach_console, N_("detach from console")), OPT_INTEGER(0, "ipc-threads", &fsmonitor__ipc_threads, N_("use <n> ipc worker threads")), + OPT_INTEGER(0, "start-timeout", + &fsmonitor__start_timeout_sec, + N_("max seconds to wait for background daemon startup")), + OPT_END() }; @@ -296,8 +398,11 @@ int cmd_fsmonitor__daemon(int argc, const char **argv, const char *prefix) die(_("invalid 'ipc-threads' value (%d)"), fsmonitor__ipc_threads); + if (!strcmp(subcmd, "start")) + return !!try_to_start_background_daemon(); + if (!strcmp(subcmd, "run")) - return !!try_to_run_foreground_daemon(); + return !!try_to_run_foreground_daemon(detach_console); if (!strcmp(subcmd, "stop")) return !!do_as_client__send_stop(); From 0ae7a1d9ab086330c4f7d86f8b421cd974416848 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:55 +0000 Subject: [PATCH 132/150] fsmonitor--daemon: add pathname classification Teach fsmonitor--daemon to classify relative and absolute pathnames and decide how they should be handled. This will be used by the platform-specific backend to respond to each filesystem event. When we register for filesystem notifications on a directory, we get events for everything (recursively) in the directory. We want to report to clients changes to tracked and untracked paths within the working directory proper. We do not want to report changes within the .git directory, for example. This classification will be used in a later commit by the different backends to classify paths as events are received. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 81 ++++++++++++++++++++++++++++++++++ fsmonitor--daemon.h | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 69dd39121a..1ce00b7c15 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -134,6 +134,87 @@ static int handle_client(void *data, return result; } +#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-" + +enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative( + const char *rel) +{ + if (fspathncmp(rel, ".git", 4)) + return IS_WORKDIR_PATH; + rel += 4; + + if (!*rel) + return IS_DOT_GIT; + if (*rel != '/') + return IS_WORKDIR_PATH; /* e.g. .gitignore */ + rel++; + + if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX, + strlen(FSMONITOR_COOKIE_PREFIX))) + return IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX; + + return IS_INSIDE_DOT_GIT; +} + +enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative( + const char *rel) +{ + if (!fspathncmp(rel, FSMONITOR_COOKIE_PREFIX, + strlen(FSMONITOR_COOKIE_PREFIX))) + return IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX; + + return IS_INSIDE_GITDIR; +} + +static enum fsmonitor_path_type try_classify_workdir_abs_path( + struct fsmonitor_daemon_state *state, + const char *path) +{ + const char *rel; + + if (fspathncmp(path, state->path_worktree_watch.buf, + state->path_worktree_watch.len)) + return IS_OUTSIDE_CONE; + + rel = path + state->path_worktree_watch.len; + + if (!*rel) + return IS_WORKDIR_PATH; /* it is the root dir exactly */ + if (*rel != '/') + return IS_OUTSIDE_CONE; + rel++; + + return fsmonitor_classify_path_workdir_relative(rel); +} + +enum fsmonitor_path_type fsmonitor_classify_path_absolute( + struct fsmonitor_daemon_state *state, + const char *path) +{ + const char *rel; + enum fsmonitor_path_type t; + + t = try_classify_workdir_abs_path(state, 
path); + if (state->nr_paths_watching == 1) + return t; + if (t != IS_OUTSIDE_CONE) + return t; + + if (fspathncmp(path, state->path_gitdir_watch.buf, + state->path_gitdir_watch.len)) + return IS_OUTSIDE_CONE; + + rel = path + state->path_gitdir_watch.len; + + if (!*rel) + return IS_GITDIR; /* it is the <gitdir> exactly */ + if (*rel != '/') + return IS_OUTSIDE_CONE; + rel++; + + return fsmonitor_classify_path_gitdir_relative(rel); +} + static void *fsm_listen__thread_proc(void *_state) { struct fsmonitor_daemon_state *state = _state; diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h index 3009c1a83d..8c3a71a48b 100644 --- a/fsmonitor--daemon.h +++ b/fsmonitor--daemon.h @@ -30,5 +30,92 @@ struct fsmonitor_daemon_state { struct ipc_server_data *ipc_server_data; }; +/* + * Pathname classifications. + * + * The daemon classifies the pathnames that it receives from file + * system notification events into the following categories and uses + * that to decide whether clients are told about them. (And to watch + * for file system synchronization events.) + * + * The daemon only collects and reports on the set of modified paths + * within the working directory (proper). + * + * The client should only care about paths within the working + * directory proper (inside the working directory and not ".git" nor + * inside of ".git/"). That is, the client has read the index and is + * asking for a list of any paths in the working directory that have + * been modified since the last token. The client does not care about + * file system changes within the ".git/" directory (such as new loose + * objects or packfiles). So the client will only receive paths that + * are classified as IS_WORKDIR_PATH. + * + * Note that ".git" is usually a directory and is therefore inside + * the cone of the FS watch that we have on the working directory root, + * so we will also get FS events for disk activity on and within ".git/" + * that we need to respond to or filter from the client. 
+ * + * But Git also allows ".git" to be a *file* that points to a GITDIR + * outside of the working directory. When this happens, we need to + * create FS watches on both the working directory root *and* on the + * (external) GITDIR root. (The latter is required because we put + * cookie files inside it and use them to sync with the FS event + * stream.) + * + * Note that in the context of this discussion, I'm using "GITDIR" + * to only mean an external GITDIR referenced by a ".git" file. + * + * The platform FS event backends will receive watch-specific + * relative paths (except for those OS's that always emit absolute + * paths). We use the following enum and routines to classify each + * path so that we know how to handle it. There is a slight asymmetry + * here because ".git/" is inside the working directory and the + * (external) GITDIR is not, and therefore how we handle events may + * vary slightly, so I have different enums for "IS...DOT_GIT..." and + * "IS...GITDIR...". + * + * The daemon uses the IS_DOT_GIT and IS_GITDIR internally to mean the + * exact ".git" file/directory or GITDIR directory. If the daemon + * receives a delete event for either of these paths, it will + * automatically shutdown, for example. + * + * Note that the daemon DOES NOT explicitly watch nor special case the + * index. The daemon does not read the index nor have any internal + * index-relative state, so there are no "IS...INDEX..." enum values. + */ +enum fsmonitor_path_type { + IS_WORKDIR_PATH = 0, + + IS_DOT_GIT, + IS_INSIDE_DOT_GIT, + IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX, + + IS_GITDIR, + IS_INSIDE_GITDIR, + IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX, + + IS_OUTSIDE_CONE, +}; + +/* + * Classify a pathname relative to the root of the working directory. + */ +enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative( + const char *relative_path); + +/* + * Classify a pathname relative to a <gitdir> that is external to the + * worktree directory. 
+ */ +enum fsmonitor_path_type fsmonitor_classify_path_gitdir_relative( + const char *relative_path); + +/* + * Classify an absolute pathname received from a filesystem event. + */ +enum fsmonitor_path_type fsmonitor_classify_path_absolute( + struct fsmonitor_daemon_state *state, + const char *path); + #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */ #endif /* FSMONITOR_DAEMON_H */ From aeef767a4155f55233338928f705a3e9986fe2bf Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:56 +0000 Subject: [PATCH 133/150] fsmonitor--daemon: define token-ids Teach fsmonitor--daemon to create token-ids and define the overall token naming scheme. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 116 +++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 1ce00b7c15..1c7c156288 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -106,6 +106,120 @@ static int do_as_client__status(void) } } +/* + * Requests to and from a FSMonitor Protocol V2 provider use an opaque + * "token" as a virtual timestamp. Clients can request a summary of all + * created/deleted/modified files relative to a token. In the response, + * clients receive a new token for the next (relative) request. + * + * + * Token Format + * ============ + * + * The contents of the token are private and provider-specific. + * + * For the built-in fsmonitor--daemon, we define a token as follows: + * + * "builtin" ":" <token_id> ":" <sequence_nr> + * + * The "builtin" prefix is used as a namespace to avoid conflicts + * with other providers (such as Watchman). + * + * The <token_id> is an arbitrary OPAQUE string, such as a GUID, + * UUID, or {timestamp,pid}. 
It is used to group all filesystem + * events that happened while the daemon was monitoring (and in-sync + * with the filesystem). + * + * Unlike FSMonitor Protocol V1, it is not defined as a timestamp + * and does not define less-than/greater-than relationships. + * (There are too many race conditions to rely on file system + * event timestamps.) + * + * The <sequence_nr> is a simple integer incremented whenever the + * daemon needs to make its state public. For example, if 1000 file + * system events come in, but no clients have requested the data, + * the daemon can continue to accumulate file changes in the same + * bin and does not need to advance the sequence number. However, + * as soon as a client does arrive, the daemon needs to start a new + * bin and increment the sequence number. + * + * The sequence number serves as the boundary between 2 sets + * of bins -- the older ones that the client has already seen + * and the newer ones that it hasn't. + * + * When a new <token_id> is created, the <sequence_nr> is reset to + * zero. + * + * + * About Token Ids + * =============== + * + * A new token_id is created: + * + * [1] each time the daemon is started. + * + * [2] any time that the daemon must re-sync with the filesystem + * (such as when the kernel drops or we miss events on a very + * active volume). + * + * [3] in response to a client "flush" command (for dropped event + * testing). + * + * When a new token_id is created, the daemon is free to discard all + * cached filesystem events associated with any previous token_ids. + * Events associated with a non-current token_id will never be sent + * to a client. A token_id change implicitly means that the daemon + * has a gap in its event history. + * + * Therefore, clients that present a token with a stale (non-current) + * token_id will always be given a trivial response.
+ */ +struct fsmonitor_token_data { + struct strbuf token_id; + struct fsmonitor_batch *batch_head; + struct fsmonitor_batch *batch_tail; + uint64_t client_ref_count; +}; + +static struct fsmonitor_token_data *fsmonitor_new_token_data(void) +{ + static int test_env_value = -1; + static uint64_t flush_count = 0; + struct fsmonitor_token_data *token; + + CALLOC_ARRAY(token, 1); + + strbuf_init(&token->token_id, 0); + token->batch_head = NULL; + token->batch_tail = NULL; + token->client_ref_count = 0; + + if (test_env_value < 0) + test_env_value = git_env_bool("GIT_TEST_FSMONITOR_TOKEN", 0); + + if (!test_env_value) { + struct timeval tv; + struct tm tm; + time_t secs; + + gettimeofday(&tv, NULL); + secs = tv.tv_sec; + gmtime_r(&secs, &tm); + + strbuf_addf(&token->token_id, + "%"PRIu64".%d.%4d%02d%02dT%02d%02d%02d.%06ldZ", + flush_count++, + getpid(), + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, + (long)tv.tv_usec); + } else { + strbuf_addf(&token->token_id, "test_%08x", test_env_value++); + } + + return token; +} + static ipc_server_application_cb handle_client; static int handle_client(void *data, @@ -298,7 +412,7 @@ static int fsmonitor_run_daemon(void) pthread_mutex_init(&state.main_lock, NULL); state.error_code = 0; - state.current_token_data = NULL; + state.current_token_data = fsmonitor_new_token_data(); /* Prepare to (recursively) watch the <worktree-root> directory. */ strbuf_init(&state.path_worktree_watch, 0); From bec486b9c13c51b7eca0231ed4b7dac75deaf6a6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:57 +0000 Subject: [PATCH 134/150] fsmonitor--daemon: create token-based changed path cache Teach fsmonitor--daemon to build a list of changed paths and associate them with a token-id. This will be used by the platform-specific backends to accumulate changed paths in response to filesystem events. 
The platform-specific file system listener thread receives file system events containing one or more changed pathnames (with whatever bucketing or grouping that is convenient for the file system). These paths are accumulated (without locking) by the file system layer into a `fsmonitor_batch`. When the file system layer has drained the kernel event queue, it will "publish" them to our token queue and make them visible to concurrent client worker threads. The token layer is free to combine and/or de-dup paths within these batches for efficient presentation to clients. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 230 +++++++++++++++++++++++++++++++++++- fsmonitor--daemon.h | 40 +++++++ 2 files changed, 268 insertions(+), 2 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 1c7c156288..69312119b0 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -181,17 +181,27 @@ struct fsmonitor_token_data { uint64_t client_ref_count; }; +struct fsmonitor_batch { + struct fsmonitor_batch *next; + uint64_t batch_seq_nr; + const char **interned_paths; + size_t nr, alloc; + time_t pinned_time; +}; + static struct fsmonitor_token_data *fsmonitor_new_token_data(void) { static int test_env_value = -1; static uint64_t flush_count = 0; struct fsmonitor_token_data *token; + struct fsmonitor_batch *batch; CALLOC_ARRAY(token, 1); + batch = fsmonitor_batch__new(); strbuf_init(&token->token_id, 0); - token->batch_head = NULL; - token->batch_tail = NULL; + token->batch_head = batch; + token->batch_tail = batch; token->client_ref_count = 0; if (test_env_value < 0) @@ -217,9 +227,143 @@ static struct fsmonitor_token_data *fsmonitor_new_token_data(void) strbuf_addf(&token->token_id, "test_%08x", test_env_value++); } + /* + * We created a new <token_id> and are starting a new series + * of tokens with a zero <seq_nr>. 
+ * + * Since clients cannot guess our new (non test) <token_id> + * they will always receive a trivial response (because of the + * mismatch on the <token_id>). The trivial response will + * tell them our new <token_id> so that subsequent requests + * will be relative to our new series. (And when sending that + * response, we pin the current head of the batch list.) + * + * Even if the client correctly guesses the <token_id>, their + * request of "builtin:<token_id>:0" asks for all changes MORE + * RECENT than batch/bin 0. + * + * This implies that it is a waste to accumulate paths in the + * initial batch/bin (because they will never be transmitted). + * + * So the daemon could be running for days and watching the + * file system, but doesn't need to actually accumulate any + * paths UNTIL we need to set a reference point for a later + * relative request. + * + * However, it is very useful for testing to always have a + * reference point set. Pin batch 0 to force early file system + * events to accumulate. + */ + if (test_env_value) + batch->pinned_time = time(NULL); + return token; } +struct fsmonitor_batch *fsmonitor_batch__new(void) +{ + struct fsmonitor_batch *batch; + + CALLOC_ARRAY(batch, 1); + + return batch; +} + +void fsmonitor_batch__free_list(struct fsmonitor_batch *batch) +{ + while (batch) { + struct fsmonitor_batch *next = batch->next; + + /* + * The actual strings within the array of this batch + * are interned, so we don't own them. We only own + * the array. 
+ */ + free(batch->interned_paths); + free(batch); + + batch = next; + } +} + +void fsmonitor_batch__add_path(struct fsmonitor_batch *batch, + const char *path) +{ + const char *interned_path = strintern(path); + + trace_printf_key(&trace_fsmonitor, "event: %s", interned_path); + + ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc); + batch->interned_paths[batch->nr++] = interned_path; +} + +static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest, + const struct fsmonitor_batch *batch_src) +{ + size_t k; + + ALLOC_GROW(batch_dest->interned_paths, + batch_dest->nr + batch_src->nr + 1, + batch_dest->alloc); + + for (k = 0; k < batch_src->nr; k++) + batch_dest->interned_paths[batch_dest->nr++] = + batch_src->interned_paths[k]; +} + +static void fsmonitor_free_token_data(struct fsmonitor_token_data *token) +{ + if (!token) + return; + + assert(token->client_ref_count == 0); + + strbuf_release(&token->token_id); + + fsmonitor_batch__free_list(token->batch_head); + + free(token); +} + +/* + * Flush all of our cached data about the filesystem. Call this if we + * lose sync with the filesystem and miss some notification events. + * + * [1] If we are missing events, then we no longer have a complete + * history of the directory (relative to our current start token). + * We should create a new token and start fresh (as if we just + * booted up). + * + * If there are no concurrent threads reading the current token data + * series, we can free it now. Otherwise, let the last reader free + * it. + * + * Either way, the old token data series is no longer associated with + * our state data. 
+ */ +static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state) +{ + /* assert current thread holding state->main_lock */ + + struct fsmonitor_token_data *free_me = NULL; + struct fsmonitor_token_data *new_one = NULL; + + new_one = fsmonitor_new_token_data(); + + if (state->current_token_data->client_ref_count == 0) + free_me = state->current_token_data; + state->current_token_data = new_one; + + fsmonitor_free_token_data(free_me); +} + +void fsmonitor_force_resync(struct fsmonitor_daemon_state *state) +{ + pthread_mutex_lock(&state->main_lock); + with_lock__do_force_resync(state); + pthread_mutex_unlock(&state->main_lock); +} + static ipc_server_application_cb handle_client; static int handle_client(void *data, @@ -329,6 +473,81 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute( return fsmonitor_classify_path_gitdir_relative(rel); } +/* + * We try to combine small batches at the front of the batch-list to avoid + * having a long list. This hopefully makes it a little easier when we want + * to truncate and maintain the list. However, we don't want the paths array + * to just keep growing and growing with realloc, so we insert an arbitrary + * limit. + */ +#define MY_COMBINE_LIMIT (1024) + +void fsmonitor_publish(struct fsmonitor_daemon_state *state, + struct fsmonitor_batch *batch, + const struct string_list *cookie_names) +{ + if (!batch && !cookie_names->nr) + return; + + pthread_mutex_lock(&state->main_lock); + + if (batch) { + struct fsmonitor_batch *head; + + head = state->current_token_data->batch_head; + if (!head) { + BUG("token does not have batch"); + } else if (head->pinned_time) { + /* + * We cannot alter the current batch list + * because: + * + * [a] it is being transmitted to at least one + * client and the handle_client() thread has a + * ref-count, but not a lock on the batch list + * starting with this item. 
+ * + * [b] it has been transmitted in the past to + * at least one client such that future + * requests are relative to this head batch. + * + * So, we can only prepend a new batch onto + * the front of the list. + */ + batch->batch_seq_nr = head->batch_seq_nr + 1; + batch->next = head; + state->current_token_data->batch_head = batch; + } else if (!head->batch_seq_nr) { + /* + * Batch 0 is unpinned. See the note in + * `fsmonitor_new_token_data()` about why we + * don't need to accumulate these paths. + */ + fsmonitor_batch__free_list(batch); + } else if (head->nr + batch->nr > MY_COMBINE_LIMIT) { + /* + * The head batch in the list has never been + * transmitted to a client, but folding the + * contents of the new batch onto it would + * exceed our arbitrary limit, so just prepend + * the new batch onto the list. + */ + batch->batch_seq_nr = head->batch_seq_nr + 1; + batch->next = head; + state->current_token_data->batch_head = batch; + } else { + /* + * We are free to add the paths in the given + * batch onto the end of the current head batch. + */ + fsmonitor_batch__combine(head, batch); + fsmonitor_batch__free_list(batch); + } + } + + pthread_mutex_unlock(&state->main_lock); +} + static void *fsm_listen__thread_proc(void *_state) { struct fsmonitor_daemon_state *state = _state; @@ -343,6 +562,13 @@ static void *fsm_listen__thread_proc(void *_state) fsm_listen__loop(state); + pthread_mutex_lock(&state->main_lock); + if (state->current_token_data && + state->current_token_data->client_ref_count == 0) + fsmonitor_free_token_data(state->current_token_data); + state->current_token_data = NULL; + pthread_mutex_unlock(&state->main_lock); + trace2_thread_exit(); return NULL; } diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h index 8c3a71a48b..010fbfe60e 100644 --- a/fsmonitor--daemon.h +++ b/fsmonitor--daemon.h @@ -12,6 +12,27 @@ struct fsmonitor_batch; struct fsmonitor_token_data; +/* + * Create a new batch of path(s). 
The returned batch is considered + * private and not linked into the fsmonitor daemon state. The caller + * should fill this batch with one or more paths and then publish it. + */ +struct fsmonitor_batch *fsmonitor_batch__new(void); + +/* + * Free the list of batches starting with this one. + */ +void fsmonitor_batch__free_list(struct fsmonitor_batch *batch); + +/* + * Add this path to this batch of modified files. + * + * The batch should be private and NOT (yet) linked into the fsmonitor + * daemon state and therefore not yet visible to worker threads and so + * no locking is required. + */ +void fsmonitor_batch__add_path(struct fsmonitor_batch *batch, const char *path); + struct fsmonitor_daemon_backend_data; /* opaque platform-specific data */ struct fsmonitor_daemon_state { @@ -117,5 +138,24 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute( struct fsmonitor_daemon_state *state, const char *path); +/* + * Prepend this batch of path(s) onto the list of batches associated + * with the current token. This makes the batch visible to worker threads. + * + * The caller no longer owns the batch and must not free it. + * + * Wake up the client threads waiting on these cookies. + */ +void fsmonitor_publish(struct fsmonitor_daemon_state *state, + struct fsmonitor_batch *batch, + const struct string_list *cookie_names); + +/* + * If the platform-specific layer loses sync with the filesystem, + * it should call this to invalidate cached data and abort waiting + * threads.
+ */ +void fsmonitor_force_resync(struct fsmonitor_daemon_state *state); + #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */ #endif /* FSMONITOR_DAEMON_H */ From 1448edfb5115c947e377d650e02f8a3ab7fa7b93 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:58 +0000 Subject: [PATCH 135/150] compat/fsmonitor/fsm-listen-win32: implement FSMonitor backend on Windows Teach the win32 backend to register a watch on the working tree root directory (recursively). Also watch the <gitdir> if it is not inside the working tree. And to collect path change notifications into batches and publish. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- compat/fsmonitor/fsm-listen-win32.c | 565 ++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) diff --git a/compat/fsmonitor/fsm-listen-win32.c b/compat/fsmonitor/fsm-listen-win32.c index 916cbea254..5b928ab66e 100644 --- a/compat/fsmonitor/fsm-listen-win32.c +++ b/compat/fsmonitor/fsm-listen-win32.c @@ -2,20 +2,585 @@ #include "config.h" #include "fsmonitor.h" #include "fsm-listen.h" +#include "fsmonitor--daemon.h" + +/* + * The documentation of ReadDirectoryChangesW() states that the maximum + * buffer size is 64K when the monitored directory is remote. + * + * Larger buffers may be used when the monitored directory is local and + * will help us receive events faster from the kernel and avoid dropped + * events. + * + * So we try to use a very large buffer and silently fallback to 64K if + * we get an error. + */ +#define MAX_RDCW_BUF_FALLBACK (65536) +#define MAX_RDCW_BUF (65536 * 8) + +struct one_watch +{ + char buffer[MAX_RDCW_BUF]; + DWORD buf_len; + DWORD count; + + struct strbuf path; + HANDLE hDir; + HANDLE hEvent; + OVERLAPPED overlapped; + + /* + * Is there an active ReadDirectoryChangesW() call pending. If so, we + * need to later call GetOverlappedResult() and possibly CancelIoEx(). 
+ */ + BOOL is_active; +}; + +struct fsmonitor_daemon_backend_data +{ + struct one_watch *watch_worktree; + struct one_watch *watch_gitdir; + + HANDLE hEventShutdown; + + HANDLE hListener[3]; /* we don't own these handles */ +#define LISTENER_SHUTDOWN 0 +#define LISTENER_HAVE_DATA_WORKTREE 1 +#define LISTENER_HAVE_DATA_GITDIR 2 + int nr_listener_handles; +}; + +/* + * Convert the WCHAR path from the notification into UTF8 and + * then normalize it. + */ +static int normalize_path_in_utf8(FILE_NOTIFY_INFORMATION *info, + struct strbuf *normalized_path) +{ + int reserve; + int len = 0; + + strbuf_reset(normalized_path); + if (!info->FileNameLength) + goto normalize; + + /* + * Pre-reserve enough space in the UTF8 buffer for + * each Unicode WCHAR character to be mapped into a + * sequence of 2 UTF8 characters. That should let us + * avoid ERROR_INSUFFICIENT_BUFFER 99.9+% of the time. + */ + reserve = info->FileNameLength + 1; + strbuf_grow(normalized_path, reserve); + + for (;;) { + len = WideCharToMultiByte(CP_UTF8, 0, info->FileName, + info->FileNameLength / sizeof(WCHAR), + normalized_path->buf, + strbuf_avail(normalized_path) - 1, + NULL, NULL); + if (len > 0) + goto normalize; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + error(_("[GLE %ld] could not convert path to UTF-8: '%.*ls'"), + GetLastError(), + (int)(info->FileNameLength / sizeof(WCHAR)), + info->FileName); + return -1; + } + + strbuf_grow(normalized_path, + strbuf_avail(normalized_path) + reserve); + } + +normalize: + strbuf_setlen(normalized_path, len); + return strbuf_normalize_path(normalized_path); +} void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) { + SetEvent(state->backend_data->hListener[LISTENER_SHUTDOWN]); +} + +static struct one_watch *create_watch(struct fsmonitor_daemon_state *state, + const char *path) +{ + struct one_watch *watch = NULL; + DWORD desired_access = FILE_LIST_DIRECTORY; + DWORD share_mode = + FILE_SHARE_WRITE | FILE_SHARE_READ | FILE_SHARE_DELETE; 
+ HANDLE hDir; + wchar_t wpath[MAX_PATH]; + + if (xutftowcs_path(wpath, path) < 0) { + error(_("could not convert to wide characters: '%s'"), path); + return NULL; + } + + hDir = CreateFileW(wpath, + desired_access, share_mode, NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED, + NULL); + if (hDir == INVALID_HANDLE_VALUE) { + error(_("[GLE %ld] could not watch '%s'"), + GetLastError(), path); + return NULL; + } + + CALLOC_ARRAY(watch, 1); + + watch->buf_len = sizeof(watch->buffer); /* assume full MAX_RDCW_BUF */ + + strbuf_init(&watch->path, 0); + strbuf_addstr(&watch->path, path); + + watch->hDir = hDir; + watch->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + + return watch; +} + +static void destroy_watch(struct one_watch *watch) +{ + if (!watch) + return; + + strbuf_release(&watch->path); + if (watch->hDir != INVALID_HANDLE_VALUE) + CloseHandle(watch->hDir); + if (watch->hEvent != INVALID_HANDLE_VALUE) + CloseHandle(watch->hEvent); + + free(watch); +} + +static int start_rdcw_watch(struct fsmonitor_daemon_backend_data *data, + struct one_watch *watch) +{ + DWORD dwNotifyFilter = + FILE_NOTIFY_CHANGE_FILE_NAME | + FILE_NOTIFY_CHANGE_DIR_NAME | + FILE_NOTIFY_CHANGE_ATTRIBUTES | + FILE_NOTIFY_CHANGE_SIZE | + FILE_NOTIFY_CHANGE_LAST_WRITE | + FILE_NOTIFY_CHANGE_CREATION; + + ResetEvent(watch->hEvent); + + memset(&watch->overlapped, 0, sizeof(watch->overlapped)); + watch->overlapped.hEvent = watch->hEvent; + + /* + * Queue an async call using Overlapped IO. This returns immediately. + * Our event handle will be signalled when the real result is available. + * + * The return value here just means that we successfully queued it. + * We won't know if the Read...() actually produces data until later. 
+ */ + watch->is_active = ReadDirectoryChangesW( + watch->hDir, watch->buffer, watch->buf_len, TRUE, + dwNotifyFilter, &watch->count, &watch->overlapped, NULL); + + if (watch->is_active) + return 0; + + error(_("ReadDirectoryChangedW failed on '%s' [GLE %ld]"), + watch->path.buf, GetLastError()); + return -1; +} + +static int recv_rdcw_watch(struct one_watch *watch) +{ + DWORD gle; + + watch->is_active = FALSE; + + /* + * The overlapped result is ready. If the Read...() was successful + * we finally receive the actual result into our buffer. + */ + if (GetOverlappedResult(watch->hDir, &watch->overlapped, &watch->count, + TRUE)) + return 0; + + gle = GetLastError(); + if (gle == ERROR_INVALID_PARAMETER && + /* + * The kernel throws an invalid parameter error when our + * buffer is too big and we are pointed at a remote + * directory (and possibly for other reasons). Quietly + * set it down and try again. + * + * See note about MAX_RDCW_BUF at the top. + */ + watch->buf_len > MAX_RDCW_BUF_FALLBACK) { + watch->buf_len = MAX_RDCW_BUF_FALLBACK; + return -2; + } + + /* + * NEEDSWORK: If an external <gitdir> is deleted, the above + * returns an error. I'm not sure that there's anything that + * we can do here other than failing -- the <worktree>/.git + * link file would be broken anyway. We might try to check + * for that and return a better error message, but I'm not + * sure it is worth it. + */ + + error(_("GetOverlappedResult failed on '%s' [GLE %ld]"), + watch->path.buf, gle); + return -1; +} + +static void cancel_rdcw_watch(struct one_watch *watch) +{ + DWORD count; + + if (!watch || !watch->is_active) + return; + + /* + * The calls to ReadDirectoryChangesW() and GetOverlappedResult() + * form a "pair" (my term) where we queue an IO and promise to + * hang around and wait for the kernel to give us the result. 
+ * + * If for some reason after we queue the IO, we have to quit + * or otherwise not stick around for the second half, we must + * tell the kernel to abort the IO. This prevents the kernel + * from writing to our buffer and/or signalling our event + * after we free them. + * + * (Ask me how much fun it was to track that one down). + */ + CancelIoEx(watch->hDir, &watch->overlapped); + GetOverlappedResult(watch->hDir, &watch->overlapped, &count, TRUE); + watch->is_active = FALSE; +} + +/* + * Process filesystem events that happen anywhere (recursively) under the + * <worktree> root directory. For a normal working directory, this includes + * both version controlled files and the contents of the .git/ directory. + * + * If <worktree>/.git is a file, then we only see events for the file + * itself. + */ +static int process_worktree_events(struct fsmonitor_daemon_state *state) +{ + struct fsmonitor_daemon_backend_data *data = state->backend_data; + struct one_watch *watch = data->watch_worktree; + struct strbuf path = STRBUF_INIT; + struct string_list cookie_list = STRING_LIST_INIT_DUP; + struct fsmonitor_batch *batch = NULL; + const char *p = watch->buffer; + + /* + * If the kernel gets more events than will fit in the kernel + * buffer associated with our RDCW handle, it drops them and + * returns a count of zero. + * + * Yes, the call returns WITHOUT error and with length zero. + * This is the documented behavior. (My testing has confirmed + * that it also sets the last error to ERROR_NOTIFY_ENUM_DIR, + * but we do not rely on that since the function did not + * return an error and it is not documented.) + * + * (The "overflow" case is not ambiguous with the "no data" case + * because we did an INFINITE wait.) + * + * This means we have a gap in coverage. Tell the daemon layer + * to resync. 
+ */ + if (!watch->count) { + trace2_data_string("fsmonitor", NULL, "fsm-listen/kernel", + "overflow"); + fsmonitor_force_resync(state); + return LISTENER_HAVE_DATA_WORKTREE; + } + + /* + * On Windows, `info` contains an "array" of paths that are + * relative to the root of whichever directory handle received + * the event. + */ + for (;;) { + FILE_NOTIFY_INFORMATION *info = (void *)p; + const char *slash; + enum fsmonitor_path_type t; + + strbuf_reset(&path); + if (normalize_path_in_utf8(info, &path) == -1) + goto skip_this_path; + + t = fsmonitor_classify_path_workdir_relative(path.buf); + + switch (t) { + case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: + /* special case cookie files within .git */ + + /* Use just the filename of the cookie file. */ + slash = find_last_dir_sep(path.buf); + string_list_append(&cookie_list, + slash ? slash + 1 : path.buf); + break; + + case IS_INSIDE_DOT_GIT: + /* ignore everything inside of "<worktree>/.git/" */ + break; + + case IS_DOT_GIT: + /* "<worktree>/.git" was deleted (or renamed away) */ + if ((info->Action == FILE_ACTION_REMOVED) || + (info->Action == FILE_ACTION_RENAMED_OLD_NAME)) { + trace2_data_string("fsmonitor", NULL, + "fsm-listen/dotgit", + "removed"); + goto force_shutdown; + } + break; + + case IS_WORKDIR_PATH: + /* queue normal pathname */ + if (!batch) + batch = fsmonitor_batch__new(); + fsmonitor_batch__add_path(batch, path.buf); + break; + + case IS_GITDIR: + case IS_INSIDE_GITDIR: + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: + default: + BUG("unexpected path classification '%d' for '%s'", + t, path.buf); + } + +skip_this_path: + if (!info->NextEntryOffset) + break; + p += info->NextEntryOffset; + } + + fsmonitor_publish(state, batch, &cookie_list); + batch = NULL; + string_list_clear(&cookie_list, 0); + strbuf_release(&path); + return LISTENER_HAVE_DATA_WORKTREE; + +force_shutdown: + fsmonitor_batch__free_list(batch); + string_list_clear(&cookie_list, 0); + strbuf_release(&path); + return LISTENER_SHUTDOWN; +} + 
+/* + * Process filesystem events that happened anywhere (recursively) under the + * external <gitdir> (such as non-primary worktrees or submodules). + * We only care about cookie files that our client threads created here. + * + * Note that we DO NOT get filesystem events on the external <gitdir> + * itself (it is not inside something that we are watching). In particular, + * we do not get an event if the external <gitdir> is deleted. + */ +static int process_gitdir_events(struct fsmonitor_daemon_state *state) +{ + struct fsmonitor_daemon_backend_data *data = state->backend_data; + struct one_watch *watch = data->watch_gitdir; + struct strbuf path = STRBUF_INIT; + struct string_list cookie_list = STRING_LIST_INIT_DUP; + const char *p = watch->buffer; + + if (!watch->count) { + trace2_data_string("fsmonitor", NULL, "fsm-listen/kernel", + "overflow"); + fsmonitor_force_resync(state); + return LISTENER_HAVE_DATA_GITDIR; + } + + for (;;) { + FILE_NOTIFY_INFORMATION *info = (void *)p; + const char *slash; + enum fsmonitor_path_type t; + + strbuf_reset(&path); + if (normalize_path_in_utf8(info, &path) == -1) + goto skip_this_path; + + t = fsmonitor_classify_path_gitdir_relative(path.buf); + + switch (t) { + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: + /* special case cookie files within gitdir */ + + /* Use just the filename of the cookie file. */ + slash = find_last_dir_sep(path.buf); + string_list_append(&cookie_list, + slash ? 
slash + 1 : path.buf); + break; + + case IS_INSIDE_GITDIR: + goto skip_this_path; + + default: + BUG("unexpected path classification '%d' for '%s'", + t, path.buf); + } + +skip_this_path: + if (!info->NextEntryOffset) + break; + p += info->NextEntryOffset; + } + + fsmonitor_publish(state, NULL, &cookie_list); + string_list_clear(&cookie_list, 0); + strbuf_release(&path); + return LISTENER_HAVE_DATA_GITDIR; } void fsm_listen__loop(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data = state->backend_data; + DWORD dwWait; + int result; + + state->error_code = 0; + + if (start_rdcw_watch(data, data->watch_worktree) == -1) + goto force_error_stop; + + if (data->watch_gitdir && + start_rdcw_watch(data, data->watch_gitdir) == -1) + goto force_error_stop; + + for (;;) { + dwWait = WaitForMultipleObjects(data->nr_listener_handles, + data->hListener, + FALSE, INFINITE); + + if (dwWait == WAIT_OBJECT_0 + LISTENER_HAVE_DATA_WORKTREE) { + result = recv_rdcw_watch(data->watch_worktree); + if (result == -1) { + /* hard error */ + goto force_error_stop; + } + if (result == -2) { + /* retryable error */ + if (start_rdcw_watch(data, data->watch_worktree) == -1) + goto force_error_stop; + continue; + } + + /* have data */ + if (process_worktree_events(state) == LISTENER_SHUTDOWN) + goto force_shutdown; + if (start_rdcw_watch(data, data->watch_worktree) == -1) + goto force_error_stop; + continue; + } + + if (dwWait == WAIT_OBJECT_0 + LISTENER_HAVE_DATA_GITDIR) { + result = recv_rdcw_watch(data->watch_gitdir); + if (result == -1) { + /* hard error */ + goto force_error_stop; + } + if (result == -2) { + /* retryable error */ + if (start_rdcw_watch(data, data->watch_gitdir) == -1) + goto force_error_stop; + continue; + } + + /* have data */ + if (process_gitdir_events(state) == LISTENER_SHUTDOWN) + goto force_shutdown; + if (start_rdcw_watch(data, data->watch_gitdir) == -1) + goto force_error_stop; + continue; + } + + if (dwWait == WAIT_OBJECT_0 + 
LISTENER_SHUTDOWN) + goto clean_shutdown; + + error(_("could not read directory changes [GLE %ld]"), + GetLastError()); + goto force_error_stop; + } + +force_error_stop: + state->error_code = -1; + +force_shutdown: + /* + * Tell the IPC thread pool to stop (which completes the await + * in the main thread (which will also signal this thread (if + * we are still alive))). + */ + ipc_server_stop_async(state->ipc_server_data); + +clean_shutdown: + cancel_rdcw_watch(data->watch_worktree); + cancel_rdcw_watch(data->watch_gitdir); } int fsm_listen__ctor(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data; + + CALLOC_ARRAY(data, 1); + + data->hEventShutdown = CreateEvent(NULL, TRUE, FALSE, NULL); + + data->watch_worktree = create_watch(state, + state->path_worktree_watch.buf); + if (!data->watch_worktree) + goto failed; + + if (state->nr_paths_watching > 1) { + data->watch_gitdir = create_watch(state, + state->path_gitdir_watch.buf); + if (!data->watch_gitdir) + goto failed; + } + + data->hListener[LISTENER_SHUTDOWN] = data->hEventShutdown; + data->nr_listener_handles++; + + data->hListener[LISTENER_HAVE_DATA_WORKTREE] = + data->watch_worktree->hEvent; + data->nr_listener_handles++; + + if (data->watch_gitdir) { + data->hListener[LISTENER_HAVE_DATA_GITDIR] = + data->watch_gitdir->hEvent; + data->nr_listener_handles++; + } + + state->backend_data = data; + return 0; + +failed: + CloseHandle(data->hEventShutdown); + destroy_watch(data->watch_worktree); + destroy_watch(data->watch_gitdir); + return -1; } void fsm_listen__dtor(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data; + + if (!state || !state->backend_data) + return; + + data = state->backend_data; + + CloseHandle(data->hEventShutdown); + destroy_watch(data->watch_worktree); + destroy_watch(data->watch_gitdir); + + FREE_AND_NULL(state->backend_data); } From 5ff01b1f1e8e42805bdf98cb0bd3277d18543b07 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler
<jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:02:59 +0000 Subject: [PATCH 136/150] compat/fsmonitor/fsm-listen-darwin: add MacOS header files for FSEvent Include MacOS system declarations to allow us to use FSEvent and CoreFoundation APIs. We need different versions of the declarations for GCC vs. clang because of compiler and header file conflicts. While it is quite possible to #include Apple's CoreServices.h when compiling C source code with clang, trying to build it with GCC currently fails with this error: In file included from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/Security.framework/Headers/AuthSession.h:32, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/Security.framework/Headers/Security.h:42, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/CoreServices.framework/Frameworks/... ...OSServices.framework/Headers/CSIdentity.h:43, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/CoreServices.framework/Frameworks/... ...OSServices.framework/Headers/OSServices.h:29, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/CoreServices.framework/Frameworks/... ...LaunchServices.framework/Headers/IconsCore.h:23, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/CoreServices.framework/Frameworks/... ...LaunchServices.framework/Headers/LaunchServices.h:23, from /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... ...Library/Frameworks/CoreServices.framework/Headers/CoreServices.h:45, /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/System/... 
...Library/Frameworks/Security.framework/Headers/Authorization.h:193:7: error: variably modified 'bytes' at file scope 193 | char bytes[kAuthorizationExternalFormLength]; | ^~~~~ The underlying reason is that GCC (rightfully) objects that an `enum` value such as `kAuthorizationExternalFormLength` is not a constant (because it is not, the preprocessor has no knowledge of it, only the actual C compiler does) and can therefore not be used to define the size of a C array. This is a known problem and tracked in GCC's bug tracker: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082 In the meantime, let's not block things and go the slightly ugly route of declaring/defining the FSEvents constants, data structures and functions that we need, so that we can avoid above-mentioned issue. Let's do this _only_ for GCC, though, so that the CI/PR builds (which build both with clang and with GCC) can guarantee that we _are_ using the correct data types. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- compat/fsmonitor/fsm-darwin-gcc.h | 92 ++++++++++++++++++++++++++++ compat/fsmonitor/fsm-listen-darwin.c | 24 ++++++++ 2 files changed, 116 insertions(+) create mode 100644 compat/fsmonitor/fsm-darwin-gcc.h diff --git a/compat/fsmonitor/fsm-darwin-gcc.h b/compat/fsmonitor/fsm-darwin-gcc.h new file mode 100644 index 0000000000..1c75c3d48e --- /dev/null +++ b/compat/fsmonitor/fsm-darwin-gcc.h @@ -0,0 +1,92 @@ +#ifndef FSM_DARWIN_GCC_H +#define FSM_DARWIN_GCC_H + +#ifndef __clang__ +/* + * It is possible to #include CoreFoundation/CoreFoundation.h when compiling + * with clang, but not with GCC as of time of writing. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082 for details. 
+ */ +typedef unsigned int FSEventStreamCreateFlags; +#define kFSEventStreamEventFlagNone 0x00000000 +#define kFSEventStreamEventFlagMustScanSubDirs 0x00000001 +#define kFSEventStreamEventFlagUserDropped 0x00000002 +#define kFSEventStreamEventFlagKernelDropped 0x00000004 +#define kFSEventStreamEventFlagEventIdsWrapped 0x00000008 +#define kFSEventStreamEventFlagHistoryDone 0x00000010 +#define kFSEventStreamEventFlagRootChanged 0x00000020 +#define kFSEventStreamEventFlagMount 0x00000040 +#define kFSEventStreamEventFlagUnmount 0x00000080 +#define kFSEventStreamEventFlagItemCreated 0x00000100 +#define kFSEventStreamEventFlagItemRemoved 0x00000200 +#define kFSEventStreamEventFlagItemInodeMetaMod 0x00000400 +#define kFSEventStreamEventFlagItemRenamed 0x00000800 +#define kFSEventStreamEventFlagItemModified 0x00001000 +#define kFSEventStreamEventFlagItemFinderInfoMod 0x00002000 +#define kFSEventStreamEventFlagItemChangeOwner 0x00004000 +#define kFSEventStreamEventFlagItemXattrMod 0x00008000 +#define kFSEventStreamEventFlagItemIsFile 0x00010000 +#define kFSEventStreamEventFlagItemIsDir 0x00020000 +#define kFSEventStreamEventFlagItemIsSymlink 0x00040000 +#define kFSEventStreamEventFlagOwnEvent 0x00080000 +#define kFSEventStreamEventFlagItemIsHardlink 0x00100000 +#define kFSEventStreamEventFlagItemIsLastHardlink 0x00200000 +#define kFSEventStreamEventFlagItemCloned 0x00400000 + +typedef struct __FSEventStream *FSEventStreamRef; +typedef const FSEventStreamRef ConstFSEventStreamRef; + +typedef unsigned int CFStringEncoding; +#define kCFStringEncodingUTF8 0x08000100 + +typedef const struct __CFString *CFStringRef; +typedef const struct __CFArray *CFArrayRef; +typedef const struct __CFRunLoop *CFRunLoopRef; + +struct FSEventStreamContext { + long long version; + void *cb_data, *retain, *release, *copy_description; +}; + +typedef struct FSEventStreamContext FSEventStreamContext; +typedef unsigned int FSEventStreamEventFlags; +#define kFSEventStreamCreateFlagNoDefer 0x02 +#define 
kFSEventStreamCreateFlagWatchRoot 0x04 +#define kFSEventStreamCreateFlagFileEvents 0x10 + +typedef unsigned long long FSEventStreamEventId; +#define kFSEventStreamEventIdSinceNow 0xFFFFFFFFFFFFFFFFULL + +typedef void (*FSEventStreamCallback)(ConstFSEventStreamRef streamRef, + void *context, + __SIZE_TYPE__ num_of_events, + void *event_paths, + const FSEventStreamEventFlags event_flags[], + const FSEventStreamEventId event_ids[]); +typedef double CFTimeInterval; +FSEventStreamRef FSEventStreamCreate(void *allocator, + FSEventStreamCallback callback, + FSEventStreamContext *context, + CFArrayRef paths_to_watch, + FSEventStreamEventId since_when, + CFTimeInterval latency, + FSEventStreamCreateFlags flags); +CFStringRef CFStringCreateWithCString(void *allocator, const char *string, + CFStringEncoding encoding); +CFArrayRef CFArrayCreate(void *allocator, const void **items, long long count, + void *callbacks); +void CFRunLoopRun(void); +void CFRunLoopStop(CFRunLoopRef run_loop); +CFRunLoopRef CFRunLoopGetCurrent(void); +extern CFStringRef kCFRunLoopDefaultMode; +void FSEventStreamScheduleWithRunLoop(FSEventStreamRef stream, + CFRunLoopRef run_loop, + CFStringRef run_loop_mode); +unsigned char FSEventStreamStart(FSEventStreamRef stream); +void FSEventStreamStop(FSEventStreamRef stream); +void FSEventStreamInvalidate(FSEventStreamRef stream); +void FSEventStreamRelease(FSEventStreamRef stream); + +#endif /* !clang */ +#endif /* FSM_DARWIN_GCC_H */ diff --git a/compat/fsmonitor/fsm-listen-darwin.c b/compat/fsmonitor/fsm-listen-darwin.c index c84e3344ab..d2ce942cad 100644 --- a/compat/fsmonitor/fsm-listen-darwin.c +++ b/compat/fsmonitor/fsm-listen-darwin.c @@ -1,3 +1,27 @@ +#ifndef __clang__ +#include "fsm-darwin-gcc.h" +#else +#include <CoreFoundation/CoreFoundation.h> +#include <CoreServices/CoreServices.h> + +#ifndef AVAILABLE_MAC_OS_X_VERSION_10_13_AND_LATER +/* + * This enum value was added in 10.13 to: + * + * /Applications/Xcode.app/Contents/Developer/Platforms/ \ + 
* MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/ \ + * Library/Frameworks/CoreServices.framework/Frameworks/ \ + * FSEvents.framework/Versions/Current/Headers/FSEvents.h + * + * If we're compiling against an older SDK, this symbol won't be + * present. Silently define it here so that we don't have to ifdef + * the logging or masking below. This should be harmless since older + * versions of macOS won't ever emit this FS event anyway. + */ +#define kFSEventStreamEventFlagItemCloned 0x00400000 +#endif +#endif + #include "cache.h" #include "fsmonitor.h" #include "fsm-listen.h" From 65723b305ad431f0cc1161c1e98346a2ec3152f2 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:00 +0000 Subject: [PATCH 137/150] compat/fsmonitor/fsm-listen-darwin: implement FSEvent listener on MacOS Implement file system event listener on MacOS using FSEvent, CoreFoundation, and CoreServices. Co-authored-by: Kevin Willford <Kevin.Willford@microsoft.com> Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- compat/fsmonitor/fsm-listen-darwin.c | 383 +++++++++++++++++++++++++++ 1 file changed, 383 insertions(+) diff --git a/compat/fsmonitor/fsm-listen-darwin.c b/compat/fsmonitor/fsm-listen-darwin.c index d2ce942cad..0741fe834c 100644 --- a/compat/fsmonitor/fsm-listen-darwin.c +++ b/compat/fsmonitor/fsm-listen-darwin.c @@ -25,20 +25,403 @@ #include "cache.h" #include "fsmonitor.h" #include "fsm-listen.h" +#include "fsmonitor--daemon.h" + +struct fsmonitor_daemon_backend_data +{ + CFStringRef cfsr_worktree_path; + CFStringRef cfsr_gitdir_path; + + CFArrayRef cfar_paths_to_watch; + int nr_paths_watching; + + FSEventStreamRef stream; + + CFRunLoopRef rl; + + enum shutdown_style { + SHUTDOWN_EVENT = 0, + FORCE_SHUTDOWN, + FORCE_ERROR_STOP, + } shutdown_style; + + unsigned int stream_scheduled:1; + unsigned int stream_started:1; 
+}; + +static void log_flags_set(const char *path, const FSEventStreamEventFlags flag) +{ + struct strbuf msg = STRBUF_INIT; + + if (flag & kFSEventStreamEventFlagMustScanSubDirs) + strbuf_addstr(&msg, "MustScanSubDirs|"); + if (flag & kFSEventStreamEventFlagUserDropped) + strbuf_addstr(&msg, "UserDropped|"); + if (flag & kFSEventStreamEventFlagKernelDropped) + strbuf_addstr(&msg, "KernelDropped|"); + if (flag & kFSEventStreamEventFlagEventIdsWrapped) + strbuf_addstr(&msg, "EventIdsWrapped|"); + if (flag & kFSEventStreamEventFlagHistoryDone) + strbuf_addstr(&msg, "HistoryDone|"); + if (flag & kFSEventStreamEventFlagRootChanged) + strbuf_addstr(&msg, "RootChanged|"); + if (flag & kFSEventStreamEventFlagMount) + strbuf_addstr(&msg, "Mount|"); + if (flag & kFSEventStreamEventFlagUnmount) + strbuf_addstr(&msg, "Unmount|"); + if (flag & kFSEventStreamEventFlagItemChangeOwner) + strbuf_addstr(&msg, "ItemChangeOwner|"); + if (flag & kFSEventStreamEventFlagItemCreated) + strbuf_addstr(&msg, "ItemCreated|"); + if (flag & kFSEventStreamEventFlagItemFinderInfoMod) + strbuf_addstr(&msg, "ItemFinderInfoMod|"); + if (flag & kFSEventStreamEventFlagItemInodeMetaMod) + strbuf_addstr(&msg, "ItemInodeMetaMod|"); + if (flag & kFSEventStreamEventFlagItemIsDir) + strbuf_addstr(&msg, "ItemIsDir|"); + if (flag & kFSEventStreamEventFlagItemIsFile) + strbuf_addstr(&msg, "ItemIsFile|"); + if (flag & kFSEventStreamEventFlagItemIsHardlink) + strbuf_addstr(&msg, "ItemIsHardlink|"); + if (flag & kFSEventStreamEventFlagItemIsLastHardlink) + strbuf_addstr(&msg, "ItemIsLastHardlink|"); + if (flag & kFSEventStreamEventFlagItemIsSymlink) + strbuf_addstr(&msg, "ItemIsSymlink|"); + if (flag & kFSEventStreamEventFlagItemModified) + strbuf_addstr(&msg, "ItemModified|"); + if (flag & kFSEventStreamEventFlagItemRemoved) + strbuf_addstr(&msg, "ItemRemoved|"); + if (flag & kFSEventStreamEventFlagItemRenamed) + strbuf_addstr(&msg, "ItemRenamed|"); + if (flag & kFSEventStreamEventFlagItemXattrMod) + 
strbuf_addstr(&msg, "ItemXattrMod|"); + if (flag & kFSEventStreamEventFlagOwnEvent) + strbuf_addstr(&msg, "OwnEvent|"); + if (flag & kFSEventStreamEventFlagItemCloned) + strbuf_addstr(&msg, "ItemCloned|"); + + trace_printf_key(&trace_fsmonitor, "fsevent: '%s', flags=%u %s", + path, flag, msg.buf); + + strbuf_release(&msg); +} + +static int ef_is_root_delete(const FSEventStreamEventFlags ef) +{ + return (ef & kFSEventStreamEventFlagItemIsDir && + ef & kFSEventStreamEventFlagItemRemoved); +} + +static int ef_is_root_renamed(const FSEventStreamEventFlags ef) +{ + return (ef & kFSEventStreamEventFlagItemIsDir && + ef & kFSEventStreamEventFlagItemRenamed); +} + +static int ef_is_dropped(const FSEventStreamEventFlags ef) +{ + return (ef & kFSEventStreamEventFlagMustScanSubDirs || + ef & kFSEventStreamEventFlagKernelDropped || + ef & kFSEventStreamEventFlagUserDropped); +} + +static void fsevent_callback(ConstFSEventStreamRef streamRef, + void *ctx, + size_t num_of_events, + void *event_paths, + const FSEventStreamEventFlags event_flags[], + const FSEventStreamEventId event_ids[]) +{ + struct fsmonitor_daemon_state *state = ctx; + struct fsmonitor_daemon_backend_data *data = state->backend_data; + char **paths = (char **)event_paths; + struct fsmonitor_batch *batch = NULL; + struct string_list cookie_list = STRING_LIST_INIT_DUP; + const char *path_k; + const char *slash; + int k; + struct strbuf tmp = STRBUF_INIT; + + /* + * Build a list of all filesystem changes into a private/local + * list and without holding any locks. + */ + for (k = 0; k < num_of_events; k++) { + /* + * On Mac, we receive an array of absolute paths. + */ + path_k = paths[k]; + + /* + * If you want to debug FSEvents, log them to GIT_TRACE_FSMONITOR. + * Please don't log them to Trace2. + * + * trace_printf_key(&trace_fsmonitor, "Path: '%s'", path_k); + */ + + /* + * If event[k] is marked as dropped, we assume that we have + * lost sync with the filesystem and should flush our cached + * data. 
We need to: + * + * [1] Abort/wake any client threads waiting for a cookie and + * flush the cached state data (the current token), and + * create a new token. + * + * [2] Discard the batch that we were locally building (since + * they are conceptually relative to the just flushed + * token). + */ + if (ef_is_dropped(event_flags[k])) { + if (trace_pass_fl(&trace_fsmonitor)) + log_flags_set(path_k, event_flags[k]); + + fsmonitor_force_resync(state); + fsmonitor_batch__free_list(batch); + string_list_clear(&cookie_list, 0); + + /* + * We assume that any events that we received + * in this callback after this dropped event + * may still be valid, so we continue rather + * than break. (And just in case there is a + * delete of ".git" hiding in there.) + */ + continue; + } + + switch (fsmonitor_classify_path_absolute(state, path_k)) { + + case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: + /* special case cookie files within .git or gitdir */ + + /* Use just the filename of the cookie file. */ + slash = find_last_dir_sep(path_k); + string_list_append(&cookie_list, + slash ? slash + 1 : path_k); + break; + + case IS_INSIDE_DOT_GIT: + case IS_INSIDE_GITDIR: + /* ignore all other paths inside of .git or gitdir */ + break; + + case IS_DOT_GIT: + case IS_GITDIR: + /* + * If .git directory is deleted or renamed away, + * we have to quit. 
+ */ + if (ef_is_root_delete(event_flags[k])) { + trace_printf_key(&trace_fsmonitor, + "event: gitdir removed"); + goto force_shutdown; + } + if (ef_is_root_renamed(event_flags[k])) { + trace_printf_key(&trace_fsmonitor, + "event: gitdir renamed"); + goto force_shutdown; + } + break; + + case IS_WORKDIR_PATH: + /* try to queue normal pathnames */ + + if (trace_pass_fl(&trace_fsmonitor)) + log_flags_set(path_k, event_flags[k]); + + /* + * Because of the implicit "binning" (the + * kernel calls us at a given frequency) and + * de-duping (the kernel is free to combine + * multiple events for a given pathname), an + * individual fsevent could be marked as both + * a file and directory. Add it to the queue + * with both spellings so that the client will + * know how much to invalidate/refresh. + */ + + if (event_flags[k] & kFSEventStreamEventFlagItemIsFile) { + const char *rel = path_k + + state->path_worktree_watch.len + 1; + + if (!batch) + batch = fsmonitor_batch__new(); + fsmonitor_batch__add_path(batch, rel); + } + + if (event_flags[k] & kFSEventStreamEventFlagItemIsDir) { + const char *rel = path_k + + state->path_worktree_watch.len + 1; + + strbuf_reset(&tmp); + strbuf_addstr(&tmp, rel); + strbuf_addch(&tmp, '/'); + + if (!batch) + batch = fsmonitor_batch__new(); + fsmonitor_batch__add_path(batch, tmp.buf); + } + + break; + + case IS_OUTSIDE_CONE: + default: + trace_printf_key(&trace_fsmonitor, + "ignoring '%s'", path_k); + break; + } + } + + fsmonitor_publish(state, batch, &cookie_list); + string_list_clear(&cookie_list, 0); + strbuf_release(&tmp); + return; + +force_shutdown: + fsmonitor_batch__free_list(batch); + string_list_clear(&cookie_list, 0); + + data->shutdown_style = FORCE_SHUTDOWN; + CFRunLoopStop(data->rl); + strbuf_release(&tmp); + return; +} + +/* + * In the call to `FSEventStreamCreate()` to setup our watch, the + * `latency` argument determines the frequency of calls to our callback + * with new FS events. 
Too slow and events get dropped; too fast and + * we burn CPU unnecessarily. Since it is rather obscure, I don't + * think this needs to be a config setting. I've done extensive + * testing on my systems and chosen the value below. It gives good + * results and I've not seen any dropped events. + * + * With a latency of 0.1, I was seeing lots of dropped events during + * the "touch 100000" files test within t/perf/p7519, but with a + * latency of 0.001 I did not see any dropped events. So I'm going + * to assume that this is the "correct" value. + * + * https://developer.apple.com/documentation/coreservices/1443980-fseventstreamcreate + */ int fsm_listen__ctor(struct fsmonitor_daemon_state *state) { + FSEventStreamCreateFlags flags = kFSEventStreamCreateFlagNoDefer | + kFSEventStreamCreateFlagWatchRoot | + kFSEventStreamCreateFlagFileEvents; + FSEventStreamContext ctx = { + 0, + state, + NULL, + NULL, + NULL + }; + struct fsmonitor_daemon_backend_data *data; + const void *dir_array[2]; + + CALLOC_ARRAY(data, 1); + state->backend_data = data; + + data->cfsr_worktree_path = CFStringCreateWithCString( + NULL, state->path_worktree_watch.buf, kCFStringEncodingUTF8); + dir_array[data->nr_paths_watching++] = data->cfsr_worktree_path; + + if (state->nr_paths_watching > 1) { + data->cfsr_gitdir_path = CFStringCreateWithCString( + NULL, state->path_gitdir_watch.buf, + kCFStringEncodingUTF8); + dir_array[data->nr_paths_watching++] = data->cfsr_gitdir_path; + } + + data->cfar_paths_to_watch = CFArrayCreate(NULL, dir_array, + data->nr_paths_watching, + NULL); + data->stream = FSEventStreamCreate(NULL, fsevent_callback, &ctx, + data->cfar_paths_to_watch, + kFSEventStreamEventIdSinceNow, + 0.001, flags); + if (data->stream == NULL) + goto failed; + + /* + * `data->rl` needs to be set inside the listener thread. 
+ */ + + return 0; + +failed: + error(_("Unable to create FSEventStream.")); + + FREE_AND_NULL(state->backend_data); return -1; } void fsm_listen__dtor(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data; + + if (!state || !state->backend_data) + return; + + data = state->backend_data; + + if (data->stream) { + if (data->stream_started) + FSEventStreamStop(data->stream); + if (data->stream_scheduled) + FSEventStreamInvalidate(data->stream); + FSEventStreamRelease(data->stream); + } + + FREE_AND_NULL(state->backend_data); } void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data; + + data = state->backend_data; + data->shutdown_style = SHUTDOWN_EVENT; + + CFRunLoopStop(data->rl); } void fsm_listen__loop(struct fsmonitor_daemon_state *state) { + struct fsmonitor_daemon_backend_data *data; + + data = state->backend_data; + + data->rl = CFRunLoopGetCurrent(); + + FSEventStreamScheduleWithRunLoop(data->stream, data->rl, kCFRunLoopDefaultMode); + data->stream_scheduled = 1; + + if (!FSEventStreamStart(data->stream)) { + error(_("Failed to start the FSEventStream")); + goto force_error_stop_without_loop; + } + data->stream_started = 1; + + CFRunLoopRun(); + + switch (data->shutdown_style) { + case FORCE_ERROR_STOP: + state->error_code = -1; + /* fall thru */ + case FORCE_SHUTDOWN: + ipc_server_stop_async(state->ipc_server_data); + /* fall thru */ + case SHUTDOWN_EVENT: + default: + break; + } + return; + +force_error_stop_without_loop: + state->error_code = -1; + ipc_server_stop_async(state->ipc_server_data); + return; } From 518a522f405ddc51d289192718864f1c784afc6f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:01 +0000 Subject: [PATCH 138/150] fsmonitor--daemon: implement handle_client callback Teach fsmonitor--daemon to respond to IPC requests from client Git processes and respond with a list of modified pathnames relative to 
the provided token. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 311 +++++++++++++++++++++++++++++++++++- 1 file changed, 309 insertions(+), 2 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 69312119b0..eafaafb45b 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -7,6 +7,7 @@ #include "fsmonitor--daemon.h" #include "simple-ipc.h" #include "khash.h" +#include "pkt-line.h" static const char * const builtin_fsmonitor__daemon_usage[] = { N_("git fsmonitor--daemon start [<options>]"), @@ -364,6 +365,310 @@ void fsmonitor_force_resync(struct fsmonitor_daemon_state *state) pthread_mutex_unlock(&state->main_lock); } +/* + * Format an opaque token string to send to the client. + */ +static void with_lock__format_response_token( + struct strbuf *response_token, + const struct strbuf *response_token_id, + const struct fsmonitor_batch *batch) +{ + /* assert current thread holding state->main_lock */ + + strbuf_reset(response_token); + strbuf_addf(response_token, "builtin:%s:%"PRIu64, + response_token_id->buf, batch->batch_seq_nr); +} + +/* + * Parse an opaque token from the client. + * Returns -1 on error. 
+ */ +static int fsmonitor_parse_client_token(const char *buf_token, + struct strbuf *requested_token_id, + uint64_t *seq_nr) +{ + const char *p; + char *p_end; + + strbuf_reset(requested_token_id); + *seq_nr = 0; + + if (!skip_prefix(buf_token, "builtin:", &p)) + return -1; + + while (*p && *p != ':') + strbuf_addch(requested_token_id, *p++); + if (!*p++) + return -1; + + *seq_nr = (uint64_t)strtoumax(p, &p_end, 10); + if (*p_end) + return -1; + + return 0; +} + +KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) + +static int do_handle_client(struct fsmonitor_daemon_state *state, + const char *command, + ipc_server_reply_cb *reply, + struct ipc_server_reply_data *reply_data) +{ + struct fsmonitor_token_data *token_data = NULL; + struct strbuf response_token = STRBUF_INIT; + struct strbuf requested_token_id = STRBUF_INIT; + struct strbuf payload = STRBUF_INIT; + uint64_t requested_oldest_seq_nr = 0; + uint64_t total_response_len = 0; + const char *p; + const struct fsmonitor_batch *batch_head; + const struct fsmonitor_batch *batch; + intmax_t count = 0, duplicates = 0; + kh_str_t *shown; + int hash_ret; + int do_trivial = 0; + int do_flush = 0; + + /* + * We expect `command` to be of the form: + * + * <command> := quit NUL + * | flush NUL + * | <V1-time-since-epoch-ns> NUL + * | <V2-opaque-fsmonitor-token> NUL + */ + + if (!strcmp(command, "quit")) { + /* + * A client has requested over the socket/pipe that the + * daemon shutdown. + * + * Tell the IPC thread pool to shutdown (which completes + * the await in the main thread (which can stop the + * fsmonitor listener thread)). + * + * There is no reply to the client. + */ + return SIMPLE_IPC_QUIT; + + } else if (!strcmp(command, "flush")) { + /* + * Flush all of our cached data and generate a new token + * just like if we lost sync with the filesystem. + * + * Then send a trivial response using the new token. 
+ */ + do_flush = 1; + do_trivial = 1; + + } else if (!skip_prefix(command, "builtin:", &p)) { + /* assume V1 timestamp or garbage */ + + char *p_end; + + strtoumax(command, &p_end, 10); + trace_printf_key(&trace_fsmonitor, + ((*p_end) ? + "fsmonitor: invalid command line '%s'" : + "fsmonitor: unsupported V1 protocol '%s'"), + command); + do_trivial = 1; + + } else { + /* We have "builtin:*" */ + if (fsmonitor_parse_client_token(command, &requested_token_id, + &requested_oldest_seq_nr)) { + trace_printf_key(&trace_fsmonitor, + "fsmonitor: invalid V2 protocol token '%s'", + command); + do_trivial = 1; + + } else { + /* + * We have a V2 valid token: + * "builtin:<token_id>:<seq_nr>" + */ + } + } + + pthread_mutex_lock(&state->main_lock); + + if (!state->current_token_data) + BUG("fsmonitor state does not have a current token"); + + if (do_flush) + with_lock__do_force_resync(state); + + /* + * We mark the current head of the batch list as "pinned" so + * that the listener thread will treat this item as read-only + * (and prevent any more paths from being added to it) from + * now on. + */ + token_data = state->current_token_data; + batch_head = token_data->batch_head; + ((struct fsmonitor_batch *)batch_head)->pinned_time = time(NULL); + + /* + * FSMonitor Protocol V2 requires that we send a response header + * with a "new current token" and then all of the paths that changed + * since the "requested token". We send the seq_nr of the just-pinned + * head batch so that future requests from a client will be relative + * to it. 
+ */ + with_lock__format_response_token(&response_token, + &token_data->token_id, batch_head); + + reply(reply_data, response_token.buf, response_token.len + 1); + total_response_len += response_token.len + 1; + + trace2_data_string("fsmonitor", the_repository, "response/token", + response_token.buf); + trace_printf_key(&trace_fsmonitor, "response token: %s", + response_token.buf); + + if (!do_trivial) { + if (strcmp(requested_token_id.buf, token_data->token_id.buf)) { + /* + * The client last spoke to a different daemon + * instance -OR- the daemon had to resync with + * the filesystem (and lost events), so reject. + */ + trace2_data_string("fsmonitor", the_repository, + "response/token", "different"); + do_trivial = 1; + + } else if (requested_oldest_seq_nr < + token_data->batch_tail->batch_seq_nr) { + /* + * The client wants older events than we have for + * this token_id. This means that the end of our + * batch list was truncated and we cannot give the + * client a complete snapshot relative to their + * request. + */ + trace_printf_key(&trace_fsmonitor, + "client requested truncated data"); + do_trivial = 1; + } + } + + if (do_trivial) { + pthread_mutex_unlock(&state->main_lock); + + reply(reply_data, "/", 2); + + trace2_data_intmax("fsmonitor", the_repository, + "response/trivial", 1); + + goto cleanup; + } + + /* + * We're going to hold onto a pointer to the current + * token-data while we walk the list of batches of files. + * During this time, we will NOT be under the lock. + * So we ref-count it. + * + * This allows the listener thread to continue prepending + * new batches of items to the token-data (which we'll ignore). + * + * AND it allows the listener thread to do a token-reset + * (and install a new `current_token_data`). 
+ */ + token_data->client_ref_count++; + + pthread_mutex_unlock(&state->main_lock); + + /* + * The client request is relative to the token that they sent, + * so walk the batch list backwards from the current head back + * to the batch (sequence number) they named. + * + * We use khash to de-dup the list of pathnames. + * + * NEEDSWORK: each batch contains a list of interned strings, + * so we only need to do pointer comparisons here to build the + * hash table. Currently, we're still comparing the string + * values. + */ + shown = kh_init_str(); + for (batch = batch_head; + batch && batch->batch_seq_nr > requested_oldest_seq_nr; + batch = batch->next) { + size_t k; + + for (k = 0; k < batch->nr; k++) { + const char *s = batch->interned_paths[k]; + size_t s_len; + + if (kh_get_str(shown, s) != kh_end(shown)) + duplicates++; + else { + kh_put_str(shown, s, &hash_ret); + + trace_printf_key(&trace_fsmonitor, + "send[%"PRIuMAX"]: %s", + count, s); + + /* Each path gets written with a trailing NUL */ + s_len = strlen(s) + 1; + + if (payload.len + s_len >= + LARGE_PACKET_DATA_MAX) { + reply(reply_data, payload.buf, + payload.len); + total_response_len += payload.len; + strbuf_reset(&payload); + } + + strbuf_add(&payload, s, s_len); + count++; + } + } + } + + if (payload.len) { + reply(reply_data, payload.buf, payload.len); + total_response_len += payload.len; + } + + kh_release_str(shown); + + pthread_mutex_lock(&state->main_lock); + + if (token_data->client_ref_count > 0) + token_data->client_ref_count--; + + if (token_data->client_ref_count == 0) { + if (token_data != state->current_token_data) { + /* + * The listener thread did a token-reset while we were + * walking the batch list. Therefore, this token is + * stale and can be discarded completely. If we are + * the last reader thread using this token, we own + * that work. 
+ */ + fsmonitor_free_token_data(token_data); + } + } + + pthread_mutex_unlock(&state->main_lock); + + trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len); + trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count); + trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); + +cleanup: + strbuf_release(&response_token); + strbuf_release(&requested_token_id); + strbuf_release(&payload); + + return 0; +} + static ipc_server_application_cb handle_client; static int handle_client(void *data, @@ -371,7 +676,7 @@ static int handle_client(void *data, ipc_server_reply_cb *reply, struct ipc_server_reply_data *reply_data) { - /* struct fsmonitor_daemon_state *state = data; */ + struct fsmonitor_daemon_state *state = data; int result; /* @@ -382,10 +687,12 @@ static int handle_client(void *data, if (command_len != strlen(command)) BUG("FSMonitor assumes text messages"); + trace_printf_key(&trace_fsmonitor, "requested token: %s", command); + trace2_region_enter("fsmonitor", "handle_client", the_repository); trace2_data_string("fsmonitor", the_repository, "request", command); - result = 0; /* TODO Do something here. */ + result = do_handle_client(state, command, reply, reply_data); trace2_region_leave("fsmonitor", "handle_client", the_repository); From dd77cf61a1a2fbf52c94d0cd986d555ad2ba8a4b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:02 +0000 Subject: [PATCH 139/150] help: include fsmonitor--daemon feature flag in version info Add the "feature: fsmonitor--daemon" message to the output of `git version --build-options`. The builtin FSMonitor is only available on certain platforms and even then only when certain Makefile flags are enabled, so print a message in the verbose version output when it is available. This can be used by test scripts for prereq testing. 
Granted, tests could just try `git fsmonitor--daemon status` and look for a 128 exit code or grep for a "not supported" message on stderr, but these methods are rather obscure. The main advantage is that the feature message will automatically appear in bug reports and other support requests. This concept was also used during the development of Scalar for similar reasons. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- help.c | 4 ++++ t/test-lib.sh | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/help.c b/help.c index 71444906dd..9112a51e84 100644 --- a/help.c +++ b/help.c @@ -12,6 +12,7 @@ #include "refs.h" #include "parse-options.h" #include "prompt.h" +#include "fsmonitor-ipc.h" struct category_description { uint32_t category; @@ -695,6 +696,9 @@ void get_version_info(struct strbuf *buf, int show_build_options) strbuf_addf(buf, "sizeof-size_t: %d\n", (int)sizeof(size_t)); strbuf_addf(buf, "shell-path: %s\n", SHELL_PATH); /* NEEDSWORK: also save and output GIT-BUILD_OPTIONS? */ + + if (fsmonitor_ipc__is_supported()) + strbuf_addstr(buf, "feature: fsmonitor--daemon\n"); } } diff --git a/t/test-lib.sh b/t/test-lib.sh index e4716b0b86..5d819c1bc1 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1799,3 +1799,10 @@ test_lazy_prereq SHA1 ' # Tests that verify the scheduler integration must set this locally # to avoid errors. GIT_TEST_MAINT_SCHEDULER="none:exit 1" + +# Does this platform support `git fsmonitor--daemon` +# +test_lazy_prereq FSMONITOR_DAEMON ' + git version --build-options >output && + grep "feature: fsmonitor--daemon" output +' From 148405fb27980a63bb532d11f9ab242280052355 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:03 +0000 Subject: [PATCH 140/150] t/helper/fsmonitor-client: create IPC client to talk to FSMonitor Daemon Create an IPC client to send query and flush commands to the daemon. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Makefile | 1 + t/helper/test-fsmonitor-client.c | 116 +++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + 4 files changed, 119 insertions(+) create mode 100644 t/helper/test-fsmonitor-client.c diff --git a/Makefile b/Makefile index 26567d4f77..daa21bed6c 100644 --- a/Makefile +++ b/Makefile @@ -716,6 +716,7 @@ TEST_BUILTINS_OBJS += test-dump-split-index.o TEST_BUILTINS_OBJS += test-dump-untracked-cache.o TEST_BUILTINS_OBJS += test-example-decorate.o TEST_BUILTINS_OBJS += test-fast-rebase.o +TEST_BUILTINS_OBJS += test-fsmonitor-client.o TEST_BUILTINS_OBJS += test-genrandom.o TEST_BUILTINS_OBJS += test-genzeros.o TEST_BUILTINS_OBJS += test-getcwd.o diff --git a/t/helper/test-fsmonitor-client.c b/t/helper/test-fsmonitor-client.c new file mode 100644 index 0000000000..3062c8a3c2 --- /dev/null +++ b/t/helper/test-fsmonitor-client.c @@ -0,0 +1,116 @@ +/* + * test-fsmonitor-client.c: client code to send commands/requests to + * a `git fsmonitor--daemon` daemon. + */ + +#include "test-tool.h" +#include "cache.h" +#include "parse-options.h" +#include "fsmonitor-ipc.h" + +#ifndef HAVE_FSMONITOR_DAEMON_BACKEND +int cmd__fsmonitor_client(int argc, const char **argv) +{ + die("fsmonitor--daemon not available on this platform"); +} +#else + +/* + * Read the `.git/index` to get the last token written to the + * FSMonitor Index Extension. 
+ */ +static const char *get_token_from_index(void) +{ + struct index_state *istate = the_repository->index; + + if (do_read_index(istate, the_repository->index_file, 0) < 0) + die("unable to read index file"); + if (!istate->fsmonitor_last_update) + die("index file does not have fsmonitor extension"); + + return istate->fsmonitor_last_update; +} + +/* + * Send an IPC query to a `git-fsmonitor--daemon` daemon and + * ask for the changes since the given token or from the last + * token in the index extension. + * + * This will implicitly start a daemon process if necessary. The + * daemon process will persist after we exit. + */ +static int do_send_query(const char *token) +{ + struct strbuf answer = STRBUF_INIT; + int ret; + + if (!token || !*token) + token = get_token_from_index(); + + ret = fsmonitor_ipc__send_query(token, &answer); + if (ret < 0) + die("could not query fsmonitor--daemon"); + + write_in_full(1, answer.buf, answer.len); + strbuf_release(&answer); + + return 0; +} + +/* + * Send a "flush" command to the `git-fsmonitor--daemon` (if running) + * and tell it to flush its cache. + * + * This feature is primarily used by the test suite to simulate a loss of + * sync with the filesystem where we miss kernel events. 
+ */ +static int do_send_flush(void) +{ + struct strbuf answer = STRBUF_INIT; + int ret; + + ret = fsmonitor_ipc__send_command("flush", &answer); + if (ret) + return ret; + + write_in_full(1, answer.buf, answer.len); + strbuf_release(&answer); + + return 0; +} + +int cmd__fsmonitor_client(int argc, const char **argv) +{ + const char *subcmd; + const char *token = NULL; + + const char * const fsmonitor_client_usage[] = { + "test-tool fsmonitor-client query [<token>]", + "test-tool fsmonitor-client flush", + NULL, + }; + + struct option options[] = { + OPT_STRING(0, "token", &token, "token", + "command token to send to the server"), + OPT_END() + }; + + argc = parse_options(argc, argv, NULL, options, fsmonitor_client_usage, 0); + + if (argc != 1) + usage_with_options(fsmonitor_client_usage, options); + + subcmd = argv[0]; + + setup_git_directory(); + + if (!strcmp(subcmd, "query")) + return !!do_send_query(token); + + if (!strcmp(subcmd, "flush")) + return !!do_send_flush(); + + die("Unhandled subcommand: '%s'", subcmd); +} +#endif diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index e6ec69cf32..0424f7adf5 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -32,6 +32,7 @@ static struct test_cmd cmds[] = { { "dump-untracked-cache", cmd__dump_untracked_cache }, { "example-decorate", cmd__example_decorate }, { "fast-rebase", cmd__fast_rebase }, + { "fsmonitor-client", cmd__fsmonitor_client }, { "genrandom", cmd__genrandom }, { "genzeros", cmd__genzeros }, { "getcwd", cmd__getcwd }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 20756eefdd..c876e8246f 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -23,6 +23,7 @@ int cmd__dump_untracked_cache(int argc, const char **argv); int cmd__dump_reftable(int argc, const char **argv); int cmd__example_decorate(int argc, const char **argv); int cmd__fast_rebase(int argc, const char **argv); +int cmd__fsmonitor_client(int argc, const char **argv); int cmd__genrandom(int argc, 
const char **argv); int cmd__genzeros(int argc, const char **argv); int cmd__getcwd(int argc, const char **argv); From a00cdff81a2d578405cf0f68072da07eee462b93 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:04 +0000 Subject: [PATCH 141/150] t7527: create test for fsmonitor--daemon Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7527-builtin-fsmonitor.sh | 494 +++++++++++++++++++++++++++++++++++ 1 file changed, 494 insertions(+) create mode 100755 t/t7527-builtin-fsmonitor.sh diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh new file mode 100755 index 0000000000..062e01c0df --- /dev/null +++ b/t/t7527-builtin-fsmonitor.sh @@ -0,0 +1,494 @@ +#!/bin/sh + +test_description='built-in file system watcher' + +. ./test-lib.sh + +if ! test_have_prereq FSMONITOR_DAEMON +then + skip_all="fsmonitor--daemon is not supported on this platform" + test_done +fi + +stop_daemon_delete_repo () { + r=$1 && + test_might_fail git -C $r fsmonitor--daemon stop && + rm -rf $1 +} + +start_daemon () { + r= tf= t2= tk= && + + while test "$#" -ne 0 + do + case "$1" in + -C) + r="-C ${2?}" + shift + ;; + --tf) + tf="${2?}" + shift + ;; + --t2) + t2="${2?}" + shift + ;; + --tk) + tk="${2?}" + shift + ;; + -*) + BUG "error: unknown option: '$1'" + ;; + *) + BUG "error: unbound argument: '$1'" + ;; + esac + shift + done && + + ( + if test -n "$tf" + then + GIT_TRACE_FSMONITOR="$tf" + export GIT_TRACE_FSMONITOR + fi && + + if test -n "$t2" + then + GIT_TRACE2_PERF="$t2" + export GIT_TRACE2_PERF + fi && + + if test -n "$tk" + then + GIT_TEST_FSMONITOR_TOKEN="$tk" + export GIT_TEST_FSMONITOR_TOKEN + fi && + + git $r fsmonitor--daemon start && + git $r fsmonitor--daemon status + ) +} + +# Is a Trace2 data event present with the given category and key? +# We do not care what the value is. 
+# +have_t2_data_event () { + c=$1 && + k=$2 && + + grep -e '"event":"data".*"category":"'"$c"'".*"key":"'"$k"'"' +} + +test_expect_success 'explicit daemon start and stop' ' + test_when_finished "stop_daemon_delete_repo test_explicit" && + + git init test_explicit && + start_daemon -C test_explicit && + + git -C test_explicit fsmonitor--daemon stop && + test_must_fail git -C test_explicit fsmonitor--daemon status +' + +test_expect_success 'implicit daemon start' ' + test_when_finished "stop_daemon_delete_repo test_implicit" && + + git init test_implicit && + test_must_fail git -C test_implicit fsmonitor--daemon status && + + # query will implicitly start the daemon. + # + # for test-script simplicity, we send a V1 timestamp rather than + # a V2 token. either way, the daemon response to any query contains + # a new V2 token. (the daemon may complain that we sent a V1 request, + # but this test case is only concerned with whether the daemon was + # implicitly started.) + + GIT_TRACE2_EVENT="$PWD/.git/trace" \ + test-tool -C test_implicit fsmonitor-client query --token 0 >actual && + nul_to_q <actual >actual.filtered && + grep "builtin:" actual.filtered && + + # confirm that a daemon was started in the background. + # + # since the mechanism for starting the background daemon is platform + # dependent, just confirm that the foreground command received a + # response from the daemon. + + have_t2_data_event fsm_client query/response-length <.git/trace && + + git -C test_implicit fsmonitor--daemon status && + git -C test_implicit fsmonitor--daemon stop && + test_must_fail git -C test_implicit fsmonitor--daemon status +' + +test_expect_success 'implicit daemon stop (delete .git)' ' + test_when_finished "stop_daemon_delete_repo test_implicit_1" && + + git init test_implicit_1 && + + start_daemon -C test_implicit_1 && + + # deleting the .git directory will implicitly stop the daemon. 
+ rm -rf test_implicit_1/.git && + + # [1] Create an empty .git directory so that the following Git + # command will stay relative to the `-C` directory. + # + # Without this, the Git command will override the requested + # -C argument and crawl out to the containing Git source tree. + # This would make the test result dependent upon whether we + # were using fsmonitor on our development worktree. + # + sleep 1 && + mkdir test_implicit_1/.git && + + test_must_fail git -C test_implicit_1 fsmonitor--daemon status +' + +test_expect_success 'implicit daemon stop (rename .git)' ' + test_when_finished "stop_daemon_delete_repo test_implicit_2" && + + git init test_implicit_2 && + + start_daemon -C test_implicit_2 && + + # renaming the .git directory will implicitly stop the daemon. + mv test_implicit_2/.git test_implicit_2/.xxx && + + # See [1] above. + # + sleep 1 && + mkdir test_implicit_2/.git && + + test_must_fail git -C test_implicit_2 fsmonitor--daemon status +' + +test_expect_success 'cannot start multiple daemons' ' + test_when_finished "stop_daemon_delete_repo test_multiple" && + + git init test_multiple && + + start_daemon -C test_multiple && + + test_must_fail git -C test_multiple fsmonitor--daemon start 2>actual && + grep "fsmonitor--daemon is already running" actual && + + git -C test_multiple fsmonitor--daemon stop && + test_must_fail git -C test_multiple fsmonitor--daemon status +' + +# These tests use the main repo in the trash directory + +test_expect_success 'setup' ' + >tracked && + >modified && + >delete && + >rename && + mkdir dir1 && + >dir1/tracked && + >dir1/modified && + >dir1/delete && + >dir1/rename && + mkdir dir2 && + >dir2/tracked && + >dir2/modified && + >dir2/delete && + >dir2/rename && + mkdir dirtorename && + >dirtorename/a && + >dirtorename/b && + + cat >.gitignore <<-\EOF && + .gitignore + expect* + actual* + EOF + + git -c core.fsmonitor=false add . 
&& + test_tick && + git -c core.fsmonitor=false commit -m initial && + + git config core.fsmonitor true +' + +# The test already explicitly stopped (or tried to stop) the daemon. +# This is here in case something else fails first. +# +redundant_stop_daemon () { + test_might_fail git fsmonitor--daemon stop +} + +test_expect_success 'update-index implicitly starts daemon' ' + test_when_finished redundant_stop_daemon && + + test_must_fail git fsmonitor--daemon status && + + GIT_TRACE2_EVENT="$PWD/.git/trace_implicit_1" \ + git update-index --fsmonitor && + + git fsmonitor--daemon status && + test_might_fail git fsmonitor--daemon stop && + + # Confirm that the trace2 log contains a record of the + # daemon starting. + test_subcommand git fsmonitor--daemon start <.git/trace_implicit_1 +' + +test_expect_success 'status implicitly starts daemon' ' + test_when_finished redundant_stop_daemon && + + test_must_fail git fsmonitor--daemon status && + + GIT_TRACE2_EVENT="$PWD/.git/trace_implicit_2" \ + git status >actual && + + git fsmonitor--daemon status && + test_might_fail git fsmonitor--daemon stop && + + # Confirm that the trace2 log contains a record of the + # daemon starting. + test_subcommand git fsmonitor--daemon start <.git/trace_implicit_2 +' + +edit_files () { + echo 1 >modified && + echo 2 >dir1/modified && + echo 3 >dir2/modified && + >dir1/untracked +} + +delete_files () { + rm -f delete && + rm -f dir1/delete && + rm -f dir2/delete +} + +create_files () { + echo 1 >new && + echo 2 >dir1/new && + echo 3 >dir2/new +} + +rename_files () { + mv rename renamed && + mv dir1/rename dir1/renamed && + mv dir2/rename dir2/renamed +} + +file_to_directory () { + rm -f delete && + mkdir delete && + echo 1 >delete/new +} + +directory_to_file () { + rm -rf dir1 && + echo 1 >dir1 +} + +# The next few test cases confirm that our fsmonitor daemon sees each type +# of OS filesystem notification that we care about. 
At this layer we just +# ensure we are getting the OS notifications and do not try to confirm what +# is reported by `git status`. +# +# We run a simple query after modifying the filesystem just to introduce +# a bit of a delay so that the trace logging from the daemon has time to +# get flushed to disk. +# +# We `reset` and `clean` at the bottom of each test (and before stopping the +# daemon) because these commands might implicitly restart the daemon. + +clean_up_repo_and_stop_daemon () { + git reset --hard HEAD && + git clean -fd && + test_might_fail git fsmonitor--daemon stop && + rm -f .git/trace +} + +test_expect_success 'edit some files' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + edit_files && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dir1/modified$" .git/trace && + grep "^event: dir2/modified$" .git/trace && + grep "^event: modified$" .git/trace && + grep "^event: dir1/untracked$" .git/trace +' + +test_expect_success 'create some files' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + create_files && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dir1/new$" .git/trace && + grep "^event: dir2/new$" .git/trace && + grep "^event: new$" .git/trace +' + +test_expect_success 'delete some files' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + delete_files && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dir1/delete$" .git/trace && + grep "^event: dir2/delete$" .git/trace && + grep "^event: delete$" .git/trace +' + +test_expect_success 'rename some files' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + rename_files && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dir1/rename$" .git/trace && + grep "^event: dir2/rename$" .git/trace && + grep "^event: 
rename$" .git/trace && + grep "^event: dir1/renamed$" .git/trace && + grep "^event: dir2/renamed$" .git/trace && + grep "^event: renamed$" .git/trace +' + +test_expect_success 'rename directory' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + mv dirtorename dirrenamed && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dirtorename/*$" .git/trace && + grep "^event: dirrenamed/*$" .git/trace +' + +test_expect_success 'file changes to directory' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + file_to_directory && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: delete$" .git/trace && + grep "^event: delete/new$" .git/trace +' + +test_expect_success 'directory changes to a file' ' + test_when_finished clean_up_repo_and_stop_daemon && + + start_daemon --tf "$PWD/.git/trace" && + + directory_to_file && + + test-tool fsmonitor-client query --token 0 && + + grep "^event: dir1$" .git/trace +' + +# The next few test cases exercise the token-resync code. When filesystem +# drops events (because of filesystem velocity or because the daemon isn't +# polling fast enough), we need to discard the cached data (relative to the +# current token) and start collecting events under a new token. +# +# the 'test-tool fsmonitor-client flush' command can be used to send a +# "flush" message to a running daemon and ask it to do a flush/resync. + +test_expect_success 'flush cached data' ' + test_when_finished "stop_daemon_delete_repo test_flush" && + + git init test_flush && + + start_daemon -C test_flush --tf "$PWD/.git/trace_daemon" --tk true && + + # The daemon should have an initial token with no events in _0 and + # then a few (probably platform-specific number of) events in _1. + # These should both have the same <token_id>. 
+ + test-tool -C test_flush fsmonitor-client query --token "builtin:test_00000001:0" >actual_0 && + nul_to_q <actual_0 >actual_q0 && + + >test_flush/file_1 && + >test_flush/file_2 && + + test-tool -C test_flush fsmonitor-client query --token "builtin:test_00000001:0" >actual_1 && + nul_to_q <actual_1 >actual_q1 && + + grep "file_1" actual_q1 && + + # Force a flush. This will change the <token_id>, reset the <seq_nr>, and + # flush the file data. Then create some events and ensure that the file + # again appears in the cache. It should have the new <token_id>. + + test-tool -C test_flush fsmonitor-client flush >flush_0 && + nul_to_q <flush_0 >flush_q0 && + grep "^builtin:test_00000002:0Q/Q$" flush_q0 && + + test-tool -C test_flush fsmonitor-client query --token "builtin:test_00000002:0" >actual_2 && + nul_to_q <actual_2 >actual_q2 && + + grep "^builtin:test_00000002:0Q$" actual_q2 && + + >test_flush/file_3 && + + test-tool -C test_flush fsmonitor-client query --token "builtin:test_00000002:0" >actual_3 && + nul_to_q <actual_3 >actual_q3 && + + grep "file_3" actual_q3 +' + +# The next few test cases create repos where the .git directory is NOT +# inside the one of the working directory. That is, where .git is a file +# that points to a directory elsewhere. This happens for submodules and +# non-primary worktrees. 
+ +test_expect_success 'setup worktree base' ' + git init wt-base && + echo 1 >wt-base/file1 && + git -C wt-base add file1 && + git -C wt-base commit -m "c1" +' + +test_expect_success 'worktree with .git file' ' + git -C wt-base worktree add ../wt-secondary && + + start_daemon -C wt-secondary \ + --tf "$PWD/trace_wt_secondary" \ + --t2 "$PWD/trace2_wt_secondary" && + + git -C wt-secondary fsmonitor--daemon stop && + test_must_fail git -C wt-secondary fsmonitor--daemon status +' + +# NEEDSWORK: Repeat one of the "edit" tests on wt-secondary and +# confirm that we get the same events and behavior -- that is, that +# fsmonitor--daemon correctly watches BOTH the working directory and +# the external GITDIR directory and behaves the same as when ".git" +# is a directory inside the working directory. + +test_expect_success 'cleanup worktrees' ' + stop_daemon_delete_repo wt-secondary && + stop_daemon_delete_repo wt-base +' + +test_done From 08894d334969011394dee7ab1dde63176fc51830 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:05 +0000 Subject: [PATCH 142/150] t/perf: avoid copying builtin fsmonitor files into test repo Do not copy any of the various fsmonitor--daemon files from the .git directory of the (GIT_PERF_REPO or GIT_PERF_LARGE_REPO) source repo into the test's trash directory. When perf tests start, they copy the contents of the source repo into the test's trash directory. If fsmonitor is running in the source repo, there may be control files, such as the IPC socket and/or fsmonitor cookie files. These should not be copied into the test repo. Unix domain sockets cannot be copied in the manner used by the test setup, so if present, the test setup fails. Cookie files are harmless, but we should avoid them. The builtin fsmonitor keeps all such control files/sockets in .git/fsmonitor--daemon*, so it is simple to exclude them. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/perf-lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh index 407252bac7..932105cd12 100644 --- a/t/perf/perf-lib.sh +++ b/t/perf/perf-lib.sh @@ -78,7 +78,7 @@ test_perf_copy_repo_contents () { for stuff in "$1"/* do case "$stuff" in - */objects|*/hooks|*/config|*/commondir|*/gitdir|*/worktrees) + */objects|*/hooks|*/config|*/commondir|*/gitdir|*/worktrees|*/fsmonitor--daemon*) ;; *) cp -R "$stuff" "$repo/.git/" || exit 1 From 369f0f54ff92fe321e8c7c2d2372e0eb31f77303 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:06 +0000 Subject: [PATCH 143/150] t/helper/test-chmtime: skip directories on Windows Teach `test-tool.exe chmtime` to ignore errors when setting the mtime on a directory on Windows. NEEDSWORK: The Windows version of `utime()` (aka `mingw_utime()`) does not properly handle directories because it uses `_wopen()`. It should be converted to using `CreateFileW()` and backup semantics at a minimum. Since I'm already in the middle of a large patch series, I did not want to destabilize other callers of `utime()` right now. The problem has only been observed in the t/perf/p7519 test when the test repo contains an empty directory on disk. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/helper/test-chmtime.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/t/helper/test-chmtime.c b/t/helper/test-chmtime.c index 524b55ca49..dc28890a18 100644 --- a/t/helper/test-chmtime.c +++ b/t/helper/test-chmtime.c @@ -134,6 +134,21 @@ int cmd__chmtime(int argc, const char **argv) } if (utb.modtime != sb.st_mtime && utime(argv[i], &utb) < 0) { +#ifdef GIT_WINDOWS_NATIVE + if (S_ISDIR(sb.st_mode)) { + /* + * NEEDSWORK: The Windows version of `utime()` + * (aka `mingw_utime()`) does not correctly + * handle directory arguments, since it uses + * `_wopen()`. Ignore it for now since this + * is just a test. + */ + fprintf(stderr, + ("Failed to modify time on directory %s. " + "Skipping\n"), argv[i]); + continue; + } +#endif fprintf(stderr, "Failed to modify time on %s: %s\n", argv[i], strerror(errno)); return 1; From 8aa020970195f49c369eeff9bf22079002e3454b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:07 +0000 Subject: [PATCH 144/150] t/perf/p7519: fix coding style Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/p7519-fsmonitor.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh index c8be58f3c7..5241eb6c4e 100755 --- a/t/perf/p7519-fsmonitor.sh +++ b/t/perf/p7519-fsmonitor.sh @@ -72,7 +72,7 @@ then fi fi -trace_start() { +trace_start () { if test -n "$GIT_PERF_7519_TRACE" then name="$1" @@ -91,7 +91,7 @@ trace_start() { fi } -trace_stop() { +trace_stop () { if test -n "$GIT_PERF_7519_TRACE" then unset GIT_TRACE2_PERF @@ -133,7 +133,7 @@ test_expect_success "one time repo setup" ' fi ' -setup_for_fsmonitor() { +setup_for_fsmonitor () { # set INTEGRATION_SCRIPT depending on the environment if test -n "$INTEGRATION_PATH" then @@ -173,7 +173,7 
@@ test_perf_w_drop_caches () { test_perf "$@" } -test_fsmonitor_suite() { +test_fsmonitor_suite () { if test -n "$INTEGRATION_SCRIPT"; then DESC="fsmonitor=$(basename $INTEGRATION_SCRIPT)" else From 86f7433f9711012fcd9857974fc43dfd693b0aa9 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:08 +0000 Subject: [PATCH 145/150] t/perf/p7519: speed up test on Windows Change p7519 to use `test_seq` and `xargs` rather than a `for` loop to touch thousands of files. This takes minutes off of test runs on Windows because of process creation overhead. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/p7519-fsmonitor.sh | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh index 5241eb6c4e..a6c2a910e7 100755 --- a/t/perf/p7519-fsmonitor.sh +++ b/t/perf/p7519-fsmonitor.sh @@ -98,6 +98,13 @@ trace_stop () { fi } +touch_files () { + n=$1 && + d="$n"_files && + + (cd $d && test_seq 1 $n | xargs touch ) +} + test_expect_success "one time repo setup" ' # set untrackedCache depending on the environment if test -n "$GIT_PERF_7519_UNTRACKED_CACHE" @@ -119,10 +126,11 @@ test_expect_success "one time repo setup" ' fi && mkdir 1_file 10_files 100_files 1000_files 10000_files && - for i in $(test_seq 1 10); do touch 10_files/$i || return 1; done && - for i in $(test_seq 1 100); do touch 100_files/$i || return 1; done && - for i in $(test_seq 1 1000); do touch 1000_files/$i || return 1; done && - for i in $(test_seq 1 10000); do touch 10000_files/$i || return 1; done && + : 1_file directory should be left empty && + touch_files 10 && + touch_files 100 && + touch_files 1000 && + touch_files 10000 && git add 1_file 10_files 100_files 1000_files 10000_files && git commit -qm "Add files" && @@ -199,15 +207,15 @@ test_fsmonitor_suite () { # Update the mtimes on upto 100k files to make status 
think # that they are dirty. For simplicity, omit any files with - # LFs (i.e. anything that ls-files thinks it needs to dquote). - # Then fully backslash-quote the paths to capture any - # whitespace so that they pass thru xargs properly. + # LFs (i.e. anything that ls-files thinks it needs to dquote) + # and any files with whitespace so that they pass thru xargs + # properly. # test_perf_w_drop_caches "status (dirty) ($DESC)" ' git ls-files | \ head -100000 | \ grep -v \" | \ - sed '\''s/\(.\)/\\\1/g'\'' | \ + grep -v " ." | \ xargs test-tool chmtime -300 && git status ' From ad2b54e3e89d715a283a3c3ab6262c9bed8cb3a1 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:09 +0000 Subject: [PATCH 146/150] t/perf/p7519: add fsmonitor--daemon test cases Repeat all of the fsmonitor perf tests using `git fsmonitor--daemon` and the "Simple IPC" interface. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/perf/p7519-fsmonitor.sh | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh index a6c2a910e7..0b9129ca7b 100755 --- a/t/perf/p7519-fsmonitor.sh +++ b/t/perf/p7519-fsmonitor.sh @@ -141,7 +141,7 @@ test_expect_success "one time repo setup" ' fi ' -setup_for_fsmonitor () { +setup_for_fsmonitor_hook () { # set INTEGRATION_SCRIPT depending on the environment if test -n "$INTEGRATION_PATH" then @@ -182,7 +182,11 @@ test_perf_w_drop_caches () { } test_fsmonitor_suite () { - if test -n "$INTEGRATION_SCRIPT"; then + if test -n "$USE_FSMONITOR_DAEMON" + then + DESC="builtin fsmonitor--daemon" + elif test -n "$INTEGRATION_SCRIPT" + then DESC="fsmonitor=$(basename $INTEGRATION_SCRIPT)" else DESC="fsmonitor=disabled" @@ -261,11 +265,11 @@ test_fsmonitor_suite () { trace_start fsmonitor-watchman if test -n "$GIT_PERF_7519_FSMONITOR"; then for INTEGRATION_PATH in 
$GIT_PERF_7519_FSMONITOR; do - test_expect_success "setup for fsmonitor $INTEGRATION_PATH" 'setup_for_fsmonitor' + test_expect_success "setup for fsmonitor $INTEGRATION_PATH" 'setup_for_fsmonitor_hook' test_fsmonitor_suite done else - test_expect_success "setup for fsmonitor" 'setup_for_fsmonitor' + test_expect_success "setup for fsmonitor hook" 'setup_for_fsmonitor_hook' test_fsmonitor_suite fi @@ -293,4 +297,30 @@ test_expect_success "setup without fsmonitor" ' test_fsmonitor_suite trace_stop +# +# Run a full set of perf tests using the built-in fsmonitor--daemon. +# It does not use the Hook API, so it has a different setup. +# Explicitly start the daemon here and before we start client commands +# so that we can later add custom tracing. +# +if test_have_prereq FSMONITOR_DAEMON +then + USE_FSMONITOR_DAEMON=t + + test_expect_success "setup for builtin fsmonitor" ' + trace_start fsmonitor--daemon--server && + git fsmonitor--daemon start && + + trace_start fsmonitor--daemon--client && + + git config core.fsmonitor true && + git update-index --fsmonitor + ' + + test_fsmonitor_suite + + git fsmonitor--daemon stop + trace_stop +fi + test_done From 50c725d6b6f52b1c7fc4940c092d7f205cb99583 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:10 +0000 Subject: [PATCH 147/150] fsmonitor--daemon: periodically truncate list of modified files Teach fsmonitor--daemon to periodically truncate the list of modified files to save some memory. Clients will ask for the set of changes relative to a token that they found in the FSMN index extension in the index. (This token is like a point in time, but different). Clients will then update the index to contain the response token (so that subsequent commands will be relative to this new token). Therefore, the daemon can gradually truncate the in-memory list of changed paths as they become obsolete (older than the previous token). 
Since we may have multiple clients making concurrent requests with a skew of tokens and clients may be racing to talk to the daemon, we lazily truncate the list. We introduce a 5 minute delay and truncate batches 5 minutes after they are considered obsolete. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 88 +++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index eafaafb45b..ab9cc09f7c 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -312,6 +312,75 @@ static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest, batch_src->interned_paths[k]; } +/* + * To keep the batch list from growing unbounded in response to filesystem + * activity, we try to truncate old batches from the end of the list as + * they become irrelevant. + * + * We assume that the .git/index will be updated with the most recent token + * any time the index is updated. And future commands will only ask for + * recent changes *since* that new token. So as tokens advance into the + * future, older batch items will never be requested/needed. So we can + * truncate them without loss of functionality. + * + * However, multiple commands may be talking to the daemon concurrently + * or perform a slow command, so a little "token skew" is possible. + * Therefore, we want this to be a little bit lazy and have a generous + * delay. + * + * The current reader thread walked backwards in time from `token->batch_head` + * back to `batch_marker` somewhere in the middle of the batch list. + * + * Let's walk backwards in time from that marker an arbitrary delay + * and truncate the list there. Note that these timestamps are completely + * artificial (based on when we pinned the batch item) and not on any + * filesystem activity. 
+ * + * Return the obsolete portion of the list after we have removed it from + * the official list so that the caller can free it after leaving the lock. + */ +#define MY_TIME_DELAY_SECONDS (5 * 60) /* seconds */ + +static struct fsmonitor_batch *with_lock__truncate_old_batches( + struct fsmonitor_daemon_state *state, + const struct fsmonitor_batch *batch_marker) +{ + /* assert current thread holding state->main_lock */ + + const struct fsmonitor_batch *batch; + struct fsmonitor_batch *remainder; + + if (!batch_marker) + return NULL; + + trace_printf_key(&trace_fsmonitor, "Truncate: mark (%"PRIu64",%"PRIu64")", + batch_marker->batch_seq_nr, + (uint64_t)batch_marker->pinned_time); + + for (batch = batch_marker; batch; batch = batch->next) { + time_t t; + + if (!batch->pinned_time) /* an overflow batch */ + continue; + + t = batch->pinned_time + MY_TIME_DELAY_SECONDS; + if (t > batch_marker->pinned_time) /* too close to marker */ + continue; + + goto truncate_past_here; + } + + return NULL; + +truncate_past_here: + state->current_token_data->batch_tail = (struct fsmonitor_batch *)batch; + + remainder = ((struct fsmonitor_batch *)batch)->next; + ((struct fsmonitor_batch *)batch)->next = NULL; + + return remainder; +} + static void fsmonitor_free_token_data(struct fsmonitor_token_data *token) { if (!token) @@ -425,6 +494,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const char *p; const struct fsmonitor_batch *batch_head; const struct fsmonitor_batch *batch; + struct fsmonitor_batch *remainder = NULL; intmax_t count = 0, duplicates = 0; kh_str_t *shown; int hash_ret; @@ -652,11 +722,29 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, * that work. */ fsmonitor_free_token_data(token_data); + } else if (batch) { + /* + * We are holding the lock and are the only + * reader of the ref-counted portion of the + * list, so we get the honor of seeing if the + * list can be truncated to save memory. 
+ * + * The main loop did not walk to the end of the + * list, so this batch is the first item in the + * batch-list that is older than the requested + * end-point sequence number. See if the tail + * end of the list is obsolete. + */ + remainder = with_lock__truncate_old_batches(state, + batch); } } pthread_mutex_unlock(&state->main_lock); + if (remainder) + fsmonitor_batch__free_list(remainder); + trace2_data_intmax("fsmonitor", the_repository, "response/length", total_response_len); trace2_data_intmax("fsmonitor", the_repository, "response/count/files", count); trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); From b05880d357c6dadba8d1d7943f4782fc25e06999 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:11 +0000 Subject: [PATCH 148/150] fsmonitor--daemon: use a cookie file to sync with file system Teach fsmonitor--daemon client threads to create a cookie file inside the .git directory and then wait until FS events for the cookie are observed by the FS listener thread. This helps address the racy nature of file system events by blocking the client response until the kernel has drained any event backlog. This is especially important on MacOS where kernel events are only issued with a limited frequency. See the `latency` argument of `FSeventStreamCreate()`. The kernel only signals every `latency` seconds, but does not guarantee that the kernel queue is completely drained, so we may have to wait more than one interval. If we increase the latency, the system is more likely to drop events. We avoid these issues by having each client thread create a unique cookie file and then wait until it is seen in the event stream. 
Co-authored-by: Kevin Willford <Kevin.Willford@microsoft.com> Co-authored-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- builtin/fsmonitor--daemon.c | 237 +++++++++++++++++++++++++++++++++++- fsmonitor--daemon.h | 5 + 2 files changed, 241 insertions(+), 1 deletion(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index ab9cc09f7c..46be55a461 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -107,6 +107,162 @@ static int do_as_client__status(void) } } +enum fsmonitor_cookie_item_result { + FCIR_ERROR = -1, /* could not create cookie file ? */ + FCIR_INIT, + FCIR_SEEN, + FCIR_ABORT, +}; + +struct fsmonitor_cookie_item { + struct hashmap_entry entry; + char *name; + enum fsmonitor_cookie_item_result result; +}; + +static int cookies_cmp(const void *data, const struct hashmap_entry *he1, + const struct hashmap_entry *he2, const void *keydata) +{ + const struct fsmonitor_cookie_item *a = + container_of(he1, const struct fsmonitor_cookie_item, entry); + const struct fsmonitor_cookie_item *b = + container_of(he2, const struct fsmonitor_cookie_item, entry); + + return strcmp(a->name, keydata ? 
keydata : b->name); +} + +static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie( + struct fsmonitor_daemon_state *state) +{ + /* assert current thread holding state->main_lock */ + + int fd; + struct fsmonitor_cookie_item *cookie; + struct strbuf cookie_pathname = STRBUF_INIT; + struct strbuf cookie_filename = STRBUF_INIT; + enum fsmonitor_cookie_item_result result; + int my_cookie_seq; + + CALLOC_ARRAY(cookie, 1); + + my_cookie_seq = state->cookie_seq++; + + strbuf_addf(&cookie_filename, "%i-%i", getpid(), my_cookie_seq); + + strbuf_addbuf(&cookie_pathname, &state->path_cookie_prefix); + strbuf_addbuf(&cookie_pathname, &cookie_filename); + + cookie->name = strbuf_detach(&cookie_filename, NULL); + cookie->result = FCIR_INIT; + hashmap_entry_init(&cookie->entry, strhash(cookie->name)); + + hashmap_add(&state->cookies, &cookie->entry); + + trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'", + cookie->name, cookie_pathname.buf); + + /* + * Create the cookie file on disk and then wait for a notification + * that the listener thread has seen it. + */ + fd = open(cookie_pathname.buf, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (fd < 0) { + error_errno(_("could not create fsmonitor cookie '%s'"), + cookie->name); + + cookie->result = FCIR_ERROR; + goto done; + } + + /* + * Technically, close() and unlink() can fail, but we don't + * care here. We only created the file to trigger a watch + * event from the FS to know when we're up to date. + */ + close(fd); + unlink(cookie_pathname.buf); + + /* + * Technically, this is an infinite wait (well, unless another + * thread sends us an abort). I'd like to change this to + * use `pthread_cond_timedwait()` and return an error/timeout + * and let the caller do the trivial response thing, but we + * don't have that routine in our thread-utils. + * + * After extensive beta testing I'm not really worried about + * this. 
Also note that the above open() and unlink() calls + * will cause at least two FS events on that path, so the odds + * of getting stuck are pretty slim. + */ + while (cookie->result == FCIR_INIT) + pthread_cond_wait(&state->cookies_cond, + &state->main_lock); + +done: + hashmap_remove(&state->cookies, &cookie->entry, NULL); + + result = cookie->result; + + free(cookie->name); + free(cookie); + strbuf_release(&cookie_pathname); + + return result; +} + +/* + * Mark these cookies as _SEEN and wake up the corresponding client threads. + */ +static void with_lock__mark_cookies_seen(struct fsmonitor_daemon_state *state, + const struct string_list *cookie_names) +{ + /* assert current thread holding state->main_lock */ + + int k; + int nr_seen = 0; + + for (k = 0; k < cookie_names->nr; k++) { + struct fsmonitor_cookie_item key; + struct fsmonitor_cookie_item *cookie; + + key.name = cookie_names->items[k].string; + hashmap_entry_init(&key.entry, strhash(key.name)); + + cookie = hashmap_get_entry(&state->cookies, &key, entry, NULL); + if (cookie) { + trace_printf_key(&trace_fsmonitor, "cookie-seen: '%s'", + cookie->name); + cookie->result = FCIR_SEEN; + nr_seen++; + } + } + + if (nr_seen) + pthread_cond_broadcast(&state->cookies_cond); +} + +/* + * Set _ABORT on all pending cookies and wake up all client threads. + */ +static void with_lock__abort_all_cookies(struct fsmonitor_daemon_state *state) +{ + /* assert current thread holding state->main_lock */ + + struct hashmap_iter iter; + struct fsmonitor_cookie_item *cookie; + int nr_aborted = 0; + + hashmap_for_each_entry(&state->cookies, &iter, cookie, entry) { + trace_printf_key(&trace_fsmonitor, "cookie-abort: '%s'", + cookie->name); + cookie->result = FCIR_ABORT; + nr_aborted++; + } + + if (nr_aborted) + pthread_cond_broadcast(&state->cookies_cond); +} + /* * Requests to and from a FSMonitor Protocol V2 provider use an opaque * "token" as a virtual timestamp. 
Clients can request a summary of all @@ -404,6 +560,9 @@ static void fsmonitor_free_token_data(struct fsmonitor_token_data *token) * We should create a new token and start fresh (as if we just * booted up). * + * [2] Some of those lost events may have been for cookie files. We + * should assume the worst and abort them rather than letting them starve. + * * If there are no concurrent threads reading the current token data * series, we can free it now. Otherwise, let the last reader free * it. @@ -425,6 +584,8 @@ static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state) state->current_token_data = new_one; fsmonitor_free_token_data(free_me); + + with_lock__abort_all_cookies(state); } void fsmonitor_force_resync(struct fsmonitor_daemon_state *state) @@ -500,6 +661,8 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, int hash_ret; int do_trivial = 0; int do_flush = 0; + int do_cookie = 0; + enum fsmonitor_cookie_item_result cookie_result; /* * We expect `command` to be of the form: @@ -560,6 +723,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, * We have a V2 valid token: * "builtin:<token_id>:<seq_nr>" */ + do_cookie = 1; } } @@ -568,6 +732,30 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, if (!state->current_token_data) BUG("fsmonitor state does not have a current token"); + /* + * Write a cookie file inside the directory being watched in + * an effort to flush out existing filesystem events that we + * actually care about. Suspend this client thread until we + * see the filesystem events for this cookie file. + * + * Creating the cookie lets us guarantee that our FS listener + * thread has drained the kernel queue and we are caught up + * with the kernel. + * + * If we cannot create the cookie (or otherwise guarantee that + * we are caught up), we send a trivial response. We have to + * assume that there might be some very, very recent activity + * on the FS still in flight. 
+ */ + if (do_cookie) { + cookie_result = with_lock__wait_for_cookie(state); + if (cookie_result != FCIR_SEEN) { + error(_("fsmonitor: cookie_result '%d' != SEEN"), + cookie_result); + do_trivial = 1; + } + } + if (do_flush) with_lock__do_force_resync(state); @@ -787,7 +975,9 @@ static int handle_client(void *data, return result; } -#define FSMONITOR_COOKIE_PREFIX ".fsmonitor-daemon-" +#define FSMONITOR_DIR "fsmonitor--daemon" +#define FSMONITOR_COOKIE_DIR "cookies" +#define FSMONITOR_COOKIE_PREFIX (FSMONITOR_DIR "/" FSMONITOR_COOKIE_DIR "/") enum fsmonitor_path_type fsmonitor_classify_path_workdir_relative( const char *rel) @@ -940,6 +1130,9 @@ void fsmonitor_publish(struct fsmonitor_daemon_state *state, } } + if (cookie_names->nr) + with_lock__mark_cookies_seen(state, cookie_names); + pthread_mutex_unlock(&state->main_lock); } @@ -1031,7 +1224,9 @@ static int fsmonitor_run_daemon(void) memset(&state, 0, sizeof(state)); + hashmap_init(&state.cookies, cookies_cmp, NULL, 0); pthread_mutex_init(&state.main_lock, NULL); + pthread_cond_init(&state.cookies_cond, NULL); state.error_code = 0; state.current_token_data = fsmonitor_new_token_data(); @@ -1056,6 +1251,44 @@ static int fsmonitor_run_daemon(void) state.nr_paths_watching = 2; } + /* + * We will write filesystem syncing cookie files into + * <gitdir>/<fsmonitor-dir>/<cookie-dir>/<pid>-<seq>. + * + * The extra layers of subdirectories here keep us from + * changing the mtime on ".git/" or ".git/foo/" when we create + * or delete cookie files. + * + * There have been problems with some IDEs that do a + * non-recursive watch of the ".git/" directory and run a + * series of commands any time something happens. + * + * For example, if we place our cookie files directly in + * ".git/" or ".git/foo/" then a `git status` (or similar + * command) from the IDE will cause a cookie file to be + * created in one of those dirs. This causes the mtime of + * those dirs to change. This triggers the IDE's watch + * notification. 
This triggers the IDE to run those commands + * again. And the process repeats and the machine never goes + * idle. + * + * Adding the extra layers of subdirectories prevents the + * mtime of ".git/" and ".git/foo" from changing when a + * cookie file is created. + */ + strbuf_init(&state.path_cookie_prefix, 0); + strbuf_addbuf(&state.path_cookie_prefix, &state.path_gitdir_watch); + + strbuf_addch(&state.path_cookie_prefix, '/'); + strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_DIR); + mkdir(state.path_cookie_prefix.buf, 0777); + + strbuf_addch(&state.path_cookie_prefix, '/'); + strbuf_addstr(&state.path_cookie_prefix, FSMONITOR_COOKIE_DIR); + mkdir(state.path_cookie_prefix.buf, 0777); + + strbuf_addch(&state.path_cookie_prefix, '/'); + /* * Confirm that we can create platform-specific resources for the * filesystem listener before we bother starting all the threads. @@ -1068,6 +1301,7 @@ static int fsmonitor_run_daemon(void) err = fsmonitor_run_daemon_1(&state); done: + pthread_cond_destroy(&state.cookies_cond); pthread_mutex_destroy(&state.main_lock); fsm_listen__dtor(&state); @@ -1075,6 +1309,7 @@ done: strbuf_release(&state.path_worktree_watch); strbuf_release(&state.path_gitdir_watch); + strbuf_release(&state.path_cookie_prefix); return err; } diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h index 010fbfe60e..bd09fffc17 100644 --- a/fsmonitor--daemon.h +++ b/fsmonitor--daemon.h @@ -45,6 +45,11 @@ struct fsmonitor_daemon_state { struct fsmonitor_token_data *current_token_data; + struct strbuf path_cookie_prefix; + pthread_cond_t cookies_cond; + int cookie_seq; + struct hashmap cookies; + int error_code; struct fsmonitor_daemon_backend_data *backend_data; From 26b9f34ab32b66a6387dc5ab3c773d3d4cd86685 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:12 +0000 Subject: [PATCH 149/150] fsmonitor: force update index after large responses Measure the time taken to apply the FSMonitor query result to the 
index and the untracked-cache. Set the `FSMONITOR_CHANGED` bit on `istate->cache_changed` when FSMonitor returns a very large response to ensure that the index is written to disk. Normally, when the FSMonitor response includes a tracked file, the index is always updated. Similarly, the index might be updated when the response alters the untracked-cache (when enabled). However, in cases where neither of those cause the index to be considered changed, the FSMonitor response is wasted. Subsequent Git commands will make requests with the same token and receive the same response. If that response is very large, performance may suffer. It would be more efficient to force update the index now (and the token in the index extension) in order to reduce the size of the response received by future commands. This was observed on Windows after a large checkout. On Windows, the kernel emits events for the files that are changed as they are changed. However, it might delay events for the containing directories until the system is more idle (or someone scans the directory (so it seems)). The first status following a checkout would get the list of files. The subsequent status commands would get the list of directories as the events trickled out. But they would never catch up because the token was not advanced because the index wasn't updated. This list of directories caused `wt_status_collect_untracked()` to unnecessarily spend time actually scanning them during each command. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- fsmonitor.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/fsmonitor.c b/fsmonitor.c index a38b5710eb..292a6742b4 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -219,6 +219,43 @@ static void fsmonitor_refresh_callback(struct index_state *istate, char *name) untracked_cache_invalidate_path(istate, name, 0); } +/* + * The number of pathnames that we need to receive from FSMonitor + * before we force the index to be updated. + * + * Note that any pathname within the set of received paths MAY cause + * cache-entry or istate flag bits to be updated and thus cause the + * index to be updated on disk. + * + * However, the response may contain many paths (such as ignored + * paths) that will not update any flag bits. And thus not force the + * index to be updated. (This is fine and normal.) It also means + * that the token will not be updated in the FSMonitor index + * extension. So the next Git command will find the same token in the + * index, make the same token-relative request, and receive the same + * response (plus any newly changed paths). If this response is large + * (and continues to grow), performance could be impacted. + * + * For example, if the user runs a build and it writes 100K object + * files but doesn't modify any source files, the index would not need + * to be updated. The FSMonitor response (after the build and + * relative to a pre-build token) might be 5MB. Each subsequent Git + * command will receive that same 100K/5MB response until something + * causes the index to be updated. And `refresh_fsmonitor()` will + * have to iterate over those 100K paths each time. + * + * Performance could be improved if we optionally force update the + * index after a very large response and get an updated token into + * the FSMonitor index extension. 
This should allow subsequent + * commands to get smaller and more current responses. + * + * The value chosen here does not need to be precise. The index + * will be updated automatically the first time the user touches + * a tracked file and causes a command like `git status` to + * update an mtime and/or set a flag bit. + */ +static int fsmonitor_force_update_threshold = 100; + void refresh_fsmonitor(struct index_state *istate) { struct strbuf query_result = STRBUF_INIT; @@ -362,25 +399,39 @@ apply_results: * information and that we should consider everything * invalid. We call this a trivial response. */ + trace2_region_enter("fsmonitor", "apply_results", istate->repo); + if (query_success && !is_trivial) { /* * Mark all pathnames returned by the monitor as dirty. * * This updates both the cache-entries and the untracked-cache. */ + int count = 0; + buf = query_result.buf; for (i = bol; i < query_result.len; i++) { if (buf[i] != '\0') continue; fsmonitor_refresh_callback(istate, buf + bol); bol = i + 1; + count++; } - if (bol < query_result.len) + if (bol < query_result.len) { fsmonitor_refresh_callback(istate, buf + bol); + count++; + } /* Now mark the untracked cache for fsmonitor usage */ if (istate->untracked) istate->untracked->use_fsmonitor = 1; + + if (count > fsmonitor_force_update_threshold) + istate->cache_changed |= FSMONITOR_CHANGED; + + trace2_data_intmax("fsmonitor", istate->repo, "apply_count", + count); + } else { /* * We failed to get a response or received a trivial response, @@ -409,6 +460,8 @@ apply_results: if (istate->untracked) istate->untracked->use_fsmonitor = 0; } + trace2_region_leave("fsmonitor", "apply_results", istate->repo); + strbuf_release(&query_result); /* Now that we've updated istate, save the last_update_token */ From a3dfe97f418762ccf1d0601c5cce40c77046d4fc Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 18:03:13 +0000 Subject: [PATCH 150/150] t7527: test status 
with untracked-cache and fsmonitor--daemon Create 2x2 test matrix with the untracked-cache and fsmonitor--daemon features and a series of edits and verify that status output is identical. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- t/t7527-builtin-fsmonitor.sh | 115 +++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh index 062e01c0df..bd0c952a11 100755 --- a/t/t7527-builtin-fsmonitor.sh +++ b/t/t7527-builtin-fsmonitor.sh @@ -205,6 +205,8 @@ test_expect_success 'setup' ' .gitignore expect* actual* + flush* + trace* EOF git -c core.fsmonitor=false add . && @@ -491,4 +493,117 @@ test_expect_success 'cleanup worktrees' ' stop_daemon_delete_repo wt-base ' +# The next few tests perform arbitrary/contrived file operations and +# confirm that status is correct. That is, that the data (or lack of +# data) from fsmonitor doesn't cause incorrect results. And doesn't +# cause incorrect results when the untracked-cache is enabled. + +test_lazy_prereq UNTRACKED_CACHE ' + git update-index --test-untracked-cache +' + +test_expect_success 'Matrix: setup for untracked-cache,fsmonitor matrix' ' + test_unconfig core.fsmonitor && + git update-index --no-fsmonitor && + test_might_fail git fsmonitor--daemon stop +' + +matrix_clean_up_repo () { + git reset --hard HEAD && + git clean -fd +} + +matrix_try () { + uc=$1 && + fsm=$2 && + fn=$3 && + + if test $uc = true && test $fsm = false + then + # The untracked-cache is buggy when FSMonitor is + # DISABLED, so skip the tests for this matrix + # combination. + # + # We've observed random, occasional test failures on + # Windows and MacOS when the UC is turned on and FSM + # is turned off. These are rare, but they do happen + # indicating that it is probably a race condition within + # the untracked cache itself. 
+ # + # It usually happens when a test does F/D trickery and + # then the NEXT test fails because of extra status + # output from stale UC data from the previous test. + # + # Since FSMonitor is not involved in the error, skip + # the tests for this matrix combination. + # + return 0 + fi && + + test_expect_success "Matrix[uc:$uc][fsm:$fsm] $fn" ' + matrix_clean_up_repo && + $fn && + if test $uc = false && test $fsm = false + then + git status --porcelain=v1 >.git/expect.$fn + else + git status --porcelain=v1 >.git/actual.$fn && + test_cmp .git/expect.$fn .git/actual.$fn + fi + ' +} + +uc_values="false" +test_have_prereq UNTRACKED_CACHE && uc_values="false true" +for uc_val in $uc_values +do + if test $uc_val = false + then + test_expect_success "Matrix[uc:$uc_val] disable untracked cache" ' + git config core.untrackedcache false && + git update-index --no-untracked-cache + ' + else + test_expect_success "Matrix[uc:$uc_val] enable untracked cache" ' + git config core.untrackedcache true && + git update-index --untracked-cache + ' + fi + + fsm_values="false true" + for fsm_val in $fsm_values + do + if test $fsm_val = false + then + test_expect_success "Matrix[uc:$uc_val][fsm:$fsm_val] disable fsmonitor" ' + test_unconfig core.fsmonitor && + git update-index --no-fsmonitor && + test_might_fail git fsmonitor--daemon stop + ' + else + test_expect_success "Matrix[uc:$uc_val][fsm:$fsm_val] enable fsmonitor" ' + git config core.fsmonitor true && + git fsmonitor--daemon start && + git update-index --fsmonitor + ' + fi + + matrix_try $uc_val $fsm_val edit_files + matrix_try $uc_val $fsm_val delete_files + matrix_try $uc_val $fsm_val create_files + matrix_try $uc_val $fsm_val rename_files + matrix_try $uc_val $fsm_val file_to_directory + matrix_try $uc_val $fsm_val directory_to_file + + if test $fsm_val = true + then + test_expect_success "Matrix[uc:$uc_val][fsm:$fsm_val] disable fsmonitor at end" ' + test_unconfig core.fsmonitor && + git update-index --no-fsmonitor && + 
test_might_fail git fsmonitor--daemon stop + ' + fi + done +done + test_done