From 5506683deaab4c1475800e9b4cd2c06b8de4de97 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:02 +0800 Subject: [PATCH 01/45] t7002: add tests for moving from in-cone to out-of-cone Add corresponding tests to test that user can move an in-cone to out-of-cone when --sparse is supplied. Such can be either clean or dirty, and moving it results in different behaviors: A clean move should move to in the index (do *not* create in the worktree), then delete from the worktree. A dirty move should move the to the , both in the working tree and the index, but should *not* remove the resulted path from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit. Also make sure that if exists in the index (existing check for if is in the worktree is not enough in in-to-out moves), warn user against the overwrite. And Git should force the overwrite when supplied with -f or --force. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- t/t7002-mv-sparse-checkout.sh | 198 ++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 71fe29690f..1ac78edde6 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -290,4 +290,202 @@ test_expect_success 'move sparse file to existing destination with --force and - test_cmp expect sub/file1 ' +test_expect_failure 'move clean path from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + + test_must_fail git mv sub/d folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/d" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/d folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/d && + test_path_is_missing folder1/d && + git ls-files -t >actual && + ! grep "^H sub/d\$" actual && + grep "S folder1/d" actual +' + +test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "sub/file1 overwrite" >sub/file1 && + git add sub/file1 && + + test_must_fail git mv sub/file1 folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/file1 folder1 2>stderr && + echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/file1 folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/file1 && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/file1" actual && + grep "S folder1/file1" actual && + + # compare file content before move and after move + echo "sub/file1 overwrite" >expect && + git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +# This test is testing the same behavior as the +# "move clean path from in-cone to out-of-cone overwrite" above. +# The only difference is the changes from "folder1" to "folder1/file1" +test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "sub/file1 overwrite" >sub/file1 && + git add sub/file1 && + + test_must_fail git mv sub/file1 folder1/file1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/file1 folder1/file1 2>stderr && + echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/file1 folder1/file1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/file1 && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/file1" actual && + grep "S folder1/file1" actual && + + # compare file content before move and after move + echo "sub/file1 overwrite" >expect && + git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +test_expect_failure 'move directory with one of the files overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + mkdir -p folder1/dir && + touch folder1/dir/file1 && + git add folder1 && + git sparse-checkout set --cone sub && + + echo test >sub/dir/file1 && + git add sub/dir/file1 && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + echo "folder1/dir/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/dir folder1 2>stderr && + echo "fatal: destination exists in the index, source=sub/dir/file1, destination=folder1/dir/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/dir folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/dir/file1 && + test_path_is_missing sub/dir/e && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/dir/file1" actual && + ! grep "H sub/dir/e" actual && + grep "S folder1/dir/file1" actual && + + # compare file content before move and after move + echo test >expect && + git ls-files -s -- folder1/dir/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +test_expect_failure 'move dirty path from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "modified" >>sub/d && + + test_must_fail git mv sub/d folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/d" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/d folder1 2>stderr && + + test_path_is_missing sub/d && + test_path_is_file folder1/d && + git ls-files -t >actual && + ! grep "^H sub/d\$" actual && + grep "H folder1/d" actual +' + +test_expect_failure 'move dir from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/dir folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing folder1 && + git ls-files -t >actual && + ! grep "H sub/dir/e" actual && + grep "S folder1/dir/e" actual +' + +test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + touch sub/dir/e2 sub/dir/e3 && + git add sub/dir/e2 sub/dir/e3 && + echo "modified" >>sub/dir/e2 && + echo "modified" >>sub/dir/e3 && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + echo "folder1/dir/e2" >>expect && + echo "folder1/dir/e3" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/dir folder1 2>stderr && + + test_path_is_missing folder1/dir/e && + test_path_is_file folder1/dir/e2 && + test_path_is_file folder1/dir/e3 && + git ls-files -t >actual && + ! grep "H sub/dir/e" actual && + ! grep "H sub/dir/e2" actual && + ! grep "H sub/dir/e3" actual && + grep "S folder1/dir/e" actual && + grep "H folder1/dir/e2" actual && + grep "H folder1/dir/e3" actual +' + test_done From 72e59ba19e776abbff7dccbf093c73e43527a339 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:03 +0800 Subject: [PATCH 02/45] mv: rename check_dir_in_index() to empty_dir_has_sparse_contents() Method check_dir_in_index() introduced in b91a2b6594 (mv: add check_dir_in_index() and solve general dir check issue, 2022-06-30) does not describe its intent and behavior well. Change its name to empty_dir_has_sparse_contents(), which more precisely describes its purpose. Reverse the return values, check_dir_in_index() return 0 for success and 1 for failure; reverse the values so empty_dir_has_sparse_contents() return 1 for success and 0 for failure. These values are more intuitive because 1 usually means "has" and 0 means "not found". Also modify the documentation to better align with the method's intent and behavior. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 2a38e2af46..969f464f7d 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -125,15 +125,13 @@ static int index_range_of_same_dir(const char *src, int length, } /* - * Check if an out-of-cone directory should be in the index. Imagine this case - * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit - * and thus the directory is sparsified. - * - * Return 0 if such directory exist (i.e. with any of its contained files not - * marked with CE_SKIP_WORKTREE, the directory would be present in working tree). - * Return 1 otherwise. + * Given the path of a directory that does not exist on-disk, check whether the + * directory contains any entries in the index with the SKIP_WORKTREE flag + * enabled. + * Return 1 if such index entries exist. + * Return 0 otherwise. */ -static int check_dir_in_index(const char *name) +static int empty_dir_has_sparse_contents(const char *name) { const char *with_slash = add_slash(name); int length = strlen(with_slash); @@ -144,14 +142,14 @@ static int check_dir_in_index(const char *name) if (pos < 0) { pos = -pos - 1; if (pos >= the_index.cache_nr) - return 1; + return 0; ce = active_cache[pos]; if (strncmp(with_slash, ce->name, length)) - return 1; - if (ce_skip_worktree(ce)) return 0; + if (ce_skip_worktree(ce)) + return 1; } - return 1; + return 0; } int cmd_mv(int argc, const char **argv, const char *prefix) @@ -231,7 +229,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (pos < 0) { const char *src_w_slash = add_slash(src); if (!path_in_sparse_checkout(src_w_slash, &the_index) && - !check_dir_in_index(src)) { + empty_dir_has_sparse_contents(src)) { modes[i] |= SKIP_WORKTREE_DIR; goto dir_check; } From d57690a9c82c8888be6bb8ae17be231a2b2802e6 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:04 +0800 Subject: [PATCH 03/45] mv: free the with_slash in check_dir_in_index() with_slash may be a malloc'd pointer, and when it is, free it. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 969f464f7d..c17df2a12f 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -133,6 +133,7 @@ static int index_range_of_same_dir(const char *src, int length, */ static int empty_dir_has_sparse_contents(const char *name) { + int ret = 0; const char *with_slash = add_slash(name); int length = strlen(with_slash); @@ -142,14 +143,18 @@ static int empty_dir_has_sparse_contents(const char *name) if (pos < 0) { pos = -pos - 1; if (pos >= the_index.cache_nr) - return 0; + goto free_return; ce = active_cache[pos]; if (strncmp(with_slash, ce->name, length)) - return 0; + goto free_return; if (ce_skip_worktree(ce)) - return 1; + ret = 1; } - return 0; + +free_return: + if (with_slash != name) + free((char *)with_slash); + return ret; } int cmd_mv(int argc, const char **argv, const char *prefix) From c08830de41f15a8ee85cf7926266e1db732ec773 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:05 +0800 Subject: [PATCH 04/45] mv: check if is a SKIP_WORKTREE_DIR Originally, is assumed to be in the working tree. If it is not found as a directory, then it is determined to be either a regular file path, or error out if used under the second form (move into a directory) of 'git-mv'. Such behavior is not ideal, mainly because Git does not look into the index for , which could potentially be a SKIP_WORKTREE_DIR, which we need to determine for the later "moving from in-cone to out-of-cone" patch. Change the logic so that Git first check if is a directory with all its contents sparsified (a SKIP_WORKTREE_DIR). If is such a sparse directory, then we should modify the index the same way as we would if this were a non-sparse directory. We must be careful to ensure that the is marked with SKIP_WORKTREE_DIR. Also add a `dst_w_slash` to reuse the result from `add_slash()`, which was everywhere and can be simplified. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index c17df2a12f..11aea7b4db 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,6 +171,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) OPT_END(), }; const char **source, **destination, **dest_path, **submodule_gitfile; + const char *dst_w_slash; enum update_mode *modes; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; @@ -200,6 +201,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1])) flags = 0; dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags); + dst_w_slash = add_slash(dest_path[0]); submodule_gitfile = xcalloc(argc, sizeof(char *)); if (dest_path[0][0] == '\0') @@ -207,12 +209,20 @@ int cmd_mv(int argc, const char **argv, const char *prefix) destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME); else if (!lstat(dest_path[0], &st) && S_ISDIR(st.st_mode)) { - dest_path[0] = add_slash(dest_path[0]); - destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME); + destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); } else { - if (argc != 1) + if (!path_in_sparse_checkout(dst_w_slash, &the_index) && + empty_dir_has_sparse_contents(dst_w_slash)) { + destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); + } else if (argc != 1) { die(_("destination '%s' is not a directory"), dest_path[0]); - destination = dest_path; + } else { + destination = dest_path; + } + } + if (dst_w_slash != dest_path[0]) { + free((char *)dst_w_slash); + dst_w_slash = NULL; } /* Checking */ From 9284c3ce266fcf9abb0afbc59645c62dd58026dd Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:06 +0800 Subject: [PATCH 05/45] mv: remove BOTH from enum update_mode Since BOTH is not used anywhere in the code and its meaning is unclear, remove it. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin/mv.c b/builtin/mv.c index 11aea7b4db..7ac653be23 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -21,7 +21,6 @@ static const char * const builtin_mv_usage[] = { }; enum update_mode { - BOTH = 0, WORKING_DIRECTORY = (1 << 1), INDEX = (1 << 2), SPARSE = (1 << 3), From 5784db1b22feceafc72454dccd9637d19fdd422c Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:07 +0800 Subject: [PATCH 06/45] mv: from in-cone to out-of-cone Originally, moving an in-cone to an out-of-cone was not possible, mainly because such is a directory that is not present in the working tree. Change the behavior so that we can move an in-cone to out-of-cone when --sparse is supplied. Notice that can also be an out-of-cone file path, rather than a directory. Such can be either clean or dirty, and moving it results in different behaviors: A clean move should move to in the index (do *not* create in the worktree), then delete from the worktree. A dirty move should move the to the , both in the working tree and the index, but should *not* remove the resulted path from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit. Optional reading ================ We are strict about cone mode when is a file path. The reason is that some of the previous tests that use no-cone mode in t7002 are keep breaking, mainly because the `dst_mode = SPARSE;` line added in this patch. Most features developed in both "from-out-to-in" and "from-in-to-out" only care about cone mode situation, as no-cone mode is becoming irrelevant. And because assigning `SPARSE` to `dst_mode` when the repo is in no-cone mode causes miscellaneous bugs, we should just leave this new functionality to be exclusive cone mode and save some time. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 69 +++++++++++++++++++++++++++++++---- t/t7002-mv-sparse-checkout.sh | 8 ++-- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 7ac653be23..b64c28bd5b 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,12 +171,13 @@ int cmd_mv(int argc, const char **argv, const char *prefix) }; const char **source, **destination, **dest_path, **submodule_gitfile; const char *dst_w_slash; - enum update_mode *modes; + enum update_mode *modes, dst_mode = 0; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; struct lock_file lock_file = LOCK_INIT; struct cache_entry *ce; struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP; + struct string_list dirty_paths = STRING_LIST_INIT_NODUP; git_config(git_default_config, NULL); @@ -213,10 +214,21 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (!path_in_sparse_checkout(dst_w_slash, &the_index) && empty_dir_has_sparse_contents(dst_w_slash)) { destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); + dst_mode = SKIP_WORKTREE_DIR; } else if (argc != 1) { die(_("destination '%s' is not a directory"), dest_path[0]); } else { destination = dest_path; + /* + * is a file outside of sparse-checkout + * cone. Insist on cone mode here for backward + * compatibility. We don't want dst_mode to be assigned + * for a file when the repo is using no-cone mode (which + * is deprecated at this point) sparse-checkout. As + * SPARSE here is only considering cone-mode situation. + */ + if (!path_in_cone_mode_sparse_checkout(destination[0], &the_index)) + dst_mode = SPARSE; } } if (dst_w_slash != dest_path[0]) { @@ -410,6 +422,7 @@ remove_entry: const char *src = source[i], *dst = destination[i]; enum update_mode mode = modes[i]; int pos; + int sparse_and_dirty = 0; struct checkout state = CHECKOUT_INIT; state.istate = &the_index; @@ -420,6 +433,7 @@ remove_entry: if (show_only) continue; if (!(mode & (INDEX | SPARSE | SKIP_WORKTREE_DIR)) && + !(dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && rename(src, dst) < 0) { if (ignore_errors) continue; @@ -439,17 +453,55 @@ remove_entry: pos = cache_name_pos(src, strlen(src)); assert(pos >= 0); + if (!(mode & SPARSE) && !lstat(src, &st)) + sparse_and_dirty = ce_modified(active_cache[pos], &st, 0); rename_cache_entry_at(pos, dst); - if ((mode & SPARSE) && - (path_in_sparse_checkout(dst, &the_index))) { - int dst_pos; + if (ignore_sparse && + core_apply_sparse_checkout && + core_sparse_checkout_cone) { + /* + * NEEDSWORK: we are *not* paying attention to + * "out-to-out" move ( is out-of-cone and + * is out-of-cone) at this point. It + * should be added in a future patch. + */ + if ((mode & SPARSE) && + path_in_sparse_checkout(dst, &the_index)) { + /* from out-of-cone to in-cone */ + int dst_pos = cache_name_pos(dst, strlen(dst)); + struct cache_entry *dst_ce = active_cache[dst_pos]; - dst_pos = cache_name_pos(dst, strlen(dst)); - active_cache[dst_pos]->ce_flags &= ~CE_SKIP_WORKTREE; + dst_ce->ce_flags &= ~CE_SKIP_WORKTREE; - if (checkout_entry(active_cache[dst_pos], &state, NULL, NULL)) - die(_("cannot checkout %s"), active_cache[dst_pos]->name); + if (checkout_entry(dst_ce, &state, NULL, NULL)) + die(_("cannot checkout %s"), dst_ce->name); + } else if ((dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && + !(mode & SPARSE) && + !path_in_sparse_checkout(dst, &the_index)) { + /* from in-cone to out-of-cone */ + int dst_pos = cache_name_pos(dst, strlen(dst)); + struct cache_entry *dst_ce = active_cache[dst_pos]; + + /* + * if src is clean, it will suffice to remove it + */ + if (!sparse_and_dirty) { + dst_ce->ce_flags |= CE_SKIP_WORKTREE; + unlink_or_warn(src); + } else { + /* + * if src is dirty, move it to the + * destination and create leading + * dirs if necessary + */ + char *dst_dup = xstrdup(dst); + string_list_append(&dirty_paths, dst); + safe_create_leading_directories(dst_dup); + FREE_AND_NULL(dst_dup); + rename(src, dst); + } + } } } @@ -461,6 +513,7 @@ remove_entry: die(_("Unable to write new index file")); string_list_clear(&src_for_dst, 0); + string_list_clear(&dirty_paths, 0); UNLEAK(source); UNLEAK(dest_path); free(submodule_gitfile); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 1ac78edde6..d875f492dd 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -290,7 +290,7 @@ test_expect_success 'move sparse file to existing destination with --force and - test_cmp expect sub/file1 ' -test_expect_failure 'move clean path from in-cone to out-of-cone' ' +test_expect_success 'move clean path from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && @@ -419,7 +419,7 @@ test_expect_failure 'move directory with one of the files overwrite' ' test_cmp expect actual ' -test_expect_failure 'move dirty path from in-cone to out-of-cone' ' +test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "modified" >>sub/d && @@ -439,7 +439,7 @@ test_expect_failure 'move dirty path from in-cone to out-of-cone' ' grep "H folder1/d" actual ' -test_expect_failure 'move dir from in-cone to out-of-cone' ' +test_expect_success 'move dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && @@ -458,7 +458,7 @@ test_expect_failure 'move dir from in-cone to out-of-cone' ' grep "S folder1/dir/e" actual ' -test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' ' +test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && touch sub/dir/e2 sub/dir/e3 && From b6f51e3db978ae2a72c290a10bd205f9e1d6818e Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:08 +0800 Subject: [PATCH 07/45] mv: cleanup empty WORKING_DIRECTORY Originally, moving from-in-to-out may leave an empty directory on-disk (this kind of directory is marked as WORKING_DIRECTORY). Cleanup such directories if they are empty (don't have any entries under them). Modify two tests that take as WORKING_DIRECTORY to test this behavior. Suggested-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 27 +++++++++++++++++++++++++++ t/t7002-mv-sparse-checkout.sh | 4 ++++ 2 files changed, 31 insertions(+) diff --git a/builtin/mv.c b/builtin/mv.c index b64c28bd5b..f4961c0ffd 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,6 +171,9 @@ int cmd_mv(int argc, const char **argv, const char *prefix) }; const char **source, **destination, **dest_path, **submodule_gitfile; const char *dst_w_slash; + const char **src_dir = NULL; + int src_dir_nr = 0, src_dir_alloc = 0; + struct strbuf a_src_dir = STRBUF_INIT; enum update_mode *modes, dst_mode = 0; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; @@ -313,6 +316,10 @@ dir_check: /* last - first >= 1 */ modes[i] |= WORKING_DIRECTORY; + + ALLOC_GROW(src_dir, src_dir_nr + 1, src_dir_alloc); + src_dir[src_dir_nr++] = src; + n = argc + last - first; REALLOC_ARRAY(source, n); REALLOC_ARRAY(destination, n); @@ -505,6 +512,26 @@ remove_entry: } } + /* + * cleanup the empty src_dirs + */ + for (i = 0; i < src_dir_nr; i++) { + int dummy; + strbuf_addstr(&a_src_dir, src_dir[i]); + /* + * if entries under a_src_dir are all moved away, + * recursively remove a_src_dir to cleanup + */ + if (index_range_of_same_dir(a_src_dir.buf, a_src_dir.len, + &dummy, &dummy) < 1) { + remove_dir_recursively(&a_src_dir, 0); + } + strbuf_reset(&a_src_dir); + } + + strbuf_release(&a_src_dir); + free(src_dir); + if (gitmodules_modified) stage_updated_gitmodules(&the_index); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index d875f492dd..df8c0fa572 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -442,6 +442,7 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_expect_success 'move dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && + mkdir sub/dir/deep && test_must_fail git mv sub/dir folder1 2>stderr && cat sparse_error_header >expect && @@ -452,6 +453,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' ' git mv --sparse sub/dir folder1 2>stderr && test_must_be_empty stderr && + test_path_is_missing sub/dir && test_path_is_missing folder1 && git ls-files -t >actual && ! grep "H sub/dir/e" actual && @@ -461,6 +463,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' ' test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && + mkdir sub/dir/deep && touch sub/dir/e2 sub/dir/e3 && git add sub/dir/e2 sub/dir/e3 && echo "modified" >>sub/dir/e2 && @@ -476,6 +479,7 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' git mv --sparse sub/dir folder1 2>stderr && + test_path_is_missing sub/dir && test_path_is_missing folder1/dir/e && test_path_is_file folder1/dir/e2 && test_path_is_file folder1/dir/e3 && From 5efd533ed8896592740afe22ac07271497d6db36 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:09 +0800 Subject: [PATCH 08/45] advice.h: add advise_on_moving_dirty_path() Add an advice. When the user use `git mv --sparse `, Git will warn the user to use `git add --sparse ` then use `git sparse-checkout reapply` to apply the sparsity rules. Add a few lines to previous "move dirty path" tests so we can test this new advice is working. Suggested-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- advice.c | 19 +++++++++++++++++++ advice.h | 1 + builtin/mv.c | 3 +++ t/t7002-mv-sparse-checkout.sh | 24 +++++++++++++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/advice.c b/advice.c index 6fda9edbc2..fd18968943 100644 --- a/advice.c +++ b/advice.c @@ -261,3 +261,22 @@ void detach_advice(const char *new_name) fprintf(stderr, fmt, new_name); } + +void advise_on_moving_dirty_path(struct string_list *pathspec_list) +{ + struct string_list_item *item; + + if (!pathspec_list->nr) + return; + + fprintf(stderr, _("The following paths have been moved outside the\n" + "sparse-checkout definition but are not sparse due to local\n" + "modifications.\n")); + for_each_string_list_item(item, pathspec_list) + fprintf(stderr, "%s\n", item->string); + + advise_if_enabled(ADVICE_UPDATE_SPARSE_PATH, + _("To correct the sparsity of these paths, do the following:\n" + "* Use \"git add --sparse \" to update the index\n" + "* Use \"git sparse-checkout reapply\" to apply the sparsity rules")); +} diff --git a/advice.h b/advice.h index 7ddc6cbc1a..07e0f76833 100644 --- a/advice.h +++ b/advice.h @@ -74,5 +74,6 @@ void NORETURN die_conclude_merge(void); void NORETURN die_ff_impossible(void); void advise_on_updating_sparse_paths(struct string_list *pathspec_list); void detach_advice(const char *new_name); +void advise_on_moving_dirty_path(struct string_list *pathspec_list); #endif /* ADVICE_H */ diff --git a/builtin/mv.c b/builtin/mv.c index f4961c0ffd..d80adf8de5 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -532,6 +532,9 @@ remove_entry: strbuf_release(&a_src_dir); free(src_dir); + if (dirty_paths.nr) + advise_on_moving_dirty_path(&dirty_paths); + if (gitmodules_modified) stage_updated_gitmodules(&the_index); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index df8c0fa572..5e5eb70e7a 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -28,12 +28,25 @@ test_expect_success 'setup' " updated in the index: EOF - cat >sparse_hint <<-EOF + cat >sparse_hint <<-EOF && hint: If you intend to update such entries, try one of the following: hint: * Use the --sparse option. hint: * Disable or modify the sparsity rules. hint: Disable this message with \"git config advice.updateSparsePath false\" EOF + + cat >dirty_error_header <<-EOF && + The following paths have been moved outside the + sparse-checkout definition but are not sparse due to local + modifications. + EOF + + cat >dirty_hint <<-EOF + hint: To correct the sparsity of these paths, do the following: + hint: * Use \"git add --sparse \" to update the index + hint: * Use \"git sparse-checkout reapply\" to apply the sparsity rules + hint: Disable this message with \"git config advice.updateSparsePath false\" + EOF " test_expect_success 'mv refuses to move sparse-to-sparse' ' @@ -431,6 +444,10 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_cmp expect stderr && git mv --sparse sub/d folder1 2>stderr && + cat dirty_error_header >expect && + echo "folder1/d" >>expect && + cat dirty_hint >>expect && + test_cmp expect stderr && test_path_is_missing sub/d && test_path_is_file folder1/d && @@ -478,6 +495,11 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_cmp expect stderr && git mv --sparse sub/dir folder1 2>stderr && + cat dirty_error_header >expect && + echo "folder1/dir/e2" >>expect && + echo "folder1/dir/e3" >>expect && + cat dirty_hint >>expect && + test_cmp expect stderr && test_path_is_missing sub/dir && test_path_is_missing folder1/dir/e && From da6fe05b3d624ad5b40472eebfe0499c15ecc93d Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:10 +0800 Subject: [PATCH 09/45] mv: check overwrite for in-to-out move Add checking logic for overwriting when moving from in-cone to out-of-cone. It is the index version of the original overwrite logic. Helped-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 12 ++++++++++++ t/t7002-mv-sparse-checkout.sh | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index d80adf8de5..4b67bd096a 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -376,6 +376,18 @@ dir_check: goto act_on_entry; } + if (ignore_sparse && + (dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && + index_entry_exists(&the_index, dst, strlen(dst))) { + bad = _("destination exists in the index"); + if (force) { + if (verbose) + warning(_("overwriting '%s'"), dst); + bad = NULL; + } else { + goto act_on_entry; + } + } /* * We check if the paths are in the sparse-checkout * definition as a very final check, since that diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 5e5eb70e7a..26582ae4e5 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -323,7 +323,7 @@ test_expect_success 'move clean path from in-cone to out-of-cone' ' grep "S folder1/d" actual ' -test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' +test_expect_success 'move clean path from in-cone to out-of-cone overwrite' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "sub/file1 overwrite" >sub/file1 && @@ -359,7 +359,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' # This test is testing the same behavior as the # "move clean path from in-cone to out-of-cone overwrite" above. # The only difference is the changes from "folder1" to "folder1/file1" -test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' ' +test_expect_success 'move clean path from in-cone to out-of-cone file overwrite' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "sub/file1 overwrite" >sub/file1 && @@ -392,7 +392,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' test_cmp expect actual ' -test_expect_failure 'move directory with one of the files overwrite' ' +test_expect_success 'move directory with one of the files overwrite' ' test_when_finished "cleanup_sparse_checkout" && mkdir -p folder1/dir && touch folder1/dir/file1 && From b4f25b07c74fc294cab6c12d09faa2021c67f25a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:39 +0000 Subject: [PATCH 10/45] t: add skeleton chainlint.pl Although chainlint.sed usefully identifies broken &&-chains in tests, it has several shortcomings which include: * only detects &&-chain breakage in subshells (one-level deep) * does not check for broken top-level &&-chains; that task is left to the "magic exit code 117" checker built into test-lib.sh, however, that detection does not extend to `{...}` blocks, `$(...)` expressions, or compound statements such as `if...fi`, `while...done`, `case...esac` * uses heuristics, which makes it (potentially) fallible and difficult to tweak to handle additional real-world cases * written in `sed` and employs advanced `sed` operators which are probably not well-known to many programmers, thus the pool of people who can maintain it is likely small * manually simulates recursion into subshells which makes it much more difficult to reason about than, say, a traditional top-down parser * checks each test as the test is run, which can get expensive for tests which are run repeatedly by functions or loops since their bodies will be checked over and over (tens or hundreds of times) unnecessarily To address these shortcomings, begin implementing a more functional and precise test linter which understands shell syntax and semantics rather than employing heuristics, thus is able to recognize structural problems with tests beyond broken &&-chains. The new linter is written in Perl, thus should be more accessible to a wider audience, and is structured as a traditional top-down parser which makes it much easier to reason about, and allows it to inspect compound statements within test bodies to any depth. Furthermore, it can check all test definitions in the entire project in a single invocation rather than having to be invoked once per test, and each test definition is checked only once no matter how many times the test is actually run. At this stage, the new linter is just a skeleton containing boilerplate which handles command-line options, collects and reports statistics, and feeds its arguments -- paths of test scripts -- to a (presently) do-nothing script parser for validation. Subsequent changes will flesh out the functionality. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100755 t/chainlint.pl diff --git a/t/chainlint.pl b/t/chainlint.pl new file mode 100755 index 0000000000..e8ab95c785 --- /dev/null +++ b/t/chainlint.pl @@ -0,0 +1,115 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2021-2022 Eric Sunshine +# +# This tool scans shell scripts for test definitions and checks those tests for +# problems, such as broken &&-chains, which might hide bugs in the tests +# themselves or in behaviors being exercised by the tests. +# +# Input arguments are pathnames of shell scripts containing test definitions, +# or globs referencing a collection of scripts. For each problem discovered, +# the pathname of the script containing the test is printed along with the test +# name and the test body with a `?!FOO?!` annotation at the location of each +# detected problem, where "FOO" is a tag such as "AMP" which indicates a broken +# &&-chain. Returns zero if no problems are discovered, otherwise non-zero. + +use warnings; +use strict; +use File::Glob; +use Getopt::Long; + +my $show_stats; +my $emit_all; + +package ScriptParser; + +sub new { + my $class = shift @_; + my $self = bless {} => $class; + $self->{output} = []; + $self->{ntests} = 0; + return $self; +} + +sub parse_cmd { + return undef; +} + +# main contains high-level functionality for processing command-line switches, +# feeding input test scripts to ScriptParser, and reporting results. +package main; + +my $getnow = sub { return time(); }; +my $interval = sub { return time() - shift; }; +if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { + $getnow = sub { return [Time::HiRes::gettimeofday()]; }; + $interval = sub { return Time::HiRes::tv_interval(shift); }; +} + +sub show_stats { + my ($start_time, $stats) = @_; + my $walltime = $interval->($start_time); + my ($usertime) = times(); + my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); + $total_workers++; + $total_scripts += $nscripts; + $total_tests += $ntests; + $total_errs += $nerrs; + } + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); +} + +sub check_script { + my ($id, $next_script, $emit) = @_; + my ($nscripts, $ntests, $nerrs) = (0, 0, 0); + while (my $path = $next_script->()) { + $nscripts++; + my $fh; + unless (open($fh, "<", $path)) { + $emit->("?!ERR?! $path: $!\n"); + next; + } + my $s = do { local $/; <$fh> }; + close($fh); + my $parser = ScriptParser->new(\$s); + 1 while $parser->parse_cmd(); + if (@{$parser->{output}}) { + my $s = join('', @{$parser->{output}}); + $emit->("# chainlint: $path\n" . $s); + $nerrs += () = $s =~ /\?![^?]+\?!/g; + } + $ntests += $parser->{ntests}; + } + return [$id, $nscripts, $ntests, $nerrs]; +} + +sub exit_code { + my $stats = shift @_; + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + return 1 if $nerrs; + } + return 0; +} + +Getopt::Long::Configure(qw{bundling}); +GetOptions( + "emit-all!" => \$emit_all, + "stats|show-stats!" => \$show_stats) or die("option error\n"); + +my $start_time = $getnow->(); +my @stats; + +my @scripts; +push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); +unless (@scripts) { + show_stats($start_time, \@stats) if $show_stats; + exit; +} + +push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +show_stats($start_time, \@stats) if $show_stats; +exit(exit_code(\@stats)); From 7d4804731ed642b92b516908fb93397b08e986bf Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:40 +0000 Subject: [PATCH 11/45] chainlint.pl: add POSIX shell lexical analyzer Begin fleshing out chainlint.pl by adding a lexical analyzer for the POSIX shell command language. The sole entry point Lexer::scan_token() returns the next token from the input. It will be called by the upcoming shell language parser. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 177 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index e8ab95c785..81ffbf28bf 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -21,6 +21,183 @@ use Getopt::Long; my $show_stats; my $emit_all; +# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 +# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although +# similar to lexical analyzers for other languages, this one differs in a few +# substantial ways due to quirks of the shell command language. +# +# For instance, in many languages, newline is just whitespace like space or +# TAB, but in shell a newline is a command separator, thus a distinct lexical +# token. A newline is significant and returned as a distinct token even at the +# end of a shell comment. +# +# In other languages, `1+2` would typically be scanned as three tokens +# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar +# `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well. +# In shell, several characters with special meaning lose that meaning when not +# surrounded by whitespace. For instance, the negation operator `!` is special +# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is +# just a plain character in the longer token "foo!uucp". In many other +# languages, `"string"/foo:'string'` might be scanned as five tokens ("string", +# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. +# +# The lexical analyzer for the shell command language is also somewhat unusual +# in that it recursively invokes the parser to handle the body of `$(...)` +# expressions which can contain arbitrary shell code. Such expressions may be +# encountered both inside and outside of double-quoted strings. +# +# The lexical analyzer is responsible for consuming shell here-doc bodies which +# extend from the line following a `< $parser, + buff => $s, + heretags => [] + } => $class; +} + +sub scan_heredoc_tag { + my $self = shift @_; + ${$self->{buff}} =~ /\G(-?)/gc; + my $indented = $1; + my $tag = $self->scan_token(); + $tag =~ s/['"\\]//g; + push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag"); + return "<<$indented$tag"; +} + +sub scan_op { + my ($self, $c) = @_; + my $b = $self->{buff}; + return $c unless $$b =~ /\G(.)/sgc; + my $cc = $c . $1; + return scan_heredoc_tag($self) if $cc eq '<<'; + return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; + pos($$b)--; + return $c; +} + +sub scan_sqstring { + my $self = shift @_; + ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; + return "'" . $1; +} + +sub scan_dqstring { + my $self = shift @_; + my $b = $self->{buff}; + my $s = '"'; + while (1) { + # slurp up non-special characters + $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + $s .= '"', last if $c eq '"'; + $s .= '$' . $self->scan_dollar(), next if $c eq '$'; + if ($c eq '\\') { + $s .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n"; # line splice + # backslash escapes only $, `, ", \ in dq-string + $s .= '\\' unless $c =~ /^[\$`"\\]$/; + $s .= $c; + next; + } + die("internal error scanning dq-string '$c'\n"); + } + return $s; +} + +sub scan_balanced { + my ($self, $c1, $c2) = @_; + my $b = $self->{buff}; + my $depth = 1; + my $s = $c1; + while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { + $s .= $1; + $depth++, next if $s =~ /\Q$c1\E$/; + $depth--; + last if $depth == 0; + } + return $s; +} + +sub scan_subst { + my $self = shift @_; + my @tokens = $self->{parser}->parse(qr/^\)$/); + $self->{parser}->next_token(); # closing ")" + return @tokens; +} + +sub scan_dollar { + my $self = shift @_; + my $b = $self->{buff}; + return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) + return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) + return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} + return $1 if $$b =~ /\G(\w+)/gc; # $var + return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. + return ''; +} + +sub swallow_heredocs { + my $self = shift @_; + my $b = $self->{buff}; + my $tags = $self->{heretags}; + while (my $tag = shift @$tags) { + my $indent = $tag =~ s/^\t// ? '\\s*' : ''; + $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc; + } +} + +sub scan_token { + my $self = shift @_; + my $b = $self->{buff}; + my $token = ''; +RESTART: + $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) + return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment + while (1) { + # slurp up non-special characters + $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + last if $c =~ /^[ \t]$/; # whitespace ends token + pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; + $token .= $self->scan_sqstring(), next if $c eq "'"; + $token .= $self->scan_dqstring(), next if $c eq '"'; + $token .= $c . $self->scan_dollar(), next if $c eq '$'; + $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; + $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; + $token = $c, last if $c =~ /^[(){}]$/; + if ($c eq '\\') { + $token .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n" && length($token); # line splice + goto RESTART if $c eq "\n"; # line splice + $token .= '\\' . $c; + next; + } + die("internal error scanning character '$c'\n"); + } + return length($token) ? $token : undef; +} + package ScriptParser; sub new { From 6594554119811a01888b44112a7daec6fa0312b2 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:41 +0000 Subject: [PATCH 12/45] chainlint.pl: add POSIX shell parser Continue fleshing out chainlint.pl by adding a general purpose recursive descent parser for the POSIX shell command language. Although never invoked directly, upcoming parser subclasses will extend its functionality for specific purposes, such as plucking test definitions from input scripts and applying domain-specific knowledge to perform test validation. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 243 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index 81ffbf28bf..cdf136896b 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -198,6 +198,249 @@ RESTART: return length($token) ? $token : undef; } +# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It +# is a recursive descent parser very roughly modeled after section 2.10 "Shell +# Grammar" of POSIX chapter 2 "Shell Command Language". +package ShellParser; + +sub new { + my ($class, $s) = @_; + my $self = bless { + buff => [], + stop => [], + output => [] + } => $class; + $self->{lexer} = Lexer->new($self, $s); + return $self; +} + +sub next_token { + my $self = shift @_; + return pop(@{$self->{buff}}) if @{$self->{buff}}; + return $self->{lexer}->scan_token(); +} + +sub untoken { + my $self = shift @_; + push(@{$self->{buff}}, @_); +} + +sub peek { + my $self = shift @_; + my $token = $self->next_token(); + return undef unless defined($token); + $self->untoken($token); + return $token; +} + +sub stop_at { + my ($self, $token) = @_; + return 1 unless defined($token); + my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; + return defined($stop) && $token =~ $stop; +} + +sub expect { + my ($self, $expect) = @_; + my $token = $self->next_token(); + return $token if defined($token) && $token eq $expect; + push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "") . "'\n"); + $self->untoken($token) if defined($token); + return (); +} + +sub optional_newlines { + my $self = shift @_; + my @tokens; + while (my $token = $self->peek()) { + last unless $token eq "\n"; + push(@tokens, $self->next_token()); + } + return @tokens; +} + +sub parse_group { + my $self = shift @_; + return ($self->parse(qr/^}$/), + $self->expect('}')); +} + +sub parse_subshell { + my $self = shift @_; + return ($self->parse(qr/^\)$/), + $self->expect(')')); +} + +sub parse_case_pattern { + my $self = shift @_; + my @tokens; + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +sub parse_case { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # subject + $self->optional_newlines(), + $self->expect('in'), + $self->optional_newlines()); + while (1) { + my $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->parse_case_pattern(), + $self->optional_newlines(), + $self->parse(qr/^(?:;;|esac)$/)); # item body + $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->expect(';;'), + $self->optional_newlines()); + } + push(@tokens, $self->expect('esac')); + return @tokens; +} + +sub parse_for { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # variable + $self->optional_newlines()); + my $token = $self->peek(); + if (defined($token) && $token eq 'in') { + push(@tokens, + $self->expect('in'), + $self->optional_newlines()); + } + push(@tokens, + $self->parse(qr/^do$/), # items + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); + return @tokens; +} + +sub parse_if { + my $self = shift @_; + my @tokens; + while (1) { + push(@tokens, + $self->parse(qr/^then$/), # if/elif condition + $self->expect('then'), + $self->optional_newlines(), + $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body + my $token = $self->peek(); + last unless defined($token) && $token eq 'elif'; + push(@tokens, $self->expect('elif')); + } + my $token = $self->peek(); + if (defined($token) && $token eq 'else') { + push(@tokens, + $self->expect('else'), + $self->optional_newlines(), + $self->parse(qr/^fi$/)); # else body + } + push(@tokens, $self->expect('fi')); + return @tokens; +} + +sub parse_loop_body { + my $self = shift @_; + return $self->parse(qr/^done$/); +} + +sub parse_loop { + my $self = shift @_; + return ($self->parse(qr/^do$/), # condition + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); +} + +sub parse_func { + my $self = shift @_; + return ($self->expect('('), + $self->expect(')'), + $self->optional_newlines(), + $self->parse_cmd()); # body +} + +sub parse_bash_array_assignment { + my $self = shift @_; + my @tokens = $self->expect('('); + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +my %compound = ( + '{' => \&parse_group, + '(' => \&parse_subshell, + 'case' => \&parse_case, + 'for' => \&parse_for, + 'if' => \&parse_if, + 'until' => \&parse_loop, + 'while' => \&parse_loop); + +sub parse_cmd { + my $self = shift @_; + my $cmd = $self->next_token(); + return () unless defined($cmd); + return $cmd if $cmd eq "\n"; + + my $token; + my @tokens = $cmd; + if ($cmd eq '!') { + push(@tokens, $self->parse_cmd()); + return @tokens; + } elsif (my $f = $compound{$cmd}) { + push(@tokens, $self->$f()); + } elsif (defined($token = $self->peek()) && $token eq '(') { + if ($cmd !~ /\w=$/) { + push(@tokens, $self->parse_func()); + return @tokens; + } + $tokens[-1] .= join(' ', $self->parse_bash_array_assignment()); + } + + while (defined(my $token = $self->next_token())) { + $self->untoken($token), last if $self->stop_at($token); + push(@tokens, $token); + last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/; + } + push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n"; + return @tokens; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + push(@$tokens, @$cmd); +} + +sub parse { + my ($self, $stop) = @_; + push(@{$self->{stop}}, $stop); + goto DONE if $self->stop_at($self->peek()); + my @tokens; + while (my @cmd = $self->parse_cmd()) { + $self->accumulate(\@tokens, \@cmd); + last if $self->stop_at($self->peek()); + } +DONE: + pop(@{$self->{stop}}); + return @tokens; +} + package ScriptParser; sub new { From 6d932e92fcb49b59b780bc018fe550d867bb3d84 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:42 +0000 Subject: [PATCH 13/45] chainlint.pl: add parser to validate tests Continue fleshing out chainlint.pl by adding TestParser, a parser with special knowledge about how Git tests should be written; for instance, it knows that commands within a test body should be chained together with `&&`. An upcoming parser which plucks test definitions from test scripts will invoke TestParser for each test body it encounters. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index cdf136896b..ad257106e5 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -441,6 +441,52 @@ DONE: return @tokens; } +# TestParser is a subclass of ShellParser which, beyond parsing shell script +# code, is also imbued with semantic knowledge of test construction, and checks +# tests for common problems (such as broken &&-chains) which might hide bugs in +# the tests themselves or in behaviors being exercised by the tests. As such, +# TestParser is only called upon to parse test bodies, not the top-level +# scripts in which the tests are defined. +package TestParser; + +use base 'ShellParser'; + +sub find_non_nl { + my $tokens = shift @_; + my $n = shift @_; + $n = $#$tokens if !defined($n); + $n-- while $n >= 0 && $$tokens[$n] eq "\n"; + return $n; +} + +sub ends_with { + my ($tokens, $needles) = @_; + my $n = find_non_nl($tokens); + for my $needle (reverse(@$needles)) { + return undef if $n < 0; + $n = find_non_nl($tokens, $n), next if $needle eq "\n"; + return undef if $$tokens[$n] !~ $needle; + $n--; + } + return 1; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + goto DONE unless @$tokens; + goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; + + # did previous command end with "&&", "||", "|"? + goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + + # flag missing "&&" at end of previous command + my $n = find_non_nl($tokens); + splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; + +DONE: + $self->SUPER::accumulate($tokens, $cmd); +} + package ScriptParser; sub new { From d99ebd6d2e57baa3ec45b939d40cf939b85301a3 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:43 +0000 Subject: [PATCH 14/45] chainlint.pl: add parser to identify test definitions Finish fleshing out chainlint.pl by adding ScriptParser, a parser which scans shell scripts for tests defined by test_expect_success() and test_expect_failure(), plucks the test body from each definition, and passes it to TestParser for validation. It recognizes test definitions not only at the top-level of test scripts but also tests synthesized within compound commands such as loops and function. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 63 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index ad257106e5..d526723ac0 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,18 +487,75 @@ DONE: $self->SUPER::accumulate($tokens, $cmd); } +# ScriptParser is a subclass of ShellParser which identifies individual test +# definitions within test scripts, and passes each test body through TestParser +# to identify possible problems. ShellParser detects test definitions not only +# at the top-level of test scripts but also within compound commands such as +# loops and function definitions. package ScriptParser; +use base 'ShellParser'; + sub new { my $class = shift @_; - my $self = bless {} => $class; - $self->{output} = []; + my $self = $class->SUPER::new(@_); $self->{ntests} = 0; return $self; } +# extract the raw content of a token, which may be a single string or a +# composition of multiple strings and non-string character runs; for instance, +# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` +sub unwrap { + my $token = @_ ? shift @_ : $_; + # simple case: 'sqstring' or "dqstring" + return $token if $token =~ s/^'([^']*)'$/$1/; + return $token if $token =~ s/^"([^"]*)"$/$1/; + + # composite case + my ($s, $q, $escaped); + while (1) { + # slurp up non-special characters + $s .= $1 if $token =~ /\G([^\\'"]*)/gc; + # handle special characters + last unless $token =~ /\G(.)/sgc; + my $c = $1; + $q = undef, next if defined($q) && $c eq $q; + $q = $c, next if !defined($q) && $c =~ /^['"]$/; + if ($c eq '\\') { + last unless $token =~ /\G(.)/sgc; + $c = $1; + $s .= '\\' if $c eq "\n"; # preserve line splice + } + $s .= $c; + } + return $s +} + +sub check_test { + my $self = shift @_; + my ($title, $body) = map(unwrap, @_); + $self->{ntests}++; + my $parser = TestParser->new(\$body); + my @tokens = $parser->parse(); + return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $checked = join(' ', @tokens); + $checked =~ s/^\n//; + $checked =~ s/^ //mg; + $checked =~ s/ $//mg; + $checked .= "\n" unless $checked =~ /\n$/; + push(@{$self->{output}}, "# chainlint: $title\n$checked"); +} + sub parse_cmd { - return undef; + my $self = shift @_; + my @tokens = $self->SUPER::parse_cmd(); + return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/; + my $n = $#tokens; + $n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/; + $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body + $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body + return @tokens; } # main contains high-level functionality for processing command-line switches, From 29fb2ec384a867ca577335a12f4b45c184e7b642 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:44 +0000 Subject: [PATCH 15/45] chainlint.pl: validate test scripts in parallel Although chainlint.pl has undergone a good deal of optimization during its development -- increasing in speed significantly -- parsing and validating 1050+ scripts and 16500+ tests via Perl is not exactly instantaneous. However, perceived performance can be improved by taking advantage of the fact that there is no interdependence between test scripts or test definitions, thus parsing and validating can be done in parallel. The number of available cores is determined automatically but can be overridden via the --jobs option. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index d526723ac0..898573a910 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -15,9 +15,11 @@ use warnings; use strict; +use Config; use File::Glob; use Getopt::Long; +my $jobs = -1; my $show_stats; my $emit_all; @@ -569,6 +571,16 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { $interval = sub { return Time::HiRes::tv_interval(shift); }; } +sub ncores { + # Windows + return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); + # Linux / MSYS2 / Cygwin / WSL + do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor\s*:/, <>)); } if -r '/proc/cpuinfo'; + # macOS & BSD + return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/; + return 1; +} + sub show_stats { my ($start_time, $stats) = @_; my $walltime = $interval->($start_time); @@ -621,7 +633,9 @@ sub exit_code { Getopt::Long::Configure(qw{bundling}); GetOptions( "emit-all!" => \$emit_all, + "jobs|j=i" => \$jobs, "stats|show-stats!" => \$show_stats) or die("option error\n"); +$jobs = ncores() if $jobs < 1; my $start_time = $getnow->(); my @stats; @@ -633,6 +647,40 @@ unless (@scripts) { exit; } -push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +unless ($Config{useithreads} && eval { + require threads; threads->import(); + require Thread::Queue; Thread::Queue->import(); + 1; + }) { + push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); + show_stats($start_time, \@stats) if $show_stats; + exit(exit_code(\@stats)); +} + +my $script_queue = Thread::Queue->new(); +my $output_queue = Thread::Queue->new(); + +sub next_script { return $script_queue->dequeue(); } +sub emit { $output_queue->enqueue(@_); } + +sub monitor { + while (my $s = $output_queue->dequeue()) { + print($s); + } +} + +my $mon = threads->create({'context' => 'void'}, \&monitor); +threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; + +$script_queue->enqueue(@scripts); +$script_queue->end(); + +for (threads->list()) { + push(@stats, $_->join()) unless $_ == $mon; +} + +$output_queue->end(); +$mon->join(); + show_stats($start_time, \@stats) if $show_stats; exit(exit_code(\@stats)); From 35ebb1e37b25b9d799d1064d36a2ce668ad20264 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:45 +0000 Subject: [PATCH 16/45] chainlint.pl: don't require `return|exit|continue` to end with `&&` In order to check for &&-chain breakage, each time TestParser encounters a new command, it checks whether the previous command ends with `&&`, and -- with a couple exceptions -- signals breakage if it does not. The first exception is that a command may validly end with `||`, which is commonly employed as `command || return 1` at the very end of a loop body to terminate the loop early. The second is that piping one command's output with `|` to another command does not constitute a &&-chain break (the exit status of the pipe is the exit status of the final command in the pipe). However, it turns out that there are a few additional cases found in the wild in which it is likely safe for `&&` to be missing even when other commands follow. For instance: while {condition-1} do test {condition-2} || return 1 # or `exit 1` within a subshell more-commands done while {condition-1} do test {condition-2} || continue more-commands done Such cases indicate deliberate thought about failure modes by the test author, thus flagging them as breaking the &&-chain is not helpful. Therefore, take these special cases into consideration when checking for &&-chain breakage. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 20 ++++++++++++++++++-- t/chainlint/chain-break-continue.expect | 12 ++++++++++++ t/chainlint/chain-break-continue.test | 13 +++++++++++++ t/chainlint/chain-break-return-exit.expect | 4 ++++ t/chainlint/chain-break-return-exit.test | 5 +++++ t/chainlint/return-loop.expect | 5 +++++ t/chainlint/return-loop.test | 6 ++++++ 7 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/chain-break-continue.expect create mode 100644 t/chainlint/chain-break-continue.test create mode 100644 t/chainlint/chain-break-return-exit.expect create mode 100644 t/chainlint/chain-break-return-exit.test create mode 100644 t/chainlint/return-loop.expect create mode 100644 t/chainlint/return-loop.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 898573a910..31c444067c 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -473,13 +473,29 @@ sub ends_with { return 1; } +sub match_ending { + my ($tokens, $endings) = @_; + for my $needles (@$endings) { + next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); + return 1 if ends_with($tokens, $needles); + } + return undef; +} + +my @safe_endings = ( + [qr/^(?:&&|\|\||\|)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], + [qr/^(?:exit|return|continue)$/], + [qr/^(?:exit|return|continue)$/, qr/^;$/]); + sub accumulate { my ($self, $tokens, $cmd) = @_; goto DONE unless @$tokens; goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; - # did previous command end with "&&", "||", "|"? - goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + # did previous command end with "&&", "|", "|| return" or similar? + goto DONE if match_ending($tokens, \@safe_endings); # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); diff --git a/t/chainlint/chain-break-continue.expect b/t/chainlint/chain-break-continue.expect new file mode 100644 index 0000000000..47a3457710 --- /dev/null +++ b/t/chainlint/chain-break-continue.expect @@ -0,0 +1,12 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-continue.test b/t/chainlint/chain-break-continue.test new file mode 100644 index 0000000000..f0af71d8bd --- /dev/null +++ b/t/chainlint/chain-break-continue.test @@ -0,0 +1,13 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do +# LINT: broken &&-chain okay if explicit "continue" + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect new file mode 100644 index 0000000000..dba292ee89 --- /dev/null +++ b/t/chainlint/chain-break-return-exit.expect @@ -0,0 +1,4 @@ +for i in 1 2 3 4 ; do + git checkout main -b $i || return $? + test_commit $i $i $i tag$i || return $? +done diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test new file mode 100644 index 0000000000..e2b059933a --- /dev/null +++ b/t/chainlint/chain-break-return-exit.test @@ -0,0 +1,5 @@ +for i in 1 2 3 4 ; do +# LINT: broken &&-chain okay if explicit "return $?" signals failure + git checkout main -b $i || return $? + test_commit $i $i $i tag$i || return $? +done diff --git a/t/chainlint/return-loop.expect b/t/chainlint/return-loop.expect new file mode 100644 index 0000000000..cfc0549bef --- /dev/null +++ b/t/chainlint/return-loop.expect @@ -0,0 +1,5 @@ +while test $i -lt $((num - 5)) +do + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done diff --git a/t/chainlint/return-loop.test b/t/chainlint/return-loop.test new file mode 100644 index 0000000000..f90b171300 --- /dev/null +++ b/t/chainlint/return-loop.test @@ -0,0 +1,6 @@ +while test $i -lt $((num - 5)) +do +# LINT: "|| return {n}" valid loop escape outside subshell; no "&&" needed + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done From d00113ec3474a1652a73c11695c7e7b5182d80a7 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:46 +0000 Subject: [PATCH 17/45] t/Makefile: apply chainlint.pl to existing self-tests Now that chainlint.pl is functional, take advantage of the existing chainlint self-tests to validate its operation. (While at it, stop validating chainlint.sed against the self-tests since it will soon be retired.) Due to chainlint.sed implementation limitations leaking into the self-test "expect" files, a few of them require minor adjustment to make them compatible with chainlint.pl which does not share those limitations. First, because `sed` does not provide any sort of real recursion, chainlint.sed only emulates recursion into subshells, and each level of recursion leads to a multiplicative increase in complexity of the `sed` rules. To avoid substantial complexity, chainlint.sed, therefore, only emulates subshell recursion one level deep. Any subshell deeper than that is passed through as-is, which means that &&-chains are not checked in deeper subshells. chainlint.pl, on the other hand, employs a proper recursive descent parser, thus checks subshells to any depth and correctly flags broken &&-chains in deep subshells. Second, due to sed's line-oriented nature, chainlint.sed, by necessity, folds multi-line quoted strings into a single line. chainlint.pl, on the other hand, employs a proper lexical analyzer which preserves quoted strings as-is, including embedded newlines. Furthermore, the output of chainlint.sed and chainlint.pl do not match precisely in terms of whitespace. However, since the purpose of the self-checks is to verify that the ?!AMP?! annotations are being correctly added, minor whitespace differences are immaterial. For this reason, rather than adjusting whitespace in all existing self-test "expect" files to match the new linter's output, the `check-chainlint` target ignores whitespace differences. Since `diff -w` is not POSIX, `check-chainlint` attempts to employ `git diff -w`, and only falls back to non-POSIX `diff -w` (and `-u`) if `git diff` is not available. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/Makefile | 29 +++++++++++++++---- t/chainlint/block.expect | 2 +- t/chainlint/here-doc-multi-line-string.expect | 3 +- t/chainlint/multi-line-string.expect | 11 +++++-- t/chainlint/nested-subshell.expect | 2 +- t/chainlint/t7900-subtree.expect | 13 +++++++-- 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/t/Makefile b/t/Makefile index 1c80c0c79a..11f276774e 100644 --- a/t/Makefile +++ b/t/Makefile @@ -38,7 +38,7 @@ T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) -CHAINLINT = sed -f chainlint.sed +CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl all: $(DEFAULT_TEST_TARGET) @@ -73,10 +73,29 @@ clean-chainlint: check-chainlint: @mkdir -p '$(CHAINLINTTMP_SQ)' && \ - sed -e '/^# LINT: /d' $(patsubst %,chainlint/%.test,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/tests && \ - sed -e '/^[ ]*$$/d' $(patsubst %,chainlint/%.expect,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/expect && \ - $(CHAINLINT) '$(CHAINLINTTMP_SQ)'/tests | grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ - diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual + for i in $(CHAINLINTTESTS); do \ + echo "test_expect_success '$$i' '" && \ + sed -e '/^# LINT: /d' chainlint/$$i.test && \ + echo "'"; \ + done >'$(CHAINLINTTMP_SQ)'/tests && \ + { \ + echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \ + for i in $(CHAINLINTTESTS); do \ + echo "# chainlint: $$i" && \ + sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \ + done \ + } >'$(CHAINLINTTMP_SQ)'/expect && \ + $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \ + grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ + if test -f ../GIT-BUILD-OPTIONS; then \ + . ../GIT-BUILD-OPTIONS; \ + fi && \ + if test -x ../git$$X; then \ + DIFFW="../git$$X --no-pager diff -w --no-index"; \ + else \ + DIFFW="diff -w -u"; \ + fi && \ + $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index da60257ebc..37dbf7d95f 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -1,7 +1,7 @@ ( foo && { - echo a + echo a ?!AMP?! echo b } && bar && diff --git a/t/chainlint/here-doc-multi-line-string.expect b/t/chainlint/here-doc-multi-line-string.expect index 2578191ca8..be64b26869 100644 --- a/t/chainlint/here-doc-multi-line-string.expect +++ b/t/chainlint/here-doc-multi-line-string.expect @@ -1,4 +1,5 @@ ( - cat <<-TXT && echo "multi-line string" ?!AMP?! + cat <<-TXT && echo "multi-line + string" ?!AMP?! bap ) diff --git a/t/chainlint/multi-line-string.expect b/t/chainlint/multi-line-string.expect index ab0dadf748..27ff95218e 100644 --- a/t/chainlint/multi-line-string.expect +++ b/t/chainlint/multi-line-string.expect @@ -1,9 +1,14 @@ ( - x="line 1 line 2 line 3" && - y="line 1 line2" ?!AMP?! + x="line 1 + line 2 + line 3" && + y="line 1 + line2" ?!AMP?! foobar ) && ( - echo "xyz" "abc def ghi" && + echo "xyz" "abc + def + ghi" && barfoo ) diff --git a/t/chainlint/nested-subshell.expect b/t/chainlint/nested-subshell.expect index 41a48adaa2..02e0a9f1bb 100644 --- a/t/chainlint/nested-subshell.expect +++ b/t/chainlint/nested-subshell.expect @@ -6,7 +6,7 @@ ) >file && cd foo && ( - echo a + echo a ?!AMP?! echo b ) >file ) diff --git a/t/chainlint/t7900-subtree.expect b/t/chainlint/t7900-subtree.expect index 1cccc7bf7e..69167da2f2 100644 --- a/t/chainlint/t7900-subtree.expect +++ b/t/chainlint/t7900-subtree.expect @@ -1,10 +1,17 @@ ( - chks="sub1sub2sub3sub4" && + chks="sub1 +sub2 +sub3 +sub4" && chks_sub=$(cat < Date: Thu, 1 Sep 2022 00:29:47 +0000 Subject: [PATCH 18/45] chainlint.pl: don't require `&` background command to end with `&&` The exit status of the `&` asynchronous operator which starts a command in the background is unconditionally zero, and the few places in the test scripts which launch commands asynchronously are not interested in the exit status of the `&` operator (though they often capture the background command's PID). As such, there is little value in complaining about broken &&-chain for a command launched in the background, and doing so would only make busy-work for test authors. Therefore, take this special case into account when checking for &&-chain breakage. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 2 +- t/chainlint/chain-break-background.expect | 9 +++++++++ t/chainlint/chain-break-background.test | 10 ++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 t/chainlint/chain-break-background.expect create mode 100644 t/chainlint/chain-break-background.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 31c444067c..ba3fcb0c8e 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -483,7 +483,7 @@ sub match_ending { } my @safe_endings = ( - [qr/^(?:&&|\|\||\|)$/], + [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], [qr/^(?:exit|return|continue)$/], diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect new file mode 100644 index 0000000000..28f9114f42 --- /dev/null +++ b/t/chainlint/chain-break-background.expect @@ -0,0 +1,9 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +> empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ + jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output & + JGIT_DAEMON_PID=$! +} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git diff --git a/t/chainlint/chain-break-background.test b/t/chainlint/chain-break-background.test new file mode 100644 index 0000000000..e10f656b05 --- /dev/null +++ b/t/chainlint/chain-break-background.test @@ -0,0 +1,10 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +>empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ +# LINT: exit status of "&" is always 0 so &&-chaining immaterial + jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output & + JGIT_DAEMON_PID=$! +} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git From a8f30ee0502b89ecb660af36784f653a8c3fb20d Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:48 +0000 Subject: [PATCH 19/45] chainlint.pl: don't flag broken &&-chain if `$?` handled explicitly There are cases in which tests capture and check a command's exit code explicitly without employing test_expect_code(). They do so by intentionally breaking the &&-chain since it would be impossible to capture "$?" in the failing case if the `status=$?` assignment was part of the &&-chain. Since such constructs are manually checking the exit code, their &&-chain breakage is legitimate and safe, thus should not be flagged. Therefore, stop flagging &&-chain breakage in such cases. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 6 ++++++ t/chainlint/chain-break-status.expect | 9 +++++++++ t/chainlint/chain-break-status.test | 11 +++++++++++ 3 files changed, 26 insertions(+) create mode 100644 t/chainlint/chain-break-status.expect create mode 100644 t/chainlint/chain-break-status.test diff --git a/t/chainlint.pl b/t/chainlint.pl index ba3fcb0c8e..14e1db3519 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -497,6 +497,12 @@ sub accumulate { # did previous command end with "&&", "|", "|| return" or similar? goto DONE if match_ending($tokens, \@safe_endings); + # if this command handles "$?" specially, then okay for previous + # command to be missing "&&" + for my $token (@$cmd) { + goto DONE if $token =~ /\$\?/; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect new file mode 100644 index 0000000000..f4bada9463 --- /dev/null +++ b/t/chainlint/chain-break-status.expect @@ -0,0 +1,9 @@ +OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" && + +{ test-tool sigchain > actual ; ret=$? ; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual diff --git a/t/chainlint/chain-break-status.test b/t/chainlint/chain-break-status.test new file mode 100644 index 0000000000..a6602a7b99 --- /dev/null +++ b/t/chainlint/chain-break-status.test @@ -0,0 +1,11 @@ +# LINT: broken &&-chain okay if next command handles "$?" explicitly +OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" && + +# LINT: broken &&-chain okay if next command handles "$?" explicitly +{ test-tool sigchain >actual; ret=$?; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual From 832c68b3c210267c93e1dcb2f2763372339ca36c Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:49 +0000 Subject: [PATCH 20/45] chainlint.pl: don't flag broken &&-chain if failure indicated explicitly There are quite a few tests which print an error messages and then explicitly signal failure with `false`, `return 1`, or `exit 1` as the final command in an `if` branch. In these cases, the tests don't bother maintaining the &&-chain between `echo` and the explicit "test failed" indicator. Since such constructs are manually signaling failure, their &&-chain breakage is legitimate and safe -- both for the command immediately preceding `false`, `return`, or `exit`, as well as for all preceding commands in the `if` branch. Therefore, stop flagging &&-chain breakage in these sorts of cases. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 8 ++++++++ t/chainlint/chain-break-false.expect | 9 +++++++++ t/chainlint/chain-break-false.test | 10 ++++++++++ t/chainlint/chain-break-return-exit.expect | 15 +++++++++++++++ t/chainlint/chain-break-return-exit.test | 18 ++++++++++++++++++ t/chainlint/if-in-loop.expect | 2 +- t/chainlint/if-in-loop.test | 2 +- 7 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/chain-break-false.expect create mode 100644 t/chainlint/chain-break-false.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 14e1db3519..a76a09ecf5 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -503,6 +503,14 @@ sub accumulate { goto DONE if $token =~ /\$\?/; } + # if this command is "false", "return 1", or "exit 1" (which signal + # failure explicitly), then okay for all preceding commands to be + # missing "&&" + if ($$cmd[0] =~ /^(?:false|return|exit)$/) { + @$tokens = grep(!/^\?!AMP\?!$/, @$tokens); + goto DONE; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-false.expect b/t/chainlint/chain-break-false.expect new file mode 100644 index 0000000000..989766fb85 --- /dev/null +++ b/t/chainlint/chain-break-false.expect @@ -0,0 +1,9 @@ +if condition not satisified +then + echo it did not work... + echo failed! + false +else + echo it went okay ?!AMP?! + congratulate user +fi diff --git a/t/chainlint/chain-break-false.test b/t/chainlint/chain-break-false.test new file mode 100644 index 0000000000..a5aaff8c8a --- /dev/null +++ b/t/chainlint/chain-break-false.test @@ -0,0 +1,10 @@ +# LINT: broken &&-chain okay if explicit "false" signals failure +if condition not satisified +then + echo it did not work... + echo failed! + false +else + echo it went okay + congratulate user +fi diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect index dba292ee89..1732d221c3 100644 --- a/t/chainlint/chain-break-return-exit.expect +++ b/t/chainlint/chain-break-return-exit.expect @@ -1,3 +1,18 @@ +case "$(git ls-files)" in +one ) echo pass one ;; +* ) echo bad one ; return 1 ;; +esac && +( + case "$(git ls-files)" in + two ) echo pass two ;; + * ) echo bad two ; exit 1 ;; +esac +) && +case "$(git ls-files)" in +dir/two"$LF"one ) echo pass both ;; +* ) echo bad ; return 1 ;; +esac && + for i in 1 2 3 4 ; do git checkout main -b $i || return $? test_commit $i $i $i tag$i || return $? diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test index e2b059933a..46542edf88 100644 --- a/t/chainlint/chain-break-return-exit.test +++ b/t/chainlint/chain-break-return-exit.test @@ -1,3 +1,21 @@ +case "$(git ls-files)" in +one) echo pass one ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failuire +*) echo bad one; return 1 ;; +esac && +( + case "$(git ls-files)" in + two) echo pass two ;; +# LINT: broken &&-chain okay if explicit "exit 1" signals failuire + *) echo bad two; exit 1 ;; + esac +) && +case "$(git ls-files)" in +dir/two"$LF"one) echo pass both ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failuire +*) echo bad; return 1 ;; +esac && + for i in 1 2 3 4 ; do # LINT: broken &&-chain okay if explicit "return $?" signals failure git checkout main -b $i || return $? diff --git a/t/chainlint/if-in-loop.expect b/t/chainlint/if-in-loop.expect index 03b82a3e58..d6514ae749 100644 --- a/t/chainlint/if-in-loop.expect +++ b/t/chainlint/if-in-loop.expect @@ -3,7 +3,7 @@ do if false then - echo "err" ?!AMP?! + echo "err" exit 1 fi ?!AMP?! foo diff --git a/t/chainlint/if-in-loop.test b/t/chainlint/if-in-loop.test index f0cf19cfad..90c23976fe 100644 --- a/t/chainlint/if-in-loop.test +++ b/t/chainlint/if-in-loop.test @@ -3,7 +3,7 @@ do if false then -# LINT: missing "&&" on "echo" +# LINT: missing "&&" on "echo" okay since "exit 1" signals error explicitly echo "err" exit 1 # LINT: missing "&&" on "fi" From fd4094c3cad7c62adb0b7080e0dca37f66bf0c6e Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:50 +0000 Subject: [PATCH 21/45] chainlint.pl: complain about loops lacking explicit failure handling Shell `for` and `while` loops do not terminate automatically just because a command fails within the loop body. Instead, the loop continues to iterate and eventually returns the exit status of the final command of the final iteration, which may not be the command which failed, thus it is possible for failures to go undetected. Consequently, it is important for test authors to explicitly handle failure within the loop body by terminating the loop manually upon failure. This can be done by returning a non-zero exit code from within the loop body (i.e. `|| return 1`) or exiting (i.e. `|| exit 1`) if the loop is within a subshell, or by manually checking `$?` and taking some appropriate action. Therefore, add logic to detect and complain about loops which lack explicit `return` or `exit`, or `$?` check. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 11 ++++++ t/chainlint/complex-if-in-cuddled-loop.expect | 2 +- t/chainlint/for-loop.expect | 4 +-- t/chainlint/loop-detect-failure.expect | 15 ++++++++ t/chainlint/loop-detect-failure.test | 17 +++++++++ t/chainlint/loop-detect-status.expect | 18 ++++++++++ t/chainlint/loop-detect-status.test | 19 ++++++++++ t/chainlint/loop-in-if.expect | 2 +- t/chainlint/nested-loop-detect-failure.expect | 31 ++++++++++++++++ t/chainlint/nested-loop-detect-failure.test | 35 +++++++++++++++++++ t/chainlint/semicolon.expect | 2 +- t/chainlint/while-loop.expect | 4 +-- 12 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 t/chainlint/loop-detect-failure.expect create mode 100644 t/chainlint/loop-detect-failure.test create mode 100644 t/chainlint/loop-detect-status.expect create mode 100644 t/chainlint/loop-detect-status.test create mode 100644 t/chainlint/nested-loop-detect-failure.expect create mode 100644 t/chainlint/nested-loop-detect-failure.test diff --git a/t/chainlint.pl b/t/chainlint.pl index a76a09ecf5..674b3ddf69 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -482,6 +482,17 @@ sub match_ending { return undef; } +sub parse_loop_body { + my $self = shift @_; + my @tokens = $self->SUPER::parse_loop_body(@_); + # did loop signal failure via "|| return" or "|| exit"? + return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # flag missing "return/exit" handling explicit failure in loop body + my $n = find_non_nl(\@tokens); + splice(@tokens, $n + 1, 0, '?!LOOP?!'); + return @tokens; +} + my @safe_endings = ( [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], diff --git a/t/chainlint/complex-if-in-cuddled-loop.expect b/t/chainlint/complex-if-in-cuddled-loop.expect index 2fca183409..dac2d0fd1d 100644 --- a/t/chainlint/complex-if-in-cuddled-loop.expect +++ b/t/chainlint/complex-if-in-cuddled-loop.expect @@ -4,6 +4,6 @@ : else echo >file - fi + fi ?!LOOP?! done) && test ! -f file diff --git a/t/chainlint/for-loop.expect b/t/chainlint/for-loop.expect index 6671b8cd84..a5810c9bdd 100644 --- a/t/chainlint/for-loop.expect +++ b/t/chainlint/for-loop.expect @@ -2,10 +2,10 @@ for i in a b c do echo $i ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! for i in a b c; do echo $i && - cat $i + cat $i ?!LOOP?! done ) diff --git a/t/chainlint/loop-detect-failure.expect b/t/chainlint/loop-detect-failure.expect new file mode 100644 index 0000000000..a66025c39d --- /dev/null +++ b/t/chainlint/loop-detect-failure.expect @@ -0,0 +1,15 @@ +git init r1 && +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" ?!LOOP?! +done diff --git a/t/chainlint/loop-detect-failure.test b/t/chainlint/loop-detect-failure.test new file mode 100644 index 0000000000..b9791cc802 --- /dev/null +++ b/t/chainlint/loop-detect-failure.test @@ -0,0 +1,17 @@ +git init r1 && +# LINT: loop handles failure explicitly with "|| return 1" +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +# LINT: loop fails to handle failure explicitly with "|| return 1" +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" +done diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect new file mode 100644 index 0000000000..0ad23bb35e --- /dev/null +++ b/t/chainlint/loop-detect-status.expect @@ -0,0 +1,18 @@ +( while test $i -le $blobcount +do + printf "Generating blob $i/$blobcount\r" >& 2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? > exit-status +done && +echo "commit refs/heads/main" && +echo "author A U Thor 123456789 +0000" && +echo "committer C O Mitter 123456789 +0000" && +echo "data 5" && +echo ">2gb" && +cat commit ) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-detect-status.test b/t/chainlint/loop-detect-status.test new file mode 100644 index 0000000000..1c6c23cfc9 --- /dev/null +++ b/t/chainlint/loop-detect-status.test @@ -0,0 +1,19 @@ +# LINT: "$?" handled explicitly within loop body +(while test $i -le $blobcount + do + printf "Generating blob $i/$blobcount\r" >&2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + #test-tool genrandom $i $blobsize && + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? > exit-status + done && + echo "commit refs/heads/main" && + echo "author A U Thor 123456789 +0000" && + echo "committer C O Mitter 123456789 +0000" && + echo "data 5" && + echo ">2gb" && + cat commit) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-in-if.expect b/t/chainlint/loop-in-if.expect index e1be42376c..6c5d6e5b24 100644 --- a/t/chainlint/loop-in-if.expect +++ b/t/chainlint/loop-in-if.expect @@ -4,7 +4,7 @@ while true do echo "pop" ?!AMP?! - echo "glup" + echo "glup" ?!LOOP?! done ?!AMP?! foo fi ?!AMP?! diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect new file mode 100644 index 0000000000..4793a0e8e1 --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.expect @@ -0,0 +1,31 @@ +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! + done ?!LOOP?! +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! + done || return 1 +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/nested-loop-detect-failure.test b/t/chainlint/nested-loop-detect-failure.test new file mode 100644 index 0000000000..e6f0c1acfb --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.test @@ -0,0 +1,35 @@ +# LINT: neither loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done +done && + +# LINT: inner loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done +done && + +# LINT: outer loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done || return 1 +done && + +# LINT: inner & outer loops handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/semicolon.expect b/t/chainlint/semicolon.expect index ed0b3707ae..3aa2259f36 100644 --- a/t/chainlint/semicolon.expect +++ b/t/chainlint/semicolon.expect @@ -15,5 +15,5 @@ ) && (cd foo && for i in a b c; do - echo; + echo; ?!LOOP?! done) diff --git a/t/chainlint/while-loop.expect b/t/chainlint/while-loop.expect index 0d3a9b3d12..f272aa21fe 100644 --- a/t/chainlint/while-loop.expect +++ b/t/chainlint/while-loop.expect @@ -2,10 +2,10 @@ while true do echo foo ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! while true; do echo foo && - cat bar + cat bar ?!LOOP?! done ) From ae0c55abf8217bb06422f9eafcd7a30b2c8f9e8b Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:51 +0000 Subject: [PATCH 22/45] chainlint.pl: allow `|| echo` to signal failure upstream of a pipe The use of `|| return` (or `|| exit`) to signal failure within a loop isn't effective when the loop is upstream of a pipe since the pipe swallows all upstream exit codes and returns only the exit code of the final command in the pipeline. To work around this limitation, tests may adopt an alternative strategy of signaling failure by emitting text which would never be emitted in the non-failing case. For instance: while condition do command1 && command2 || echo "impossible text" done | sort >actual && Such usage indicates deliberate thought about failure cases by the test author, thus flagging them as missing `|| return` (or `|| exit`) is not helpful. Therefore, take this case into consideration when checking for explicit loop termination. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 3 +++ t/chainlint/loop-upstream-pipe.expect | 10 ++++++++++ t/chainlint/loop-upstream-pipe.test | 11 +++++++++++ 3 files changed, 24 insertions(+) create mode 100644 t/chainlint/loop-upstream-pipe.expect create mode 100644 t/chainlint/loop-upstream-pipe.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 674b3ddf69..386999ce65 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,6 +487,9 @@ sub parse_loop_body { my @tokens = $self->SUPER::parse_loop_body(@_); # did loop signal failure via "|| return" or "|| exit"? return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # did loop upstream of a pipe signal failure via "|| echo 'impossible + # text'" as the final command in the loop body? + return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); # flag missing "return/exit" handling explicit failure in loop body my $n = find_non_nl(\@tokens); splice(@tokens, $n + 1, 0, '?!LOOP?!'); diff --git a/t/chainlint/loop-upstream-pipe.expect b/t/chainlint/loop-upstream-pipe.expect new file mode 100644 index 0000000000..0b82ecc4b9 --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.expect @@ -0,0 +1,10 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && diff --git a/t/chainlint/loop-upstream-pipe.test b/t/chainlint/loop-upstream-pipe.test new file mode 100644 index 0000000000..efb77da897 --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.test @@ -0,0 +1,11 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do +# LINT: "|| echo" signals failure in loop upstream of a pipe + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && From 56066523ed3ebd16b455e99ce954ec19b6ac5ada Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:52 +0000 Subject: [PATCH 23/45] t/chainlint: add more chainlint.pl self-tests During the development of chainlint.pl, numerous new self-tests were created to verify correct functioning beyond the checks already represented by the existing self-tests. The new checks fall into several categories: * behavior of the lexical analyzer for complex cases, such as line splicing, token pasting, entering and exiting string contexts inside and outside of test script bodies; for instance: test_expect_success 'title' ' x=$(echo "something" | sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' ' * behavior of the parser for all compound grammatical constructs, such as `if...fi`, `case...esac`, `while...done`, `{...}`, etc., and for other legal shell grammatical constructs not covered by existing chainlint.sed self-tests, as well as complex cases, such as: OUT=$( ((large_git 1>&3) | :) 3>&1 ) && * detection of problems, such as &&-chain breakage, from top-level to any depth since the existing self-tests do not cover any top-level context and only cover subshells one level deep due to limitations of chainlint.sed * address blind spots in chainlint.sed (such as not detecting a broken &&-chain on a one-line for-loop in a subshell[1]) which chainlint.pl correctly detects * real-world cases which tripped up chainlint.pl during its development [1]: https://lore.kernel.org/git/dce35a47012fecc6edc11c68e91dbb485c5bc36f.1661663880.git.gitgitgadget@gmail.com/ Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint/blank-line-before-esac.expect | 18 +++++++++++ t/chainlint/blank-line-before-esac.test | 19 +++++++++++ t/chainlint/block.expect | 13 +++++++- t/chainlint/block.test | 15 ++++++++- t/chainlint/chained-block.expect | 9 ++++++ t/chainlint/chained-block.test | 11 +++++++ t/chainlint/chained-subshell.expect | 10 ++++++ t/chainlint/chained-subshell.test | 13 ++++++++ .../command-substitution-subsubshell.expect | 2 ++ .../command-substitution-subsubshell.test | 3 ++ t/chainlint/double-here-doc.expect | 2 ++ t/chainlint/double-here-doc.test | 12 +++++++ t/chainlint/dqstring-line-splice.expect | 3 ++ t/chainlint/dqstring-line-splice.test | 7 ++++ t/chainlint/dqstring-no-interpolate.expect | 11 +++++++ t/chainlint/dqstring-no-interpolate.test | 15 +++++++++ t/chainlint/empty-here-doc.expect | 3 ++ t/chainlint/empty-here-doc.test | 5 +++ t/chainlint/exclamation.expect | 4 +++ t/chainlint/exclamation.test | 8 +++++ t/chainlint/for-loop-abbreviated.expect | 5 +++ t/chainlint/for-loop-abbreviated.test | 6 ++++ t/chainlint/function.expect | 11 +++++++ t/chainlint/function.test | 13 ++++++++ t/chainlint/here-doc-indent-operator.expect | 5 +++ t/chainlint/here-doc-indent-operator.test | 13 ++++++++ t/chainlint/if-condition-split.expect | 7 ++++ t/chainlint/if-condition-split.test | 8 +++++ t/chainlint/one-liner-for-loop.expect | 9 ++++++ t/chainlint/one-liner-for-loop.test | 10 ++++++ t/chainlint/sqstring-in-sqstring.expect | 4 +++ t/chainlint/sqstring-in-sqstring.test | 5 +++ t/chainlint/token-pasting.expect | 27 ++++++++++++++++ t/chainlint/token-pasting.test | 32 +++++++++++++++++++ 34 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/blank-line-before-esac.expect create mode 100644 t/chainlint/blank-line-before-esac.test create mode 100644 t/chainlint/chained-block.expect create mode 100644 t/chainlint/chained-block.test create mode 100644 t/chainlint/chained-subshell.expect create mode 100644 t/chainlint/chained-subshell.test create mode 100644 t/chainlint/command-substitution-subsubshell.expect create mode 100644 t/chainlint/command-substitution-subsubshell.test create mode 100644 t/chainlint/double-here-doc.expect create mode 100644 t/chainlint/double-here-doc.test create mode 100644 t/chainlint/dqstring-line-splice.expect create mode 100644 t/chainlint/dqstring-line-splice.test create mode 100644 t/chainlint/dqstring-no-interpolate.expect create mode 100644 t/chainlint/dqstring-no-interpolate.test create mode 100644 t/chainlint/empty-here-doc.expect create mode 100644 t/chainlint/empty-here-doc.test create mode 100644 t/chainlint/exclamation.expect create mode 100644 t/chainlint/exclamation.test create mode 100644 t/chainlint/for-loop-abbreviated.expect create mode 100644 t/chainlint/for-loop-abbreviated.test create mode 100644 t/chainlint/function.expect create mode 100644 t/chainlint/function.test create mode 100644 t/chainlint/here-doc-indent-operator.expect create mode 100644 t/chainlint/here-doc-indent-operator.test create mode 100644 t/chainlint/if-condition-split.expect create mode 100644 t/chainlint/if-condition-split.test create mode 100644 t/chainlint/one-liner-for-loop.expect create mode 100644 t/chainlint/one-liner-for-loop.test create mode 100644 t/chainlint/sqstring-in-sqstring.expect create mode 100644 t/chainlint/sqstring-in-sqstring.test create mode 100644 t/chainlint/token-pasting.expect create mode 100644 t/chainlint/token-pasting.test diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect new file mode 100644 index 0000000000..48ed4eb124 --- /dev/null +++ b/t/chainlint/blank-line-before-esac.expect @@ -0,0 +1,18 @@ +test_done ( ) { + case "$test_failure" in + 0 ) + test_at_end_hook_ + + exit 0 ;; + + * ) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/blank-line-before-esac.test b/t/chainlint/blank-line-before-esac.test new file mode 100644 index 0000000000..cecccad19f --- /dev/null +++ b/t/chainlint/blank-line-before-esac.test @@ -0,0 +1,19 @@ +# LINT: blank line before "esac" +test_done () { + case "$test_failure" in + 0) + test_at_end_hook_ + + exit 0 ;; + + *) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index 37dbf7d95f..a3bcea492a 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -9,4 +9,15 @@ echo c } ?!AMP?! baz -) +) && + +{ + echo a ; ?!AMP?! echo b +} && +{ echo a ; ?!AMP?! echo b ; } && + +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/block.test b/t/chainlint/block.test index 0a82fd579f..4ab69a4afc 100644 --- a/t/chainlint/block.test +++ b/t/chainlint/block.test @@ -11,4 +11,17 @@ echo c } baz -) +) && + +# LINT: ";" not allowed in place of "&&" +{ + echo a; echo b +} && +{ echo a; echo b; } && + +# LINT: "}" inside string not mistaken as end of block +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/chained-block.expect b/t/chainlint/chained-block.expect new file mode 100644 index 0000000000..574cdceb07 --- /dev/null +++ b/t/chainlint/chained-block.expect @@ -0,0 +1,9 @@ +echo nobody home && { + test the doohicky ?!AMP?! + right now +} && + +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-block.test b/t/chainlint/chained-block.test new file mode 100644 index 0000000000..86f81ece63 --- /dev/null +++ b/t/chainlint/chained-block.test @@ -0,0 +1,11 @@ +# LINT: start of block chained to preceding command +echo nobody home && { + test the doohicky + right now +} && + +# LINT: preceding command pipes to block on same line +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect new file mode 100644 index 0000000000..af0369d328 --- /dev/null +++ b/t/chainlint/chained-subshell.expect @@ -0,0 +1,10 @@ +mkdir sub && ( + cd sub && + foo the bar ?!AMP?! + nuff said +) && + +cut "-d " -f actual | ( read s1 s2 s3 && +test -f $s1 ?!AMP?! +test $(cat $s2) = tree2path1 && +test $(cat $s3) = tree3path1 ) diff --git a/t/chainlint/chained-subshell.test b/t/chainlint/chained-subshell.test new file mode 100644 index 0000000000..4ff6ddd8cb --- /dev/null +++ b/t/chainlint/chained-subshell.test @@ -0,0 +1,13 @@ +# LINT: start of subshell chained to preceding command +mkdir sub && ( + cd sub && + foo the bar + nuff said +) && + +# LINT: preceding command pipes to subshell on same line +cut "-d " -f actual | (read s1 s2 s3 && +test -f $s1 +test $(cat $s2) = tree2path1 && +# LINT: closing subshell ")" correctly detected on same line as "$(...)" +test $(cat $s3) = tree3path1) diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect new file mode 100644 index 0000000000..ab2f79e845 --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.expect @@ -0,0 +1,2 @@ +OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/command-substitution-subsubshell.test b/t/chainlint/command-substitution-subsubshell.test new file mode 100644 index 0000000000..321de2951c --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.test @@ -0,0 +1,3 @@ +# LINT: subshell nested in subshell nested in command substitution +OUT=$( ((large_git 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/double-here-doc.expect b/t/chainlint/double-here-doc.expect new file mode 100644 index 0000000000..75477bb1ad --- /dev/null +++ b/t/chainlint/double-here-doc.expect @@ -0,0 +1,2 @@ +run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF && +check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR diff --git a/t/chainlint/double-here-doc.test b/t/chainlint/double-here-doc.test new file mode 100644 index 0000000000..cd584a4357 --- /dev/null +++ b/t/chainlint/double-here-doc.test @@ -0,0 +1,12 @@ +run_sub_test_lib_test_err run-inv-range-start \ + "--run invalid range start" \ + --run="a-5" <<-\EOF && +test_expect_success "passing test #1" "true" +test_done +EOF +check_sub_test_lib_test_err run-inv-range-start \ + <<-\EOF_OUT 3<<-EOF_ERR +> FATAL: Unexpected exit with code 1 +EOF_OUT +> error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ} +EOF_ERR diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect new file mode 100644 index 0000000000..bf9ced60d4 --- /dev/null +++ b/t/chainlint/dqstring-line-splice.expect @@ -0,0 +1,3 @@ +echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual && +test_cmp expect actual diff --git a/t/chainlint/dqstring-line-splice.test b/t/chainlint/dqstring-line-splice.test new file mode 100644 index 0000000000..b40714439f --- /dev/null +++ b/t/chainlint/dqstring-line-splice.test @@ -0,0 +1,7 @@ +# LINT: line-splice within DQ-string +'" +echo 'fatal: reword option of --fixup is mutually exclusive with'\ + '--patch/--interactive/--all/--include/--only' >expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual && +test_cmp expect actual +"' diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect new file mode 100644 index 0000000000..10724987a5 --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.expect @@ -0,0 +1,11 @@ +grep "^ ! [rejected][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +grep "^\.git$" output.txt && + + +( + cd client$version && + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input) +) > output && + cut -d ' ' -f 2 < output | sort > actual && + test_cmp expect actual diff --git a/t/chainlint/dqstring-no-interpolate.test b/t/chainlint/dqstring-no-interpolate.test new file mode 100644 index 0000000000..d2f4219cbb --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.test @@ -0,0 +1,15 @@ +# LINT: regex dollar-sign eol anchor in double-quoted string not special +grep "^ ! \[rejected\][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +# LINT: escaped "$" not mistaken for variable expansion +grep "^\\.git\$" output.txt && + +'" +( + cd client$version && +# LINT: escaped dollar-sign in double-quoted test body + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. \$(cat ../input) +) >output && + cut -d ' ' -f 2 actual && + test_cmp expect actual +"' diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect new file mode 100644 index 0000000000..f42f2d41ba --- /dev/null +++ b/t/chainlint/empty-here-doc.expect @@ -0,0 +1,3 @@ +git ls-tree $tree path > current && +cat > expected <current && +# LINT: empty here-doc +cat >expected <<\EOF && +EOF +test_output diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect new file mode 100644 index 0000000000..2d961a58c6 --- /dev/null +++ b/t/chainlint/exclamation.expect @@ -0,0 +1,4 @@ +if ! condition ; then echo nope ; else yep ; fi && +test_prerequisite !MINGW && +mail uucp!address && +echo !whatever! diff --git a/t/chainlint/exclamation.test b/t/chainlint/exclamation.test new file mode 100644 index 0000000000..323595b5bd --- /dev/null +++ b/t/chainlint/exclamation.test @@ -0,0 +1,8 @@ +# LINT: "! word" is two tokens +if ! condition; then echo nope; else yep; fi && +# LINT: "!word" is single token, not two tokens "!" and "word" +test_prerequisite !MINGW && +# LINT: "word!word" is single token, not three tokens "word", "!", and "word" +mail uucp!address && +# LINT: "!word!" is single token, not three tokens "!", "word", and "!" +echo !whatever! diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect new file mode 100644 index 0000000000..a21007a63f --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.expect @@ -0,0 +1,5 @@ +for it +do + path=$(expr "$it" : ( [^:]*) ) && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/for-loop-abbreviated.test b/t/chainlint/for-loop-abbreviated.test new file mode 100644 index 0000000000..1084eccb89 --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.test @@ -0,0 +1,6 @@ +# LINT: for-loop lacking optional "in [word...]" before "do" +for it +do + path=$(expr "$it" : '\([^:]*\)') && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect new file mode 100644 index 0000000000..a14388e6b9 --- /dev/null +++ b/t/chainlint/function.expect @@ -0,0 +1,11 @@ +sha1_file ( ) { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +remove_object ( ) { + file=$(sha1_file "$*") && + test -e "$file" ?!AMP?! + rm -f "$file" +} ?!AMP?! + +sha1_file arg && remove_object arg diff --git a/t/chainlint/function.test b/t/chainlint/function.test new file mode 100644 index 0000000000..5ee59562c9 --- /dev/null +++ b/t/chainlint/function.test @@ -0,0 +1,13 @@ +# LINT: "()" in function definition not mistaken for subshell +sha1_file() { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +# LINT: broken &&-chain in function and after function +remove_object() { + file=$(sha1_file "$*") && + test -e "$file" + rm -f "$file" +} + +sha1_file arg && remove_object arg diff --git a/t/chainlint/here-doc-indent-operator.expect b/t/chainlint/here-doc-indent-operator.expect new file mode 100644 index 0000000000..fb6cf7285d --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.expect @@ -0,0 +1,5 @@ +cat > expect <<-EOF && + +cat > expect <<-EOF ?!AMP?! + +cleanup diff --git a/t/chainlint/here-doc-indent-operator.test b/t/chainlint/here-doc-indent-operator.test new file mode 100644 index 0000000000..c8a6f18eb4 --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.test @@ -0,0 +1,13 @@ +# LINT: whitespace between operator "<<-" and tag legal +cat >expect <<- EOF && +header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0 +num_commits: $1 +chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data +EOF + +# LINT: not an indented here-doc; just a plain here-doc with tag named "-EOF" +cat >expect << -EOF +this is not indented +-EOF + +cleanup diff --git a/t/chainlint/if-condition-split.expect b/t/chainlint/if-condition-split.expect new file mode 100644 index 0000000000..ee745ef8d7 --- /dev/null +++ b/t/chainlint/if-condition-split.expect @@ -0,0 +1,7 @@ +if bob && + marcia || + kevin +then + echo "nomads" ?!AMP?! + echo "for sure" +fi diff --git a/t/chainlint/if-condition-split.test b/t/chainlint/if-condition-split.test new file mode 100644 index 0000000000..240daa9fd5 --- /dev/null +++ b/t/chainlint/if-condition-split.test @@ -0,0 +1,8 @@ +# LINT: "if" condition split across multiple lines at "&&" or "||" +if bob && + marcia || + kevin +then + echo "nomads" + echo "for sure" +fi diff --git a/t/chainlint/one-liner-for-loop.expect b/t/chainlint/one-liner-for-loop.expect new file mode 100644 index 0000000000..51a3dc7c54 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.expect @@ -0,0 +1,9 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && + for i in a b c; do echo $i >olddir/$i; ?!LOOP?! done ?!AMP?! + git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/one-liner-for-loop.test b/t/chainlint/one-liner-for-loop.test new file mode 100644 index 0000000000..4bd8c066c7 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.test @@ -0,0 +1,10 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && +# LINT: one-liner for-loop missing "|| exit"; also broken &&-chain + for i in a b c; do echo $i >olddir/$i; done + git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/sqstring-in-sqstring.expect b/t/chainlint/sqstring-in-sqstring.expect new file mode 100644 index 0000000000..cf0b591cf7 --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.expect @@ -0,0 +1,4 @@ +perl -e ' + defined($_ = -s $_) or die for @ARGV; + exit 1 if $ARGV[0] <= $ARGV[1]; +' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/sqstring-in-sqstring.test b/t/chainlint/sqstring-in-sqstring.test new file mode 100644 index 0000000000..77a425e0c7 --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.test @@ -0,0 +1,5 @@ +# LINT: SQ-string Perl code fragment within SQ-string +perl -e '\'' + defined($_ = -s $_) or die for @ARGV; + exit 1 if $ARGV[0] <= $ARGV[1]; +'\'' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect new file mode 100644 index 0000000000..342360bcd0 --- /dev/null +++ b/t/chainlint/token-pasting.expect @@ -0,0 +1,27 @@ +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" ?!AMP?! + echo "*.i ident" +} > .gitattributes && + +{ + echo a b c d e f g h i j k l m ?!AMP?! + echo n o p q r s t u v w x y z ?!AMP?! + echo '$Id$' +} > test && +cat test > test.t && +cat test > test.o && +cat test > test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" > test2.o && +echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?! + +downstream_url_for_sed=$( + printf "%sn" "$downstream_url" | + sed -e 's/\/\\/g' -e 's/[[/.*^$]/\&/g' +) diff --git a/t/chainlint/token-pasting.test b/t/chainlint/token-pasting.test new file mode 100644 index 0000000000..b4610ce815 --- /dev/null +++ b/t/chainlint/token-pasting.test @@ -0,0 +1,32 @@ +# LINT: single token; composite of multiple strings +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" + echo "*.i ident" +} >.gitattributes && + +{ + echo a b c d e f g h i j k l m + echo n o p q r s t u v w x y z +# LINT: exit/enter string context and escaped-quote outside of string + echo '\''$Id$'\'' +} >test && +cat test >test.t && +cat test >test.o && +cat test >test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" >test2.o && +# LINT: exit/enter string context and escaped-quote outside of string +echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o" + +# LINT: single token; composite of multiple strings +downstream_url_for_sed=$( + printf "%s\n" "$downstream_url" | +# LINT: exit/enter string context; "&" inside string not command terminator + sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' +) From 9fd911237f94680e0d1985e1f2fba751b16f5a94 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:53 +0000 Subject: [PATCH 24/45] test-lib: retire "lint harder" optimization hack `test_run_` in test-lib.sh "lints" the body of a test by sending it down a `sed chainlint.sed | grep` pipeline; this happens once for each test run by a test script. Although this pipeline may seem relatively cheap in isolation, it can become expensive when invoked 26800+ times by `make test`, once for each test run, despite the existence of only 16500+ test definitions across all tests scripts. This difference in the number of tests defined in the scripts (16500+) and the number of tests actually run by `make test` (26800+) is explained by the fact that some test scripts run a very large number of small tests, all driven by a series of functions/loops which fill in the test bodies. This means that certain test definitions are being linted repeatedly (tens or hundreds of times) unnecessarily. To avoid such unnecessary work, 2d86a96220 (t: avoid sed-based chain-linting in some expensive cases, 2021-05-13) added an optimization hack which allows individual scripts to manually suppress the unnecessary repeated linting of the same test definition. However, unlike chainlint.sed which checks a test body as the test is run, chainlint.pl checks each test definition just once, no matter how many times the test is run, thus the sort of optimization hack introduced by 2d86a96220 is no longer needed and can be retired. Therefore, revert 2d86a96220. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/README | 5 ----- t/t0027-auto-crlf.sh | 7 +------ t/t3070-wildmatch.sh | 5 ----- t/test-lib.sh | 7 ++----- 4 files changed, 3 insertions(+), 21 deletions(-) diff --git a/t/README b/t/README index 2f439f9658..979b2d4833 100644 --- a/t/README +++ b/t/README @@ -196,11 +196,6 @@ appropriately before running "make". Short options can be bundled, i.e. this feature by setting the GIT_TEST_CHAIN_LINT environment variable to "1" or "0", respectively. - A few test scripts disable some of the more advanced - chain-linting detection in the name of efficiency. You can - override this by setting the GIT_TEST_CHAIN_LINT_HARDER - environment variable to "1". - --stress:: Run the test script repeatedly in multiple parallel jobs until one of them fails. Useful for reproducing rare failures in diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index a22e0e1382..a94ac1eae3 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -387,9 +387,7 @@ test_expect_success 'setup main' ' test_tick ' -# Disable extra chain-linting for the next set of tests. There are many -# auto-generated ones that are not worth checking over and over. -GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 + warn_LF_CRLF="LF will be replaced by CRLF" warn_CRLF_LF="CRLF will be replaced by LF" @@ -606,9 +604,6 @@ do checkout_files "" "$id" "crlf" true "" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul done -# The rest of the tests are unique; do the usual linting. -unset GIT_TEST_CHAIN_LINT_HARDER_DEFAULT - # Should be the last test case: remove some files from the worktree test_expect_success 'ls-files --eol -d -z' ' rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes && diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index f9539968e4..5d871fde96 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -5,11 +5,6 @@ test_description='wildmatch tests' TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh -# Disable expensive chain-lint tests; all of the tests in this script -# are variants of a few trivial test-tool invocations, and there are a lot of -# them. -GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 - should_create_test_file() { file=$1 diff --git a/t/test-lib.sh b/t/test-lib.sh index 377cc1c120..dc0d059109 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,11 +1091,8 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" || - { - test "${GIT_TEST_CHAIN_LINT_HARDER:-${GIT_TEST_CHAIN_LINT_HARDER_DEFAULT:-1}}" != 0 && - $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') - } + if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || + test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi From 23a14f301662df6d003b5bf4dc598f02311c6b30 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:54 +0000 Subject: [PATCH 25/45] test-lib: replace chainlint.sed with chainlint.pl By automatically invoking chainlint.sed upon each test it runs, `test_run_` in test-lib.sh ensures that broken &&-chains will be detected early as tests are modified or new are tests created since it is typical to run a test script manually (i.e. `./t1234-test-script.sh`) during test development. Now that the implementation of chainlint.pl is complete, modify test-lib.sh to invoke it automatically instead of chainlint.sed each time a test script is run. This change reduces the number of "linter" invocations from 26800+ (once per test run) down to 1050+ (once per test script), however, a subsequent change will drop the number of invocations to 1 per `make test`, thus fully realizing the benefit of the new linter. Note that the "magic exit code 117" &&-chain checker added by bb79af9d09 (t/test-lib: introduce --chain-lint option, 2015-03-20) which is built into t/test-lib.sh is retained since it has near zero-cost and (theoretically) may catch a broken &&-chain not caught by chainlint.pl. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/buildsystems/CMakeLists.txt | 2 +- t/test-lib.sh | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 2237109b57..ca358a21a5 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -1076,7 +1076,7 @@ if(NOT ${CMAKE_BINARY_DIR}/CMakeCache.txt STREQUAL ${CACHE_PATH}) "string(REPLACE \"\${GIT_BUILD_DIR_REPL}\" \"GIT_BUILD_DIR=\\\"$TEST_DIRECTORY/../${BUILD_DIR_RELATIVE}\\\"\" content \"\${content}\")\n" "file(WRITE ${CMAKE_SOURCE_DIR}/t/test-lib.sh \${content})") #misc copies - file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.sed DESTINATION ${CMAKE_BINARY_DIR}/t/) + file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.pl DESTINATION ${CMAKE_BINARY_DIR}/t/) file(COPY ${CMAKE_SOURCE_DIR}/po/is.po DESTINATION ${CMAKE_BINARY_DIR}/po/) file(COPY ${CMAKE_SOURCE_DIR}/mergetools/tkdiff DESTINATION ${CMAKE_BINARY_DIR}/mergetools/) file(COPY ${CMAKE_SOURCE_DIR}/contrib/completion/git-prompt.sh DESTINATION ${CMAKE_BINARY_DIR}/contrib/completion/) diff --git a/t/test-lib.sh b/t/test-lib.sh index dc0d059109..a65df2fd22 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,8 +1091,7 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || - test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" + if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi @@ -1588,6 +1587,12 @@ then BAIL_OUT_ENV_NEEDS_SANITIZE_LEAK "GIT_TEST_SANITIZE_LEAK_LOG=true" fi +if test "${GIT_TEST_CHAIN_LINT:-1}" != 0 +then + "$PERL_PATH" "$TEST_DIRECTORY/chainlint.pl" "$0" || + BUG "lint error (see '?!...!? annotations above)" +fi + # Last-minute variable setup USER_HOME="$HOME" HOME="$TRASH_DIRECTORY" From 69b9924b875079babb1d3f665bdc719c4871ba73 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:55 +0000 Subject: [PATCH 26/45] t/Makefile: teach `make test` and `make prove` to run chainlint.pl Unlike chainlint.sed which "lints" a single test body at a time, thus is invoked once per test, chainlint.pl can check all test bodies in all test scripts with a single invocation. As such, it is akin to other bulk "linters" run by the Makefile, such as `test-lint-shell-syntax`, `test-lint-duplicates`, etc. Therefore, teach `make test` and `make prove` to invoke chainlint.pl along with the other bulk linters. Also, since the single chainlint.pl invocation by `make test` or `make prove` has already checked all tests in all scripts, instruct the individual test scripts not to run chainlint.pl on themselves unnecessarily. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/Makefile | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/t/Makefile b/t/Makefile index 11f276774e..3db48c0cb6 100644 --- a/t/Makefile +++ b/t/Makefile @@ -36,14 +36,21 @@ CHAINLINTTMP_SQ = $(subst ','\'',$(CHAINLINTTMP)) T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) +TLIBS = $(sort $(wildcard lib-*.sh)) annotate-tests.sh TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) +TINTEROP = $(sort $(wildcard interop/i[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl +# `test-chainlint` (which is a dependency of `test-lint`, `test` and `prove`) +# checks all tests in all scripts via a single invocation, so tell individual +# scripts not to "chainlint" themselves +CHAINLINTSUPPRESS = GIT_TEST_CHAIN_LINT=0 && export GIT_TEST_CHAIN_LINT && + all: $(DEFAULT_TEST_TARGET) test: pre-clean check-chainlint $(TEST_LINT) - $(MAKE) aggregate-results-and-cleanup + $(CHAINLINTSUPPRESS) $(MAKE) aggregate-results-and-cleanup failed: @failed=$$(cd '$(TEST_RESULTS_DIRECTORY_SQ)' && \ @@ -52,7 +59,7 @@ failed: test -z "$$failed" || $(MAKE) $$failed prove: pre-clean check-chainlint $(TEST_LINT) - @echo "*** prove ***"; $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) + @echo "*** prove ***"; $(CHAINLINTSUPPRESS) $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) $(MAKE) clean-except-prove-cache $(T): @@ -99,6 +106,9 @@ check-chainlint: test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames +ifneq ($(GIT_TEST_CHAIN_LINT),0) +test-lint: test-chainlint +endif test-lint-duplicates: @dups=`echo $(T) $(TPERF) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \ @@ -121,6 +131,9 @@ test-lint-filenames: test -z "$$bad" || { \ echo >&2 "non-portable file name(s): $$bad"; exit 1; } +test-chainlint: + @$(CHAINLINT) $(T) $(TLIBS) $(TPERF) $(TINTEROP) + aggregate-results-and-cleanup: $(T) $(MAKE) aggregate-results $(MAKE) clean @@ -136,4 +149,5 @@ valgrind: perf: $(MAKE) -C perf/ all -.PHONY: pre-clean $(T) aggregate-results clean valgrind perf check-chainlint clean-chainlint +.PHONY: pre-clean $(T) aggregate-results clean valgrind perf \ + check-chainlint clean-chainlint test-chainlint From fb41727b7ed7f62d121cd846f826fb1c62d1bc6a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:56 +0000 Subject: [PATCH 27/45] t: retire unused chainlint.sed Retire chainlint.sed since it has been replaced by a more accurate and functional &&-chain "linter", thus is no longer used. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.sed | 399 ------------------------------------------------ 1 file changed, 399 deletions(-) delete mode 100644 t/chainlint.sed diff --git a/t/chainlint.sed b/t/chainlint.sed deleted file mode 100644 index dc4ce37cb5..0000000000 --- a/t/chainlint.sed +++ /dev/null @@ -1,399 +0,0 @@ -#------------------------------------------------------------------------------ -# Detect broken &&-chains in tests. -# -# At present, only &&-chains in subshells are examined by this linter; -# top-level &&-chains are instead checked directly by the test framework. Like -# the top-level &&-chain linter, the subshell linter (intentionally) does not -# check &&-chains within {...} blocks. -# -# Checking for &&-chain breakage is done line-by-line by pure textual -# inspection. -# -# Incomplete lines (those ending with "\") are stitched together with following -# lines to simplify processing, particularly of "one-liner" statements. -# Top-level here-docs are swallowed to avoid false positives within the -# here-doc body, although the statement to which the here-doc is attached is -# retained. -# -# Heuristics are used to detect end-of-subshell when the closing ")" is cuddled -# with the final subshell statement on the same line: -# -# (cd foo && -# bar) -# -# in order to avoid misinterpreting the ")" in constructs such as "x=$(...)" -# and "case $x in *)" as ending the subshell. -# -# Lines missing a final "&&" are flagged with "?!AMP?!", as are lines which -# chain commands with ";" internally rather than "&&". A line may be flagged -# for both violations. -# -# Detection of a missing &&-link in a multi-line subshell is complicated by the -# fact that the last statement before the closing ")" must not end with "&&". -# Since processing is line-by-line, it is not known whether a missing "&&" is -# legitimate or not until the _next_ line is seen. To accommodate this, within -# multi-line subshells, each line is stored in sed's "hold" area until after -# the next line is seen and processed. If the next line is a stand-alone ")", -# then a missing "&&" on the previous line is legitimate; otherwise a missing -# "&&" is a break in the &&-chain. -# -# ( -# cd foo && -# bar -# ) -# -# In practical terms, when "bar" is encountered, it is flagged with "?!AMP?!", -# but when the stand-alone ")" line is seen which closes the subshell, the -# "?!AMP?!" violation is removed from the "bar" line (retrieved from the "hold" -# area) since the final statement of a subshell must not end with "&&". The -# final line of a subshell may still break the &&-chain by using ";" internally -# to chain commands together rather than "&&", but an internal "?!AMP?!" is -# never removed from a line even though a line-ending "?!AMP?!" might be. -# -# Care is taken to recognize the last _statement_ of a multi-line subshell, not -# necessarily the last textual _line_ within the subshell, since &&-chaining -# applies to statements, not to lines. Consequently, blank lines, comment -# lines, and here-docs are swallowed (but not the command to which the here-doc -# is attached), leaving the last statement in the "hold" area, not the last -# line, thus simplifying &&-link checking. -# -# The final statement before "done" in for- and while-loops, and before "elif", -# "else", and "fi" in if-then-else likewise must not end with "&&", thus -# receives similar treatment. -# -# Swallowing here-docs with arbitrary tags requires a bit of finesse. When a -# line such as "cat <cat <\n\1$/ is attempted to see if -# the content inside "<...>" matches the entirety of the newly-read line. For -# instance, if the next line read is "some data", when concatenated with the -# target line, it becomes "cat <cat <" does match the text following the -# newline, thus the closing here-doc tag has been found. The closing tag line -# and the "<...>" prefix on the target line are then discarded, leaving just -# the target line "cat <\1\2/ - :hered - N - /^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhered - } - s/^<[^>]*>// - s/\n.*$// -} -:notdoc - -# one-liner "(...) &&" -/^[ ]*!*[ ]*(..*)[ ]*&&[ ]*$/boneline - -# same as above but without trailing "&&" -/^[ ]*!*[ ]*(..*)[ ]*$/boneline - -# one-liner "(...) >x" (or "2>x" or "|&]/boneline - -# multi-line "(...\n...)" -/^[ ]*(/bsubsh - -# innocuous line -- print it and advance to next line -b - -# found one-liner "(...)" -- mark suspect if it uses ";" internally rather than -# "&&" (but not ";" in a string) -:oneline -/;/{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ -} -b - -:subsh -# bare "(" line? -- stash for later printing -/^[ ]*([ ]*$/ { - h - bnextln -} -# "(..." line -- "(" opening subshell cuddled with command; temporarily replace -# "(" with sentinel "^" and process the line as if "(" had been seen solo on -# the preceding line; this temporary replacement prevents several rules from -# accidentally thinking "(" introduces a nested subshell; "^" is changed back -# to "(" at output time -x -s/.*// -x -s/(/^/ -bslurp - -:nextln -N -s/.*\n// - -:slurp -# incomplete line "...\" -/\\$/bicmplte -# multi-line quoted string "...\n..."? -/"/bdqstr -# multi-line quoted string '...\n...'? (but not contraction in string "it's") -/'/{ - /"[^'"]*'[^'"]*"/!bsqstr -} -:folded -# here-doc -- swallow it (but not "<<" in a string) -/<<-*[ ]*[\\'"]*[A-Za-z0-9_]/{ - /"[^"]*<<[^"]*"/!bheredoc -} -# comment or empty line -- discard since final non-comment, non-empty line -# before closing ")", "done", "elsif", "else", or "fi" will need to be -# re-visited to drop "suspect" marking since final line of those constructs -# legitimately lacks "&&", so "suspect" mark must be removed -/^[ ]*#/bnextln -/^[ ]*$/bnextln -# in-line comment -- strip it (but not "#" in a string, Bash ${#...} array -# length, or Perforce "//depot/path#42" revision in filespec) -/[ ]#/{ - /"[^"]*#[^"]*"/!s/[ ]#.*$// -} -# one-liner "case ... esac" -/^[ ^]*case[ ]*..*esac/bchkchn -# multi-line "case ... esac" -/^[ ^]*case[ ]..*[ ]in/bcase -# multi-line "for ... done" or "while ... done" -/^[ ^]*for[ ]..*[ ]in/bcont -/^[ ^]*while[ ]/bcont -/^[ ]*do[ ]/bcont -/^[ ]*do[ ]*$/bcont -/;[ ]*do/bcont -/^[ ]*done[ ]*&&[ ]*$/bdone -/^[ ]*done[ ]*$/bdone -/^[ ]*done[ ]*[<>|]/bdone -/^[ ]*done[ ]*)/bdone -/||[ ]*exit[ ]/bcont -/||[ ]*exit[ ]*$/bcont -# multi-line "if...elsif...else...fi" -/^[ ^]*if[ ]/bcont -/^[ ]*then[ ]/bcont -/^[ ]*then[ ]*$/bcont -/;[ ]*then/bcont -/^[ ]*elif[ ]/belse -/^[ ]*elif[ ]*$/belse -/^[ ]*else[ ]/belse -/^[ ]*else[ ]*$/belse -/^[ ]*fi[ ]*&&[ ]*$/bdone -/^[ ]*fi[ ]*$/bdone -/^[ ]*fi[ ]*[<>|]/bdone -/^[ ]*fi[ ]*)/bdone -# nested one-liner "(...) &&" -/^[ ^]*(.*)[ ]*&&[ ]*$/bchkchn -# nested one-liner "(...)" -/^[ ^]*(.*)[ ]*$/bchkchn -# nested one-liner "(...) >x" (or "2>x" or "|]/bchkchn -# nested multi-line "(...\n...)" -/^[ ^]*(/bnest -# multi-line "{...\n...}" -/^[ ^]*{/bblock -# closing ")" on own line -- exit subshell -/^[ ]*)/bclssolo -# "$((...))" -- arithmetic expansion; not closing ")" -/\$(([^)][^)]*))[^)]*$/bchkchn -# "$(...)" -- command substitution; not closing ")" -/\$([^)][^)]*)[^)]*$/bchkchn -# multi-line "$(...\n...)" -- command substitution; treat as nested subshell -/\$([^)]*$/bnest -# "=(...)" -- Bash array assignment; not closing ")" -/=(/bchkchn -# closing "...) &&" -/)[ ]*&&[ ]*$/bclose -# closing "...)" -/)[ ]*$/bclose -# closing "...) >x" (or "2>x" or "|]/bclose -:chkchn -# mark suspect if line uses ";" internally rather than "&&" (but not ";" in a -# string and not ";;" in one-liner "case...esac") -/;/{ - /;;/!{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ - } -} -# line ends with pipe "...|" -- valid; not missing "&&" -/|[ ]*$/bcont -# missing end-of-line "&&" -- mark suspect -/&&[ ]*$/!s/$/ ?!AMP?!/ -:cont -# retrieve and print previous line -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/<\1?!HERE?!\2\3/ -:hdocsub -N -/^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhdocsub -} -s/^<[^>]*>// -s/\n.*$// -bfolded - -# found "case ... in" -- pass through untouched -:case -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/< Date: Fri, 2 Sep 2022 15:56:42 +0000 Subject: [PATCH 28/45] scalar: fix command documentation section header Rename the last section header in 'contrib/scalar/scalar.txt' from "Scalar" to "GIT". The linting rules of the 'documentation' CI build enforce the existence of a "GIT" section in command documentation. Although 'scalar.txt' is not yet checked, it will be in a future patch. Here, changing the header name is more appropriate than making a Scalar-specific exception to the linting rule. The existing "Scalar" section contains only a link back to the main Git documentation, essentially the same as the "GIT" section in builtin documentation. Changing the section name further clarifies the Scalar-Git association and maintains consistency with the rest of Git. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt index 1a12dc4507..f33436c7f6 100644 --- a/contrib/scalar/scalar.txt +++ b/contrib/scalar/scalar.txt @@ -161,6 +161,6 @@ SEE ALSO -------- linkgit:git-clone[1], linkgit:git-maintenance[1]. -Scalar +GIT --- -Associated with the linkgit:git[1] suite +Part of the linkgit:git[1] suite From 7b5c93c6c6847b4b6037e38418bc8bbb8c2eada8 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:43 +0000 Subject: [PATCH 29/45] scalar: include in standard Git build & installation Move 'scalar' out of 'contrib/' and into the root of the Git tree. The goal of this change is to build 'scalar' as part of the standard Git build & install processes. This patch includes both the physical move of Scalar's files out of 'contrib/' ('scalar.c', 'scalar.txt', and 't9xxx-scalar.sh'), and the changes to the build definitions in 'Makefile' and 'CMakelists.txt' to accommodate the new program. At a high level, Scalar is built so that: - there is a 'scalar-objs' target (similar to those created in 029bac01a8 (Makefile: add {program,xdiff,test,git,fuzz}-objs & objects targets, 2021-02-23)) for debugging purposes. - it appears in the root of the install directory (rather than the gitexecdir). - it is included in the 'bin-wrappers/' directory for use in tests. - it receives a platform-specific executable suffix (e.g., '.exe'), if applicable. - 'scalar.txt' is installed as 'man1' documentation. - the 'clean' target removes the 'scalar' executable. Additionally, update the root level '.gitignore' file to ignore the Scalar executable. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/Makefile | 1 + {contrib/scalar => Documentation}/scalar.txt | 0 Makefile | 31 ++++--- contrib/buildsystems/CMakeLists.txt | 9 ++- contrib/scalar/.gitignore | 2 - contrib/scalar/Makefile | 35 -------- contrib/scalar/t/Makefile | 81 ------------------- contrib/scalar/scalar.c => scalar.c | 0 .../t/t9099-scalar.sh => t/t9210-scalar.sh | 10 +-- 10 files changed, 28 insertions(+), 142 deletions(-) rename {contrib/scalar => Documentation}/scalar.txt (100%) delete mode 100644 contrib/scalar/.gitignore delete mode 100644 contrib/scalar/Makefile delete mode 100644 contrib/scalar/t/Makefile rename contrib/scalar/scalar.c => scalar.c (100%) rename contrib/scalar/t/t9099-scalar.sh => t/t9210-scalar.sh (96%) diff --git a/.gitignore b/.gitignore index 80b530bbed..3d1b880101 100644 --- a/.gitignore +++ b/.gitignore @@ -185,6 +185,7 @@ /git-whatchanged /git-worktree /git-write-tree +/scalar /git-core-*/?* /git.res /gitweb/GITWEB-BUILD-OPTIONS diff --git a/Documentation/Makefile b/Documentation/Makefile index bd6b6fcb93..16c9e06239 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -21,6 +21,7 @@ MAN1_TXT += $(filter-out \ MAN1_TXT += git.txt MAN1_TXT += gitk.txt MAN1_TXT += gitweb.txt +MAN1_TXT += scalar.txt # man5 / man7 guides (note: new guides should also be added to command-list.txt) MAN5_TXT += gitattributes.txt diff --git a/contrib/scalar/scalar.txt b/Documentation/scalar.txt similarity index 100% rename from contrib/scalar/scalar.txt rename to Documentation/scalar.txt diff --git a/Makefile b/Makefile index eac30126e2..e03f32ec1e 100644 --- a/Makefile +++ b/Makefile @@ -608,7 +608,9 @@ FUZZ_OBJS = FUZZ_PROGRAMS = GIT_OBJS = LIB_OBJS = +SCALAR_OBJS = OBJECTS = +OTHER_PROGRAMS = PROGRAM_OBJS = PROGRAMS = EXCLUDED_PROGRAMS = @@ -821,10 +823,12 @@ BUILT_INS += git-switch$X BUILT_INS += git-whatchanged$X # what 'all' will build but not install in gitexecdir -OTHER_PROGRAMS = git$X +OTHER_PROGRAMS += git$X +OTHER_PROGRAMS += scalar$X # what test wrappers are needed and 'install' will install, in bindir BINDIR_PROGRAMS_NEED_X += git +BINDIR_PROGRAMS_NEED_X += scalar BINDIR_PROGRAMS_NEED_X += git-receive-pack BINDIR_PROGRAMS_NEED_X += git-shell BINDIR_PROGRAMS_NEED_X += git-upload-archive @@ -2222,7 +2226,7 @@ profile-fast: profile-clean all:: $(ALL_COMMANDS_TO_INSTALL) $(SCRIPT_LIB) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS ifneq (,$X) - $(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';) + $(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';) endif all:: @@ -2545,7 +2549,12 @@ GIT_OBJS += git.o .PHONY: git-objs git-objs: $(GIT_OBJS) +SCALAR_OBJS += scalar.o +.PHONY: scalar-objs +scalar-objs: $(SCALAR_OBJS) + OBJECTS += $(GIT_OBJS) +OBJECTS += $(SCALAR_OBJS) OBJECTS += $(PROGRAM_OBJS) OBJECTS += $(TEST_OBJS) OBJECTS += $(XDIFF_OBJS) @@ -2556,10 +2565,6 @@ ifndef NO_CURL OBJECTS += http.o http-walker.o remote-curl.o endif -SCALAR_SOURCES := contrib/scalar/scalar.c -SCALAR_OBJECTS := $(SCALAR_SOURCES:c=o) -OBJECTS += $(SCALAR_OBJECTS) - .PHONY: objects objects: $(OBJECTS) @@ -2691,7 +2696,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o GIT-LDFLAGS $(GITLIBS $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS) -contrib/scalar/scalar$X: $(SCALAR_OBJECTS) GIT-LDFLAGS $(GITLIBS) +scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ $(filter %.o,$^) $(LIBS) @@ -2747,8 +2752,7 @@ XGETTEXT_FLAGS_SH = $(XGETTEXT_FLAGS) --language=Shell \ XGETTEXT_FLAGS_PERL = $(XGETTEXT_FLAGS) --language=Perl \ --keyword=__ --keyword=N__ --keyword="__n:1,2" MSGMERGE_FLAGS = --add-location --backup=off --update -LOCALIZED_C = $(sort $(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(SCALAR_SOURCES) \ - $(GENERATED_H)) +LOCALIZED_C = $(sort $(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(GENERATED_H)) LOCALIZED_SH = $(sort $(SCRIPT_SH) git-sh-setup.sh) LOCALIZED_PERL = $(sort $(SCRIPT_PERL)) @@ -3062,7 +3066,7 @@ bin-wrappers/%: wrap-for-bin.sh $(call mkdir_p_parent_template) $(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@@BUILD_DIR@@|$(shell pwd)|' \ - -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%$(X),$(@F))$(patsubst git%,$(X),$(filter $(@F),$(BINDIR_PROGRAMS_NEED_X)))|' < $< > $@ && \ + -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%,$(@F))$(if $(filter-out $(BINDIR_PROGRAMS_NO_X),$(@F)),$(X),)|' < $< > $@ && \ chmod +x $@ # GNU make supports exporting all variables by "export" without parameters. @@ -3276,14 +3280,14 @@ ifndef NO_TCLTK $(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install endif ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) endif bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \ destdir_from_execdir_SQ=$$(echo '$(gitexecdir_relative_SQ)' | sed -e 's|[^/][^/]*|..|g') && \ { test "$$bindir/" = "$$execdir/" || \ - for p in git$X $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \ + for p in $(OTHER_PROGRAMS) $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \ $(RM) "$$execdir/$$p" && \ test -n "$(INSTALL_SYMLINKS)" && \ ln -s "$$destdir_from_execdir_SQ/$(bindir_relative_SQ)/$$p" "$$execdir/$$p" || \ @@ -3458,7 +3462,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) git.res $(RM) $(OBJECTS) $(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB) - $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X + $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS) $(RM) $(TEST_PROGRAMS) $(RM) $(FUZZ_PROGRAMS) $(RM) $(SP_OBJ) @@ -3509,6 +3513,7 @@ ALL_COMMANDS += git-citool ALL_COMMANDS += git-gui ALL_COMMANDS += gitk ALL_COMMANDS += gitweb +ALL_COMMANDS += scalar .PHONY: check-docs check-docs:: diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 2237109b57..bae203c1fb 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -610,7 +610,7 @@ unset(CMAKE_REQUIRED_INCLUDES) #programs set(PROGRAMS_BUILT git git-daemon git-http-backend git-sh-i18n--envsubst - git-shell) + git-shell scalar) if(NOT CURL_FOUND) list(APPEND excluded_progs git-http-fetch git-http-push) @@ -757,6 +757,9 @@ target_link_libraries(git-sh-i18n--envsubst common-main) add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c) target_link_libraries(git-shell common-main) +add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c) +target_link_libraries(scalar common-main) + if(CURL_FOUND) add_library(http_obj OBJECT ${CMAKE_SOURCE_DIR}/http.c) @@ -903,7 +906,7 @@ list(TRANSFORM git_perl_scripts PREPEND "${CMAKE_BINARY_DIR}/") #install foreach(program ${PROGRAMS_BUILT}) -if(program STREQUAL "git" OR program STREQUAL "git-shell") +if(program MATCHES "^(git|git-shell|scalar)$") install(TARGETS ${program} RUNTIME DESTINATION bin) else() @@ -977,7 +980,7 @@ endif() #wrapper scripts set(wrapper_scripts - git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext) + git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext scalar) set(wrapper_test_scripts test-fake-ssh test-tool) diff --git a/contrib/scalar/.gitignore b/contrib/scalar/.gitignore deleted file mode 100644 index ff3d47e84d..0000000000 --- a/contrib/scalar/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/*.exe -/scalar diff --git a/contrib/scalar/Makefile b/contrib/scalar/Makefile deleted file mode 100644 index 37f283f35d..0000000000 --- a/contrib/scalar/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# The default target of this Makefile is... -all:: - -# Import tree-wide shared Makefile behavior and libraries -include ../../shared.mak - -include ../../config.mak.uname --include ../../config.mak.autogen --include ../../config.mak - -TARGETS = scalar$(X) scalar.o -GITLIBS = ../../common-main.o ../../libgit.a ../../xdiff/lib.a - -all:: scalar$(X) ../../bin-wrappers/scalar - -$(GITLIBS): - $(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(subst ../../,,$@) - -$(TARGETS): $(GITLIBS) scalar.c - $(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(patsubst %,contrib/scalar/%,$@) - -clean: - $(RM) $(TARGETS) ../../bin-wrappers/scalar - -../../bin-wrappers/scalar: ../../wrap-for-bin.sh Makefile - @mkdir -p ../../bin-wrappers - $(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's|@@BUILD_DIR@@|$(shell cd ../.. && pwd)|' \ - -e 's|@@PROG@@|contrib/scalar/scalar$(X)|' < $< > $@ && \ - chmod +x $@ - -test: all - $(MAKE) -C t - -.PHONY: $(GITLIBS) all clean test FORCE diff --git a/contrib/scalar/t/Makefile b/contrib/scalar/t/Makefile deleted file mode 100644 index 1ed174a8cf..0000000000 --- a/contrib/scalar/t/Makefile +++ /dev/null @@ -1,81 +0,0 @@ -# Import tree-wide shared Makefile behavior and libraries -include ../../../shared.mak - -# Run scalar tests -# -# Copyright (c) 2005,2021 Junio C Hamano, Johannes Schindelin -# - --include ../../../config.mak.autogen --include ../../../config.mak - -SHELL_PATH ?= $(SHELL) -PERL_PATH ?= /usr/bin/perl -RM ?= rm -f -PROVE ?= prove -DEFAULT_TEST_TARGET ?= test -TEST_LINT ?= test-lint - -ifdef TEST_OUTPUT_DIRECTORY -TEST_RESULTS_DIRECTORY = $(TEST_OUTPUT_DIRECTORY)/test-results -else -TEST_RESULTS_DIRECTORY = ../../../t/test-results -endif - -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) -PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) -TEST_RESULTS_DIRECTORY_SQ = $(subst ','\'',$(TEST_RESULTS_DIRECTORY)) - -T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) - -all: $(DEFAULT_TEST_TARGET) - -test: $(TEST_LINT) - $(MAKE) aggregate-results-and-cleanup - -prove: $(TEST_LINT) - @echo "*** prove ***"; GIT_CONFIG=.git/config $(PROVE) --exec '$(SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) - $(MAKE) clean-except-prove-cache - -$(T): - @echo "*** $@ ***"; GIT_CONFIG=.git/config '$(SHELL_PATH_SQ)' $@ $(GIT_TEST_OPTS) - -clean-except-prove-cache: - $(RM) -r 'trash directory'.* - $(RM) -r valgrind/bin - -clean: clean-except-prove-cache - $(RM) .prove - -test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax - -test-lint-duplicates: - @dups=`echo $(T) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \ - test -z "$$dups" || { \ - echo >&2 "duplicate test numbers:" $$dups; exit 1; } - -test-lint-executable: - @bad=`for i in $(T); do test -x "$$i" || echo $$i; done` && \ - test -z "$$bad" || { \ - echo >&2 "non-executable tests:" $$bad; exit 1; } - -test-lint-shell-syntax: - @'$(PERL_PATH_SQ)' ../../../t/check-non-portable-shell.pl $(T) - -aggregate-results-and-cleanup: $(T) - $(MAKE) aggregate-results - $(MAKE) clean - -aggregate-results: - for f in '$(TEST_RESULTS_DIRECTORY_SQ)'/t*-*.counts; do \ - echo "$$f"; \ - done | '$(SHELL_PATH_SQ)' ../../../t/aggregate-results.sh - -valgrind: - $(MAKE) GIT_TEST_OPTS="$(GIT_TEST_OPTS) --valgrind" - -test-results: - mkdir -p test-results - -.PHONY: $(T) aggregate-results clean valgrind diff --git a/contrib/scalar/scalar.c b/scalar.c similarity index 100% rename from contrib/scalar/scalar.c rename to scalar.c diff --git a/contrib/scalar/t/t9099-scalar.sh b/t/t9210-scalar.sh similarity index 96% rename from contrib/scalar/t/t9099-scalar.sh rename to t/t9210-scalar.sh index dfb949f52e..14ca575a21 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/t/t9210-scalar.sh @@ -2,15 +2,9 @@ test_description='test the `scalar` command' -TEST_DIRECTORY=$PWD/../../../t -export TEST_DIRECTORY +. ./test-lib.sh -# Make it work with --no-bin-wrappers -PATH=$PWD/..:$PATH - -. ../../../t/test-lib.sh - -GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab ../cron.txt,launchctl:true,schtasks:true" +GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" export GIT_TEST_MAINT_SCHEDULER test_expect_success 'scalar shows a usage' ' From dd9603e22822fab19c0557dad0cf1825de325403 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Sep 2022 15:56:44 +0000 Subject: [PATCH 30/45] git help: special-case `scalar` With this commit, `git help scalar` will open the appropriate manual or HTML page (instead of looking for `gitscalar`). Signed-off-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- builtin/help.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/help.c b/builtin/help.c index 09ac4289f1..6f2796f211 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -440,6 +440,8 @@ static const char *cmd_to_page(const char *git_cmd) return git_cmd; else if (is_git_command(git_cmd)) return xstrfmt("git-%s", git_cmd); + else if (!strcmp("scalar", git_cmd)) + return xstrdup(git_cmd); else return xstrfmt("git%s", git_cmd); } From 951759d3a5cb20ffeff4836aa0c4b793fa142b51 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Sep 2022 15:56:45 +0000 Subject: [PATCH 31/45] scalar: implement the `help` subcommand It is merely handing off to `git help scalar`. Signed-off-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- scalar.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scalar.c b/scalar.c index 642d16124e..c5c1ce6891 100644 --- a/scalar.c +++ b/scalar.c @@ -819,6 +819,25 @@ static int cmd_delete(int argc, const char **argv) return res; } +static int cmd_help(int argc, const char **argv) +{ + struct option options[] = { + OPT_END(), + }; + const char * const usage[] = { + "scalar help", + NULL + }; + + argc = parse_options(argc, argv, NULL, options, + usage, 0); + + if (argc != 0) + usage_with_options(usage, options); + + return run_git("help", "scalar", NULL); +} + static int cmd_version(int argc, const char **argv) { int verbose = 0, build_options = 0; @@ -858,6 +877,7 @@ static struct { { "run", cmd_run }, { "reconfigure", cmd_reconfigure }, { "delete", cmd_delete }, + { "help", cmd_help }, { "version", cmd_version }, { "diagnose", cmd_diagnose }, { NULL, NULL}, From cc75e556a9de5d62c1ca52db900b729fc830f378 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:46 +0000 Subject: [PATCH 32/45] scalar: add to 'git help -a' command list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 'scalar' as a 'mainporcelain' command in the Git command list. Update the regex in 'cmd-list.perl' used to match the first line of command documentation to find 'scalar(1)'. Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- Documentation/cmd-list.perl | 2 +- command-list.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/cmd-list.perl b/Documentation/cmd-list.perl index af5da45d28..9515a499a3 100755 --- a/Documentation/cmd-list.perl +++ b/Documentation/cmd-list.perl @@ -10,7 +10,7 @@ sub format_one { $state = 0; open I, '<', "$name.txt" or die "No such file $name.txt"; while () { - if (/^git[a-z0-9-]*\(([0-9])\)$/) { + if (/^(git|scalar)[a-z0-9-]*\(([0-9])\)$/) { $mansection = $1; next; } diff --git a/command-list.txt b/command-list.txt index f96bdabd7d..93f94e42ab 100644 --- a/command-list.txt +++ b/command-list.txt @@ -235,3 +235,4 @@ gittutorial guide gittutorial-2 guide gitweb ancillaryinterrogators gitworkflows guide +scalar mainporcelain From 14b4e7e5a455b771f9dee74a5b4eb9a10f2a1cd4 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:47 +0000 Subject: [PATCH 33/45] scalar-clone: add test coverage Create a new test file ('t9211-scalar-clone.sh') to exercise the options and behavior of the 'scalar clone' command. Each test clones to a unique target location and cleans up the cloned repo only when the test passes. This ensures that failed tests' artifacts are captured in CI artifacts for further debugging. Helped-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/t9211-scalar-clone.sh | 151 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100755 t/t9211-scalar-clone.sh diff --git a/t/t9211-scalar-clone.sh b/t/t9211-scalar-clone.sh new file mode 100755 index 0000000000..dd33d87e9b --- /dev/null +++ b/t/t9211-scalar-clone.sh @@ -0,0 +1,151 @@ +#!/bin/sh + +test_description='test the `scalar clone` subcommand' + +. ./test-lib.sh + +GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" +export GIT_TEST_MAINT_SCHEDULER + +test_expect_success 'set up repository to clone' ' + rm -rf .git && + git init to-clone && + ( + cd to-clone && + git branch -m base && + + test_commit first && + test_commit second && + test_commit third && + + git switch -c parallel first && + mkdir -p 1/2 && + test_commit 1/2/3 && + + git switch base && + + # By default, permit + git config uploadpack.allowfilter true && + git config uploadpack.allowanysha1inwant true + ) +' + +cleanup_clone () { + rm -rf "$1" +} + +test_expect_success 'creates content in enlistment root' ' + enlistment=cloned && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + ls -A $enlistment >enlistment-root && + test_line_count = 1 enlistment-root && + test_path_is_dir $enlistment/src && + test_path_is_dir $enlistment/src/.git && + + cleanup_clone $enlistment +' + +test_expect_success 'with spaces' ' + enlistment="cloned with space" && + + scalar clone "file://$(pwd)/to-clone" "$enlistment" && + test_path_is_dir "$enlistment" && + test_path_is_dir "$enlistment/src" && + test_path_is_dir "$enlistment/src/.git" && + + cleanup_clone "$enlistment" +' + +test_expect_success 'partial clone if supported by server' ' + enlistment=partial-clone && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + + ( + cd $enlistment/src && + + # Two promisor packs: one for refs, the other for blobs + ls .git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 2 promisorlist + ) && + + cleanup_clone $enlistment +' + +test_expect_success 'fall back on full clone if partial unsupported' ' + enlistment=no-partial-support && + + test_config -C to-clone uploadpack.allowfilter false && + test_config -C to-clone uploadpack.allowanysha1inwant false && + + scalar clone "file://$(pwd)/to-clone" $enlistment 2>err && + grep "filtering not recognized by server, ignoring" err && + + ( + cd $enlistment/src && + + # Still get a refs promisor file, but none for blobs + ls .git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 1 promisorlist + ) && + + cleanup_clone $enlistment +' + +test_expect_success 'initializes sparse-checkout by default' ' + enlistment=sparse && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + test_cmp_config true core.sparseCheckout && + test_cmp_config true core.sparseCheckoutCone + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--full-clone does not create sparse-checkout' ' + enlistment=full-clone && + + scalar clone --full-clone "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + test_cmp_config "" --default "" core.sparseCheckout && + test_cmp_config "" --default "" core.sparseCheckoutCone + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--single-branch clones HEAD only' ' + enlistment=single-branch && + + scalar clone --single-branch "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + git for-each-ref refs/remotes/origin >out && + test_line_count = 1 out && + grep "refs/remotes/origin/base" out + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--no-single-branch clones all branches' ' + enlistment=no-single-branch && + + scalar clone --no-single-branch "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + git for-each-ref refs/remotes/origin >out && + test_line_count = 2 out && + grep "refs/remotes/origin/base" out && + grep "refs/remotes/origin/parallel" out + ) && + + cleanup_clone $enlistment +' + +test_done From e2809233d19e0faacedf59f229ec292cb9e7c7ef Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:48 +0000 Subject: [PATCH 34/45] t/perf: add Scalar performance tests Create 'p9210-scalar.sh' for testing Scalar performance and comparing performance of Git operations in Scalar registrations and standard repositories. Example results: Test this tree ------------------------------------------------------------------------ 9210.2: scalar clone 14.82(18.00+3.63) 9210.3: git clone 26.15(36.67+6.90) 9210.4: git status (scalar) 0.04(0.01+0.01) 9210.5: git status (non-scalar) 0.10(0.02+0.11) 9210.6: test_commit --append --no-tag A (scalar) 0.08(0.02+0.03) 9210.7: test_commit --append --no-tag A (non-scalar) 0.13(0.03+0.11) Helped-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/perf/p9210-scalar.sh | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 t/perf/p9210-scalar.sh diff --git a/t/perf/p9210-scalar.sh b/t/perf/p9210-scalar.sh new file mode 100755 index 0000000000..265f7cd1fe --- /dev/null +++ b/t/perf/p9210-scalar.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='test scalar performance' +. ./perf-lib.sh + +test_perf_large_repo "$TRASH_DIRECTORY/to-clone" + +test_expect_success 'enable server-side partial clone' ' + git -C to-clone config uploadpack.allowFilter true && + git -C to-clone config uploadpack.allowAnySHA1InWant true +' + +test_perf 'scalar clone' ' + rm -rf scalar-clone && + scalar clone "file://$(pwd)/to-clone" scalar-clone +' + +test_perf 'git clone' ' + rm -rf git-clone && + git clone "file://$(pwd)/to-clone" git-clone +' + +test_compare_perf () { + command=$1 + shift + args=$* + test_perf "$command $args (scalar)" " + $command -C scalar-clone/src $args + " + + test_perf "$command $args (non-scalar)" " + $command -C git-clone $args + " +} + +test_compare_perf git status +test_compare_perf test_commit --append --no-tag A + +test_done From ba1b117eec2f73c84f73f827e6f3ac8b82b35585 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:49 +0000 Subject: [PATCH 35/45] t/perf: add 'GIT_PERF_USE_SCALAR' run option Add a 'GIT_PERF_USE_SCALAR' environment variable (and corresponding perf config 'useScalar') to register a repository created with any of: * test_perf_fresh_repo * test_perf_default_repo * test_perf_large_repo as a Scalar enlistment. This is intended to allow a developer to test the impact of Scalar on already-defined performance scenarios. Suggested-by: Derrick Stolee Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/perf/README | 4 ++++ t/perf/perf-lib.sh | 13 ++++++++++++- t/perf/run | 3 +++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/t/perf/README b/t/perf/README index fb9127a66f..8f217d7be7 100644 --- a/t/perf/README +++ b/t/perf/README @@ -95,6 +95,10 @@ You can set the following variables (also in your config.mak): Git (e.g., performance of index-pack as the number of threads changes). These can be enabled with GIT_PERF_EXTRA. + GIT_PERF_USE_SCALAR + Boolean indicating whether to register test repo(s) with Scalar + before executing tests. + You can also pass the options taken by ordinary git tests; the most useful one is: diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh index 27c2801792..e7786775a9 100644 --- a/t/perf/perf-lib.sh +++ b/t/perf/perf-lib.sh @@ -49,6 +49,9 @@ export TEST_DIRECTORY TRASH_DIRECTORY GIT_BUILD_DIR GIT_TEST_CMP MODERN_GIT=$GIT_BUILD_DIR/bin-wrappers/git export MODERN_GIT +MODERN_SCALAR=$GIT_BUILD_DIR/bin-wrappers/scalar +export MODERN_SCALAR + perf_results_dir=$TEST_RESULTS_DIR test -n "$GIT_PERF_SUBSECTION" && perf_results_dir="$perf_results_dir/$GIT_PERF_SUBSECTION" mkdir -p "$perf_results_dir" @@ -120,6 +123,10 @@ test_perf_create_repo_from () { # status" due to a locked index. Since we have # a copy it's fine to remove the lock. rm .git/index.lock + fi && + if test_bool_env GIT_PERF_USE_SCALAR false + then + "$MODERN_SCALAR" register fi ) || error "failed to copy repository '$source' to '$repo'" } @@ -130,7 +137,11 @@ test_perf_fresh_repo () { "$MODERN_GIT" init -q "$repo" && ( cd "$repo" && - test_perf_do_repo_symlink_config_ + test_perf_do_repo_symlink_config_ && + if test_bool_env GIT_PERF_USE_SCALAR false + then + "$MODERN_SCALAR" register + fi ) } diff --git a/t/perf/run b/t/perf/run index 55219aa405..33da4d2aba 100755 --- a/t/perf/run +++ b/t/perf/run @@ -171,6 +171,9 @@ run_subsection () { get_var_from_env_or_config "GIT_PERF_MAKE_COMMAND" "perf" "makeCommand" get_var_from_env_or_config "GIT_PERF_MAKE_OPTS" "perf" "makeOpts" + get_var_from_env_or_config "GIT_PERF_USE_SCALAR" "perf" "useScalar" "--bool" + export GIT_PERF_USE_SCALAR + get_var_from_env_or_config "GIT_PERF_REPO_NAME" "perf" "repoName" export GIT_PERF_REPO_NAME From 9eb7a73158bdc91892a6b9a0b43b8f954b1e39e2 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:50 +0000 Subject: [PATCH 36/45] Documentation/technical: include Scalar technical doc Include 'Documentation/technical/scalar.txt' alongside the other HTML technical docs when installing them. Now that the document is intended as a widely-accessible reference, remove the internal work-in-progress roadmap from the document. Those details should no longer be needed to guide Scalar's development and, if they were left, they could fall out-of-date and be misleading to readers. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- Documentation/Makefile | 1 + Documentation/technical/scalar.txt | 61 ------------------------------ 2 files changed, 1 insertion(+), 61 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index 16c9e06239..9ec53afdf1 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -116,6 +116,7 @@ TECH_DOCS += technical/parallel-checkout TECH_DOCS += technical/partial-clone TECH_DOCS += technical/racy-git TECH_DOCS += technical/reftable +TECH_DOCS += technical/scalar TECH_DOCS += technical/send-pack-pipeline TECH_DOCS += technical/shallow TECH_DOCS += technical/trivial-merge diff --git a/Documentation/technical/scalar.txt b/Documentation/technical/scalar.txt index 0600150b3a..921cb104c3 100644 --- a/Documentation/technical/scalar.txt +++ b/Documentation/technical/scalar.txt @@ -64,64 +64,3 @@ some "global" `git` options (e.g., `-c` and `-C`). Because `scalar` is not invoked as a Git subcommand (like `git scalar`), it is built and installed as its own executable in the `bin/` directory, alongside `git`, `git-gui`, etc. - -Roadmap -------- - -NOTE: this section will be removed once the remaining tasks outlined in this -roadmap are complete. - -Scalar is a large enough project that it is being upstreamed incrementally, -living in `contrib/` until it is feature-complete. So far, the following patch -series have been accepted: - -- `scalar-the-beginning`: The initial patch series which sets up - `contrib/scalar/` and populates it with a minimal `scalar` command that - demonstrates the fundamental ideas. - -- `scalar-c-and-C`: The `scalar` command learns about two options that can be - specified before the command, `-c =` and `-C `. - -- `scalar-diagnose`: The `scalar` command is taught the `diagnose` subcommand. - -- `scalar-generalize-diagnose`: Move the functionality of `scalar diagnose` - into `git diagnose` and `git bugreport --diagnose`. - -- 'scalar-add-fsmonitor: Enable the built-in FSMonitor in Scalar - enlistments. At the end of this series, Scalar should be feature-complete - from the perspective of a user. - -Roughly speaking (and subject to change), the following series are needed to -"finish" this initial version of Scalar: - -- Move Scalar to toplevel: Move Scalar out of `contrib/` and into the root of - `git`. This includes a variety of related updates, including: - - building & installing Scalar in the Git root-level 'make [install]'. - - builing & testing Scalar as part of CI. - - moving and expanding test coverage of Scalar (including perf tests). - - implementing 'scalar help'/'git help scalar' to display scalar - documentation. - -Finally, there are two additional patch series that exist in Microsoft's fork of -Git, but there is no current plan to upstream them. There are some interesting -ideas there, but the implementation is too specific to Azure Repos and/or VFS -for Git to be of much help in general. - -These still exist mainly because the GVFS protocol is what Azure Repos has -instead of partial clone, while Git is focused on improving partial clone: - -- `scalar-with-gvfs`: The primary purpose of this patch series is to support - existing Scalar users whose repositories are hosted in Azure Repos (which does - not support Git's partial clones, but supports its predecessor, the GVFS - protocol, which is used by Scalar to emulate the partial clone). - - Since the GVFS protocol will never be supported by core Git, this patch series - will remain in Microsoft's fork of Git. - -- `run-scalar-functional-tests`: The Scalar project developed a quite - comprehensive set of integration tests (or, "Functional Tests"). They are the - sole remaining part of the original C#-based Scalar project, and this patch - adds a GitHub workflow that runs them all. - - Since the tests partially depend on features that are only provided in the - `scalar-with-gvfs` patch series, this patch cannot be upstreamed. From 7ead46810b507828c1481eaea6d64b9ed635b8b7 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Fri, 9 Sep 2022 15:27:36 -0700 Subject: [PATCH 37/45] builtin/mv.c: fix possible segfault in add_slash() A possible segfault was introduced in c08830de41 (mv: check if is a SKIP_WORKTREE_DIR, 2022-08-09). When running t7001 with SANITIZE=address, problem appears when running: git mv path1/path2/ . or git mv directory ../ or any that makes dest_path[0] an empty string. The add_slash() call could segfault when path argument to it is an empty string, because it makes an out-of-bounds read to decide if an extra slash '/' needs to be appended to it. As add_slash() is used to make sure that a valid pathname to a file in the given directory can be made by appending a filename after the value returned from it, if path is an empty string, we want to return it as-is. The path to a file "F" in the top-level of the working tree (i.e. path=="") is formed by appending "F" after "" (i.e. path) without any slash in between. So, just like the case where a non-empty path already ends with a slash, return an empty path as-is. Reported-by: Jeff King Helped-by: Jeff King Helped-by: Junio C Hamano Helped-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/mv.c b/builtin/mv.c index 4b67bd096a..35bf1e9c71 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -71,7 +71,7 @@ static const char **internal_prefix_pathspec(const char *prefix, static const char *add_slash(const char *path) { size_t len = strlen(path); - if (path[len - 1] != '/') { + if (len && path[len - 1] != '/') { char *with_slash = xmalloc(st_add(len, 2)); memcpy(with_slash, path, len); with_slash[len++] = '/'; From e40d906449950c140ba1e081b647c708d6d2979e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 00:58:09 -0400 Subject: [PATCH 38/45] list-objects-filter: don't memset after releasing filter struct If we see an error while parsing a "combine" filter, we call list_objects_filter_release() to free any allocated memory, and then use memset() to return the struct to a known state. But the release function already does that reinitializing. Doing it again is pointless. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 6cc4eb8e1c..ea989db260 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -187,10 +187,8 @@ static int parse_combine_filter( cleanup: strbuf_list_free(subspecs); - if (result) { + if (result) list_objects_filter_release(filter_options); - memset(filter_options, 0, sizeof(*filter_options)); - } return result; } From aff4bfcf0a51a26d069ccc3a29e643a112867b27 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:00:45 -0400 Subject: [PATCH 39/45] list-objects-filter: handle null default filter spec When we have a remote.*.promisor config variable, we know that we're in a partial clone. Usually there's a matching remote.*.partialclonefilter option, which tells us which filter to use with the remote. If that option is missing, we skip setting up the filter at all. But something funny happens: we stick a NULL entry into the string_list storing the text filter spec. This is a weird state, and could possibly segfault if anybody called called list_objects_filter_spec(), etc. In practice, nobody does, because filter->choice will still be LOFC_DISABLED, so code generally realizes there's no filter to use. And the string_list itself is OK, because it starts in non-dup mode until we actually parse a filter spec. So it blindly stores the NULL without even looking at it. But it's probably worth avoiding this confused state. It's an accident waiting to happen, and it will be a problem if we replace the lazy initialization from 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec strings, 2022-09-08) with a real initialization function. The history is a little interesting here, as the bug was introduced during the merge resolution in 627b826834 (Merge branch 'md/list-objects-filter-combo', 2019-09-18). The original logic comes from cac1137dc4 (list-objects: check if filter is NULL before using, 2018-06-11), where we had a single string via core.partialCloneFilter, and a simple NULL check was sufficient. And it even added a test in t0410 that covers this situation. Later, that was expanded to allow per-remote filters in fa3d1b63e8 (promisor-remote: parse remote.*.partialclonefilter, 2019-06-25). After that commit, we get a promisor struct with a partial_clone_filter string, which could be NULL. The commit checks only that the struct pointer is non-NULL, which is enough. It may pass NULL to gently_parse_list_objects_filter(), but that function is smart enough to consider it a noop. But in parallel, cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list, 2019-06-27) added a new line of code: before we call gently_parse_list_objets_filter(), we append the filter spec to the string_list. By itself that was OK, since we'd have returned early if the string was NULL. When the two were merged in 627b826834, the result is that we return early only if the struct is NULL, but not the string. And we append to the string_list, meaning we may append NULL. The solution is to return early if either is NULL, as it would mean we don't have a configured filter. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index ea989db260..18c51001dc 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -399,7 +399,7 @@ void partial_clone_get_default_filter_spec( /* * Parse default value, but silently ignore it if it is invalid. */ - if (!promisor) + if (!promisor || !promisor->partial_clone_filter) return; string_list_append(&filter_options->filter_spec, From 2a01bdedf87d7cbfc4411ff5059cfe406e1637db Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:03:07 -0400 Subject: [PATCH 40/45] list-objects-filter: add and use initializers In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec strings, 2022-09-08), we noted that the filter_spec string_list was inconsistent in how it handled memory ownership of strings stored in the list. The fix there was a bit of a band-aid to set the "strdup_strings" variable right before adding anything. That works OK, and it lets the users of the API continue to zero-initialize the struct. But it makes the code a bit hard to follow and accident-prone, as any other spots appending the filter_spec need to think about whether to set the strdup_strings value, too (there's one such spot in partial_clone_get_default_filter_spec(), which is probably a possible memory leak). So let's do that full cleanup now. We'll introduce a LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as appropriate (though it is for the "_options" struct, this matches the corresponding list_objects_filter_release() function). This is harder than it seems! Many other structs, like git_transport_data, embed the filter struct. So they need to initialize it themselves even if the rest of the enclosing struct is OK with zero-initialization. I found all of the relevant spots by grepping manually for declarations of list_objects_filter_options. And then doing so recursively for structs which embed it, and ones which embed those, and so on. I'm pretty sure I got everything, but there's no change that would alert the compiler if any topics in flight added new declarations. To catch this case, we now double-check in the parsing function that things were initialized as expected and BUG() if appropriate. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/fetch-pack.c | 1 + builtin/fetch.c | 2 +- builtin/submodule--helper.c | 8 ++++---- bundle.h | 1 + list-objects-filter-options.c | 20 +++++++++++--------- list-objects-filter-options.h | 3 +++ revision.c | 1 + transport-helper.c | 2 ++ transport.c | 1 + upload-pack.c | 1 + 11 files changed, 27 insertions(+), 15 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 9e0b2b45ca..78fb80eab6 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -72,7 +72,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; -static struct list_objects_filter_options filter_options; +static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static int option_filter_submodules = -1; /* unspecified */ static int config_filter_submodules = -1; /* unspecified */ static struct string_list server_options = STRING_LIST_INIT_NODUP; diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index f045bbbe94..afe679368d 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -62,6 +62,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) packet_trace_identity("fetch-pack"); memset(&args, 0, sizeof(args)); + list_objects_filter_init(&args.filter_options); args.uploadpack = "git-upload-pack"; for (i = 1; i < argc && *argv[i] == '-'; i++) { diff --git a/builtin/fetch.c b/builtin/fetch.c index ac29c2b1ae..0e6238d283 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -80,7 +80,7 @@ static int recurse_submodules_cli = RECURSE_SUBMODULES_DEFAULT; static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static struct refspec refmap = REFSPEC_INIT_FETCH; -static struct list_objects_filter_options filter_options; +static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static struct string_list server_options = STRING_LIST_INIT_DUP; static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; static int fetch_write_commit_graph = -1; diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c597df7528..f5dc63fab4 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1754,7 +1754,8 @@ static int module_clone(int argc, const char **argv, const char *prefix) { int dissociate = 0, quiet = 0, progress = 0, require_init = 0; struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; - struct list_objects_filter_options filter_options; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option module_clone_options[] = { OPT_STRING(0, "prefix", &clone_data.prefix, @@ -1796,7 +1797,6 @@ static int module_clone(int argc, const char **argv, const char *prefix) NULL }; - memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_clone_options, git_submodule_helper_usage, 0); @@ -2581,7 +2581,8 @@ static int module_update(int argc, const char **argv, const char *prefix) { struct pathspec pathspec; struct update_data opt = UPDATE_DATA_INIT; - struct list_objects_filter_options filter_options; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; int ret; struct option module_update_options[] = { @@ -2639,7 +2640,6 @@ static int module_update(int argc, const char **argv, const char *prefix) update_clone_config_from_gitmodules(&opt.max_jobs); git_config(git_update_clone_config, &opt.max_jobs); - memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_update_options, git_submodule_helper_usage, 0); diff --git a/bundle.h b/bundle.h index 0c052f5496..68ff39a0a7 100644 --- a/bundle.h +++ b/bundle.h @@ -18,6 +18,7 @@ struct bundle_header { { \ .prerequisites = STRING_LIST_INIT_DUP, \ .references = STRING_LIST_INIT_DUP, \ + .filter = LIST_OBJECTS_FILTER_INIT, \ } void bundle_header_init(struct bundle_header *header); void bundle_header_release(struct bundle_header *header); diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 18c51001dc..56a1933a50 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -108,7 +108,7 @@ int gently_parse_list_objects_filter( strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); - memset(filter_options, 0, sizeof(*filter_options)); + list_objects_filter_init(filter_options); return 1; } @@ -223,8 +223,7 @@ static void transform_to_combine_type( struct list_objects_filter_options *sub_array = xcalloc(initial_sub_alloc, sizeof(*sub_array)); sub_array[0] = *filter_options; - memset(filter_options, 0, sizeof(*filter_options)); - string_list_init_dup(&filter_options->filter_spec); + list_objects_filter_init(filter_options); filter_options->sub = sub_array; filter_options->sub_alloc = initial_sub_alloc; } @@ -255,11 +254,8 @@ void parse_list_objects_filter( struct strbuf errbuf = STRBUF_INIT; int parse_error; - if (!filter_options->filter_spec.strdup_strings) { - if (filter_options->filter_spec.nr) - BUG("unexpected non-allocated string in filter_spec"); - filter_options->filter_spec.strdup_strings = 1; - } + if (!filter_options->filter_spec.strdup_strings) + BUG("filter_options not properly initialized"); if (!filter_options->choice) { string_list_append(&filter_options->filter_spec, arg); @@ -346,7 +342,7 @@ void list_objects_filter_release( for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); free(filter_options->sub); - memset(filter_options, 0, sizeof(*filter_options)); + list_objects_filter_init(filter_options); } void partial_clone_register( @@ -429,3 +425,9 @@ void list_objects_filter_copy( for (i = 0; i < src->sub_nr; i++) list_objects_filter_copy(&dest->sub[i], &src->sub[i]); } + +void list_objects_filter_init(struct list_objects_filter_options *filter_options) +{ + struct list_objects_filter_options blank = LIST_OBJECTS_FILTER_INIT; + memcpy(filter_options, &blank, sizeof(*filter_options)); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index ffc02d77e7..2720f7dba8 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -69,6 +69,9 @@ struct list_objects_filter_options { */ }; +#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP } +void list_objects_filter_init(struct list_objects_filter_options *filter_options); + /* * Parse value of the argument to the "filter" keyword. * On the command line this looks like: diff --git a/revision.c b/revision.c index 0c6e26cd9c..fbd89ef8e7 100644 --- a/revision.c +++ b/revision.c @@ -1900,6 +1900,7 @@ void repo_init_revisions(struct repository *r, } init_display_notes(&revs->notes_opt); + list_objects_filter_init(&revs->filter); } static void add_pending_commit_list(struct rev_info *revs, diff --git a/transport-helper.c b/transport-helper.c index 322c722478..e95267a4ab 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -1286,6 +1286,8 @@ int transport_helper_init(struct transport *transport, const char *name) if (getenv("GIT_TRANSPORT_HELPER_DEBUG")) debug = 1; + list_objects_filter_init(&data->transport_options.filter_options); + transport->data = data; transport->vtable = &vtable; transport->smart_options = &(data->transport_options); diff --git a/transport.c b/transport.c index 6ec6130852..a14179684b 100644 --- a/transport.c +++ b/transport.c @@ -1113,6 +1113,7 @@ struct transport *transport_get(struct remote *remote, const char *url) * will be checked individually in git_connect. */ struct git_transport_data *data = xcalloc(1, sizeof(*data)); + list_objects_filter_init(&data->options.filter_options); ret->data = data; ret->vtable = &builtin_smart_vtable; ret->smart_options = &(data->options); diff --git a/upload-pack.c b/upload-pack.c index 3a851b3606..cbd373f2e5 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -141,6 +141,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) data->allow_filter_fallback = 1; data->tree_filter_max_depth = ULONG_MAX; packet_writer_init(&data->writer, 1); + list_objects_filter_init(&data->filter_options); data->keepalive = 5; data->advertise_sid = 0; From c54980ab83661e8e290003fbd5ab44b12e4e77b1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:03:31 -0400 Subject: [PATCH 41/45] list-objects-filter: convert filter_spec to a strbuf Originally, the filter_spec field was just a string pointer. In cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list, 2019-06-27) it became a string_list, but that commit notes: A strbuf would seem to be a more natural choice for this object, but it unfortunately requires initialization besides just zero'ing out the memory. This results in all container structs, and all containers of those structs, etc., to also require initialization. Initializing them all would be more cumbersome that simply using a string_list, which behaves properly when its contents are zero'd. Now that we've changed the struct to require non-zero initialization anyway (ironically, because string_list also needed non-zero initialization to avoid leaks), we can now convert to that more natural type. This makes the list_objects_filter_spec() function much less awkward, as it had to collapse the string_list to a single-entry list on the fly. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 51 +++++++++++++---------------------- list-objects-filter-options.h | 4 +-- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 56a1933a50..d46ce4acc4 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -202,10 +202,10 @@ static int allow_unencoded(char ch) static void filter_spec_append_urlencode( struct list_objects_filter_options *filter, const char *raw) { - struct strbuf buf = STRBUF_INIT; - strbuf_addstr_urlencode(&buf, raw, allow_unencoded); - trace_printf("Add to combine filter-spec: %s\n", buf.buf); - string_list_append_nodup(&filter->filter_spec, strbuf_detach(&buf, NULL)); + size_t orig_len = filter->filter_spec.len; + strbuf_addstr_urlencode(&filter->filter_spec, raw, allow_unencoded); + trace_printf("Add to combine filter-spec: %s\n", + filter->filter_spec.buf + orig_len); } /* @@ -229,7 +229,7 @@ static void transform_to_combine_type( } filter_options->sub_nr = 1; filter_options->choice = LOFC_COMBINE; - string_list_append(&filter_options->filter_spec, "combine:"); + strbuf_addstr(&filter_options->filter_spec, "combine:"); filter_spec_append_urlencode( filter_options, list_objects_filter_spec(&filter_options->sub[0])); @@ -237,7 +237,7 @@ static void transform_to_combine_type( * We don't need the filter_spec strings for subfilter specs, only the * top level. */ - string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0); + strbuf_release(&filter_options->sub[0].filter_spec); } void list_objects_filter_die_if_populated( @@ -254,11 +254,11 @@ void parse_list_objects_filter( struct strbuf errbuf = STRBUF_INIT; int parse_error; - if (!filter_options->filter_spec.strdup_strings) + if (!filter_options->filter_spec.buf) BUG("filter_options not properly initialized"); if (!filter_options->choice) { - string_list_append(&filter_options->filter_spec, arg); + strbuf_addstr(&filter_options->filter_spec, arg); parse_error = gently_parse_list_objects_filter( filter_options, arg, &errbuf); @@ -269,7 +269,7 @@ void parse_list_objects_filter( */ transform_to_combine_type(filter_options); - string_list_append(&filter_options->filter_spec, "+"); + strbuf_addch(&filter_options->filter_spec, '+'); filter_spec_append_urlencode(filter_options, arg); ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1, filter_options->sub_alloc); @@ -300,31 +300,18 @@ int opt_parse_list_objects_filter(const struct option *opt, const char *list_objects_filter_spec(struct list_objects_filter_options *filter) { - if (!filter->filter_spec.nr) + if (!filter->filter_spec.len) BUG("no filter_spec available for this filter"); - if (filter->filter_spec.nr != 1) { - struct strbuf concatted = STRBUF_INIT; - strbuf_add_separated_string_list( - &concatted, "", &filter->filter_spec); - string_list_clear(&filter->filter_spec, /*free_util=*/0); - string_list_append_nodup( - &filter->filter_spec, strbuf_detach(&concatted, NULL)); - } - - return filter->filter_spec.items[0].string; + return filter->filter_spec.buf; } const char *expand_list_objects_filter_spec( struct list_objects_filter_options *filter) { if (filter->choice == LOFC_BLOB_LIMIT) { - struct strbuf expanded_spec = STRBUF_INIT; - strbuf_addf(&expanded_spec, "blob:limit=%lu", + strbuf_release(&filter->filter_spec); + strbuf_addf(&filter->filter_spec, "blob:limit=%lu", filter->blob_limit_value); - string_list_clear(&filter->filter_spec, /*free_util=*/0); - string_list_append_nodup( - &filter->filter_spec, - strbuf_detach(&expanded_spec, NULL)); } return list_objects_filter_spec(filter); @@ -337,7 +324,7 @@ void list_objects_filter_release( if (!filter_options) return; - string_list_clear(&filter_options->filter_spec, /*free_util=*/0); + strbuf_release(&filter_options->filter_spec); free(filter_options->sparse_oid_name); for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); @@ -398,8 +385,8 @@ void partial_clone_get_default_filter_spec( if (!promisor || !promisor->partial_clone_filter) return; - string_list_append(&filter_options->filter_spec, - promisor->partial_clone_filter); + strbuf_addstr(&filter_options->filter_spec, + promisor->partial_clone_filter); gently_parse_list_objects_filter(filter_options, promisor->partial_clone_filter, &errbuf); @@ -411,14 +398,12 @@ void list_objects_filter_copy( const struct list_objects_filter_options *src) { int i; - struct string_list_item *item; /* Copy everything. We will overwrite the pointers shortly. */ memcpy(dest, src, sizeof(struct list_objects_filter_options)); - string_list_init_dup(&dest->filter_spec); - for_each_string_list_item(item, &src->filter_spec) - string_list_append(&dest->filter_spec, item->string); + strbuf_init(&dest->filter_spec, 0); + strbuf_addbuf(&dest->filter_spec, &src->filter_spec); dest->sparse_oid_name = xstrdup_or_null(src->sparse_oid_name); ALLOC_ARRAY(dest->sub, dest->sub_alloc); diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 2720f7dba8..7eeadab2dd 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -35,7 +35,7 @@ struct list_objects_filter_options { * To get the raw filter spec given by the user, use the result of * list_objects_filter_spec(). */ - struct string_list filter_spec; + struct strbuf filter_spec; /* * 'choice' is determined by parsing the filter-spec. This indicates @@ -69,7 +69,7 @@ struct list_objects_filter_options { */ }; -#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP } +#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRBUF_INIT } void list_objects_filter_init(struct list_objects_filter_options *filter_options); /* From 746aae3dd1a8e8dba31797ac237916d9533e4254 Mon Sep 17 00:00:00 2001 From: ZheNing Hu Date: Sun, 11 Sep 2022 14:03:17 +0000 Subject: [PATCH 42/45] ls-files: fix black space in error message ce74de9(ls-files: introduce "--format" option) miss a space between two words incorrectly, it leads to wrong i10n messages. So fix it by adding a space at the end of the error message. Signed-off-by: ZheNing Hu Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 779dc18e59..4cf8a23648 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -257,7 +257,7 @@ static size_t expand_show_index(struct strbuf *sb, const char *start, end = strchr(start + 1, ')'); if (!end) - die(_("bad ls-files format: element '%s'" + die(_("bad ls-files format: element '%s' " "does not end in ')'"), start); len = end - start + 1; From 7c04aa73906b9186c9d46010227d4437fd534d93 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 13 Sep 2022 04:01:47 +0000 Subject: [PATCH 43/45] chainlint: colorize problem annotations and test delimiters When `chainlint.pl` detects problems in a test definition, it emits the test definition with "?!FOO?!" annotations highlighting the problems it discovered. For instance, given this problematic test: test_expect_success 'discombobulate frobnitz' ' git frob babble && (echo balderdash; echo gnabgib) >expect && for i in three two one do git nitfol $i done >actual test_cmp expect actual ' chainlint.pl will output: # chainlint: t1234-confusing.sh # chainlint: discombobulate frobnitz git frob babble && (echo balderdash ; ?!AMP?! echo gnabgib) >expect && for i in three two one do git nitfol $i ?!LOOP?! done >actual ?!AMP?! test_cmp expect actual in which it may be difficult to spot the "?!FOO?!" annotations. The problem is compounded when multiple tests, possibly in multiple scripts, fail "linting", in which case it may be difficult to spot the "# chainlint:" lines which delimit one problematic test from another. To ameliorate this potential problem, colorize the "?!FOO?!" annotations in order to quickly draw the test author's attention to the problem spots, and colorize the "# chainlint:" lines to help the author identify the name of each script and each problematic test. Colorization is disabled automatically if output is not directed to a terminal or if NO_COLOR environment variable is set. The implementation is specific to Unix (it employs `tput` if available) but works equally well in the Git for Windows development environment which emulates Unix sufficiently. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index 386999ce65..976db4b8a0 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -585,12 +585,14 @@ sub check_test { my $parser = TestParser->new(\$body); my @tokens = $parser->parse(); return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $c = main::fd_colors(1); my $checked = join(' ', @tokens); $checked =~ s/^\n//; $checked =~ s/^ //mg; $checked =~ s/ $//mg; + $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg; $checked .= "\n" unless $checked =~ /\n$/; - push(@{$self->{output}}, "# chainlint: $title\n$checked"); + push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); } sub parse_cmd { @@ -615,6 +617,41 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { $interval = sub { return Time::HiRes::tv_interval(shift); }; } +# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this +# outside of get_colors() since under 'ithreads' all threads use %ENV of main +# thread and ignore %ENV changes in subthreads. +$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; + +my @NOCOLORS = (bold => '', rev => '', reset => '', blue => '', green => '', red => ''); +my %COLORS = (); +sub get_colors { + return \%COLORS if %COLORS; + if (exists($ENV{NO_COLOR}) || + system("tput sgr0 >/dev/null 2>&1") != 0 || + system("tput bold >/dev/null 2>&1") != 0 || + system("tput rev >/dev/null 2>&1") != 0 || + system("tput setaf 1 >/dev/null 2>&1") != 0) { + %COLORS = @NOCOLORS; + return \%COLORS; + } + %COLORS = (bold => `tput bold`, + rev => `tput rev`, + reset => `tput sgr0`, + blue => `tput setaf 4`, + green => `tput setaf 2`, + red => `tput setaf 1`); + chomp(%COLORS); + return \%COLORS; +} + +my %FD_COLORS = (); +sub fd_colors { + my $fd = shift; + return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); + $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS}; + return $FD_COLORS{$fd}; +} + sub ncores { # Windows return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); @@ -630,6 +667,8 @@ sub show_stats { my $walltime = $interval->($start_time); my ($usertime) = times(); my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + my $c = fd_colors(2); + print(STDERR $c->{green}); for (@$stats) { my ($worker, $nscripts, $ntests, $nerrs) = @$_; print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); @@ -638,7 +677,7 @@ sub show_stats { $total_tests += $ntests; $total_errs += $nerrs; } - printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); } sub check_script { @@ -656,8 +695,9 @@ sub check_script { my $parser = ScriptParser->new(\$s); 1 while $parser->parse_cmd(); if (@{$parser->{output}}) { + my $c = fd_colors(1); my $s = join('', @{$parser->{output}}); - $emit->("# chainlint: $path\n" . $s); + $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); $nerrs += () = $s =~ /\?![^?]+\?!/g; } $ntests += $parser->{ntests}; From 255a6f91ae4600ee2d257670477caf97b7986470 Mon Sep 17 00:00:00 2001 From: Adam Dinwoodie Date: Thu, 15 Sep 2022 08:57:17 +0100 Subject: [PATCH 44/45] t1800: correct test to handle Cygwin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Cygwin, when failing to spawn a process using start_command, Git outputs the same error as on Linux systems, rather than using the GIT_WINDOWS_NATIVE-specific error output. The WINDOWS test prerequisite is set in both Cygwin and native Windows environments, which means it's not appropriate to use to anticipate the error output from start_command. Instead, use the MINGW test prerequisite, which is only set for Git in native Windows environments, and not for Cygwin. Signed-off-by: Adam Dinwoodie Helped-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- t/t1800-hook.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 64096adac7..43fcb7c0bf 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -157,9 +157,9 @@ test_expect_success 'git hook run a hook with a bad shebang' ' write_script bad-hooks/test-hook "/bad/path/no/spaces" expect <<-\EOF fatal: cannot run bad-hooks/test-hook: ... From dda7228a83e2e9ff584bf6adbf55910565b41e14 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 19 Sep 2022 12:55:59 -0700 Subject: [PATCH 45/45] A bit more of remaining topics before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.38.0.txt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Documentation/RelNotes/2.38.0.txt b/Documentation/RelNotes/2.38.0.txt index 01617baa98..5d9bd8c295 100644 --- a/Documentation/RelNotes/2.38.0.txt +++ b/Documentation/RelNotes/2.38.0.txt @@ -6,7 +6,7 @@ UI, Workflows & Features * "git remote show [-n] frotz" now pays attention to negative pathspec. - * "git push" sometimes perform poorly when reachability bitmaps are + * "git push" sometimes performs poorly when reachability bitmaps are used, even in a repository where other operations are helped by bitmaps. The push.useBitmaps configuration variable is introduced to allow disabling use of reachability bitmaps only for "git push". @@ -27,7 +27,7 @@ UI, Workflows & Features what locale they are in by sending Accept-Language HTTP header, but this was done only for some requests but not others. - * Introduce a discovery.barerepository configuration variable that + * Introduce a safe.barerepository configuration variable that allows users to forbid discovery of bare repositories. * Various messages that come from the pack-bitmap codepaths have been @@ -79,12 +79,15 @@ UI, Workflows & Features * "git format-patch --from=" can be told to add an in-body "From:" line even for commits that are authored by the given - with "--force-in-body-from"option. + with "--force-in-body-from" option. * The built-in fsmonitor refuses to work on a network mounted repositories; a configuration knob for users to override this has been introduced. + * The "scalar" addition from Microsoft is now part of the core Git + installation. + Performance, Internal Implementation, Development Support etc. @@ -127,7 +130,7 @@ Performance, Internal Implementation, Development Support etc. * The way "git multi-pack" uses parse-options API has been improved. - * A coccinelle rule (in contrib/) to encourage use of COPY_ARRAY + * A Coccinelle rule (in contrib/) to encourage use of COPY_ARRAY macro has been improved. * API tweak to make it easier to run fuzz testing on commit-graph parser. @@ -172,6 +175,12 @@ Performance, Internal Implementation, Development Support etc. * Share the text used to explain configuration variables used by "git " in "git help " with the text from "git help config". + * "git mv A B" in a sparsely populated working tree can be asked to + move a path from a directory that is "in cone" to another directory + that is "out of cone". Handling of such a case has been improved. + + * The chainlint script for our tests has been revamped. + Fixes since v2.37 ----------------- @@ -297,7 +306,7 @@ Fixes since v2.37 * "git fsck" reads mode from tree objects but canonicalizes the mode before passing it to the logic to check object sanity, which has hid broken tree objects from the checking logic. This has been - corrected, but to help exiting projects with broken tree objects + corrected, but to help existing projects with broken tree objects that they cannot fix retroactively, the severity of anomalies this code detects has been demoted to "info" for now. @@ -306,12 +315,10 @@ Fixes since v2.37 * An earlier optimization discarded a tree-object buffer that is still in use, which has been corrected. - (merge 1490d7d82d jk/is-promisor-object-keep-tree-in-use later to maint). * Fix deadlocks between main Git process and subprocess spawned via the pipe_command() API, that can kill "git add -p" that was reimplemented in C recently. - (merge 716c1f649e jk/pipe-command-nonblock later to maint). * The sequencer machinery translated messages left in the reflog by mistake, which has been corrected. @@ -319,20 +326,16 @@ Fixes since v2.37 * xcalloc(), imitating calloc(), takes "number of elements of the array", and "size of a single element", in this order. A call that does not follow this ordering has been corrected. - (merge c4bbd9bb8f sg/xcalloc-cocci-fix later to maint). * The preload-index codepath made copies of pathspec to give to multiple threads, which were left leaked. - (merge 23578904da ad/preload-plug-memleak later to maint). * Update the version of Ubuntu used for GitHub Actions CI from 18.04 to 22.04. - (merge ef46584831 ds/github-actions-use-newer-ubuntu later to maint). * The auto-stashed local changes created by "git merge --autostash" was mixed into a conflicted state left in the working tree, which has been corrected. - (merge d3a9295ada en/merge-unstash-only-on-clean-merge later to maint). * Multi-pack index got corrupted when preferred pack changed from one pack to another in a certain way, which has been corrected.