From 5506683deaab4c1475800e9b4cd2c06b8de4de97 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:02 +0800
Subject: [PATCH 01/45] t7002: add tests for moving from in-cone to out-of-cone

Add corresponding tests to test that user can move an in-cone <source>
to out-of-cone <destination> when --sparse is supplied.

Such <source> can be either clean or dirty, and moving it results in
different behaviors:

A clean move should move <source> to <destination> in the index (do
*not* create <destination> in the worktree), then delete <source> from
the worktree.

A dirty move should move the <source> to the <destination>, both in the
working tree and the index, but should *not* remove the resulted path
from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit.

Also make sure that if <destination> exists in the index (existing
check for if <destination> is in the worktree is not enough in
in-to-out moves), warn user against the overwrite. And Git should force
the overwrite when supplied with -f or --force.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t7002-mv-sparse-checkout.sh | 198 ++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index 71fe29690f..1ac78edde6 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -290,4 +290,202 @@ test_expect_success 'move sparse file to existing destination with --force and -
 	test_cmp expect sub/file1
 '
 
+test_expect_failure 'move clean path from in-cone to out-of-cone' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+
+	test_must_fail git mv sub/d folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/d" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse sub/d folder1 2>stderr &&
+	test_must_be_empty stderr &&
+
+	test_path_is_missing sub/d &&
+	test_path_is_missing folder1/d &&
+	git ls-files -t >actual &&
+	! grep "^H sub/d\$" actual &&
+	grep "S folder1/d" actual
+'
+
+test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+	echo "sub/file1 overwrite" >sub/file1 &&
+	git add sub/file1 &&
+
+	test_must_fail git mv sub/file1 folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/file1" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	test_must_fail git mv --sparse sub/file1 folder1 2>stderr &&
+	echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \
+	>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse -f sub/file1 folder1 2>stderr &&
+	test_must_be_empty stderr &&
+
+	test_path_is_missing sub/file1 &&
+	test_path_is_missing folder1/file1 &&
+	git ls-files -t >actual &&
+	! grep "H sub/file1" actual &&
+	grep "S folder1/file1" actual &&
+
+	# compare file content before move and after move
+	echo "sub/file1 overwrite" >expect &&
+	git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid &&
+	git cat-file blob $(cat oid) >actual &&
+	test_cmp expect actual
+'
+
+# This test is testing the same behavior as the
+# "move clean path from in-cone to out-of-cone overwrite" above.
+# The only difference is the <destination> changes from "folder1" to "folder1/file1"
+test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+	echo "sub/file1 overwrite" >sub/file1 &&
+	git add sub/file1 &&
+
+	test_must_fail git mv sub/file1 folder1/file1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/file1" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	test_must_fail git mv --sparse sub/file1 folder1/file1 2>stderr &&
+	echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \
+	>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse -f sub/file1 folder1/file1 2>stderr &&
+	test_must_be_empty stderr &&
+
+	test_path_is_missing sub/file1 &&
+	test_path_is_missing folder1/file1 &&
+	git ls-files -t >actual &&
+	! grep "H sub/file1" actual &&
+	grep "S folder1/file1" actual &&
+
+	# compare file content before move and after move
+	echo "sub/file1 overwrite" >expect &&
+	git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid &&
+	git cat-file blob $(cat oid) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_failure 'move directory with one of the files overwrite' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	mkdir -p folder1/dir &&
+	touch folder1/dir/file1 &&
+	git add folder1 &&
+	git sparse-checkout set --cone sub &&
+
+	echo test >sub/dir/file1 &&
+	git add sub/dir/file1 &&
+
+	test_must_fail git mv sub/dir folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/dir/e" >>expect &&
+	echo "folder1/dir/file1" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	test_must_fail git mv --sparse sub/dir folder1 2>stderr &&
+	echo "fatal: destination exists in the index, source=sub/dir/file1, destination=folder1/dir/file1" \
+	>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse -f sub/dir folder1 2>stderr &&
+	test_must_be_empty stderr &&
+
+	test_path_is_missing sub/dir/file1 &&
+	test_path_is_missing sub/dir/e &&
+	test_path_is_missing folder1/file1 &&
+	git ls-files -t >actual &&
+	! grep "H sub/dir/file1" actual &&
+	! grep "H sub/dir/e" actual &&
+	grep "S folder1/dir/file1" actual &&
+
+	# compare file content before move and after move
+	echo test >expect &&
+	git ls-files -s -- folder1/dir/file1 | awk "{print \$2}" >oid &&
+	git cat-file blob $(cat oid) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_failure 'move dirty path from in-cone to out-of-cone' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+	echo "modified" >>sub/d &&
+
+	test_must_fail git mv sub/d folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/d" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse sub/d folder1 2>stderr &&
+
+	test_path_is_missing sub/d &&
+	test_path_is_file folder1/d &&
+	git ls-files -t >actual &&
+	! grep "^H sub/d\$" actual &&
+	grep "H folder1/d" actual
+'
+
+test_expect_failure 'move dir from in-cone to out-of-cone' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+
+	test_must_fail git mv sub/dir folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/dir/e" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse sub/dir folder1 2>stderr &&
+	test_must_be_empty stderr &&
+
+	test_path_is_missing folder1 &&
+	git ls-files -t >actual &&
+	! grep "H sub/dir/e" actual &&
+	grep "S folder1/dir/e" actual
+'
+
+test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' '
+	test_when_finished "cleanup_sparse_checkout" &&
+	setup_sparse_checkout &&
+	touch sub/dir/e2 sub/dir/e3 &&
+	git add sub/dir/e2 sub/dir/e3 &&
+	echo "modified" >>sub/dir/e2 &&
+	echo "modified" >>sub/dir/e3 &&
+
+	test_must_fail git mv sub/dir folder1 2>stderr &&
+	cat sparse_error_header >expect &&
+	echo "folder1/dir/e" >>expect &&
+	echo "folder1/dir/e2" >>expect &&
+	echo "folder1/dir/e3" >>expect &&
+	cat sparse_hint >>expect &&
+	test_cmp expect stderr &&
+
+	git mv --sparse sub/dir folder1 2>stderr &&
+
+	test_path_is_missing folder1/dir/e &&
+	test_path_is_file folder1/dir/e2 &&
+	test_path_is_file folder1/dir/e3 &&
+	git ls-files -t >actual &&
+	! grep "H sub/dir/e" actual &&
+	! grep "H sub/dir/e2" actual &&
+	! grep "H sub/dir/e3" actual &&
+	grep "S folder1/dir/e" actual &&
+	grep "H folder1/dir/e2" actual &&
+	grep "H folder1/dir/e3" actual
+'
+
 test_done

From 72e59ba19e776abbff7dccbf093c73e43527a339 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:03 +0800
Subject: [PATCH 02/45] mv: rename check_dir_in_index() to
 empty_dir_has_sparse_contents()

Method check_dir_in_index() introduced in b91a2b6594 (mv: add
check_dir_in_index() and solve general dir check issue, 2022-06-30)
does not describe its intent and behavior well.

Change its name to empty_dir_has_sparse_contents(), which more
precisely describes its purpose.

Reverse the return values, check_dir_in_index() return 0 for success
and 1 for failure; reverse the values so empty_dir_has_sparse_contents()
return 1 for success and 0 for failure. These values are more intuitive
because 1 usually means "has" and 0 means "not found".

Also modify the documentation to better align with the method's
intent and behavior.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 2a38e2af46..969f464f7d 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -125,15 +125,13 @@ static int index_range_of_same_dir(const char *src, int length,
 }
 
 /*
- * Check if an out-of-cone directory should be in the index. Imagine this case
- * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit
- * and thus the directory is sparsified.
- *
- * Return 0 if such directory exist (i.e. with any of its contained files not
- * marked with CE_SKIP_WORKTREE, the directory would be present in working tree).
- * Return 1 otherwise.
+ * Given the path of a directory that does not exist on-disk, check whether the
+ * directory contains any entries in the index with the SKIP_WORKTREE flag
+ * enabled.
+ * Return 1 if such index entries exist.
+ * Return 0 otherwise.
  */
-static int check_dir_in_index(const char *name)
+static int empty_dir_has_sparse_contents(const char *name)
 {
 	const char *with_slash = add_slash(name);
 	int length = strlen(with_slash);
@@ -144,14 +142,14 @@ static int check_dir_in_index(const char *name)
 	if (pos < 0) {
 		pos = -pos - 1;
 		if (pos >= the_index.cache_nr)
-			return 1;
+			return 0;
 		ce = active_cache[pos];
 		if (strncmp(with_slash, ce->name, length))
-			return 1;
-		if (ce_skip_worktree(ce))
 			return 0;
+		if (ce_skip_worktree(ce))
+			return 1;
 	}
-	return 1;
+	return 0;
 }
 
 int cmd_mv(int argc, const char **argv, const char *prefix)
@@ -231,7 +229,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 			if (pos < 0) {
 				const char *src_w_slash = add_slash(src);
 				if (!path_in_sparse_checkout(src_w_slash, &the_index) &&
-				    !check_dir_in_index(src)) {
+				    empty_dir_has_sparse_contents(src)) {
 					modes[i] |= SKIP_WORKTREE_DIR;
 					goto dir_check;
 				}

From d57690a9c82c8888be6bb8ae17be231a2b2802e6 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:04 +0800
Subject: [PATCH 03/45] mv: free the with_slash in check_dir_in_index()

with_slash may be a malloc'd pointer, and when it is, free it.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 969f464f7d..c17df2a12f 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -133,6 +133,7 @@ static int index_range_of_same_dir(const char *src, int length,
  */
 static int empty_dir_has_sparse_contents(const char *name)
 {
+	int ret = 0;
 	const char *with_slash = add_slash(name);
 	int length = strlen(with_slash);
 
@@ -142,14 +143,18 @@ static int empty_dir_has_sparse_contents(const char *name)
 	if (pos < 0) {
 		pos = -pos - 1;
 		if (pos >= the_index.cache_nr)
-			return 0;
+			goto free_return;
 		ce = active_cache[pos];
 		if (strncmp(with_slash, ce->name, length))
-			return 0;
+			goto free_return;
 		if (ce_skip_worktree(ce))
-			return 1;
+			ret = 1;
 	}
-	return 0;
+
+free_return:
+	if (with_slash != name)
+		free((char *)with_slash);
+	return ret;
 }
 
 int cmd_mv(int argc, const char **argv, const char *prefix)

From c08830de41f15a8ee85cf7926266e1db732ec773 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:05 +0800
Subject: [PATCH 04/45] mv: check if <destination> is a SKIP_WORKTREE_DIR

Originally, <destination> is assumed to be in the working tree. If it is
not found as a directory, then it is determined to be either a regular file
path, or error out if used under the second form (move into a directory)
of 'git-mv'. Such behavior is not ideal, mainly because Git does not
look into the index for <destination>, which could potentially be a
SKIP_WORKTREE_DIR, which we need to determine for the later "moving from
in-cone to out-of-cone" patch.

Change the logic so that Git first check if <destination> is a directory
with all its contents sparsified (a SKIP_WORKTREE_DIR).

If <destination> is such a sparse directory, then we should modify the
index the same way as we would if this were a non-sparse directory. We
must be careful to ensure that the <destination> is marked with
SKIP_WORKTREE_DIR.

Also add a `dst_w_slash` to reuse the result from `add_slash()`, which
was everywhere and can be simplified.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index c17df2a12f..11aea7b4db 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -171,6 +171,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 		OPT_END(),
 	};
 	const char **source, **destination, **dest_path, **submodule_gitfile;
+	const char *dst_w_slash;
 	enum update_mode *modes;
 	struct stat st;
 	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
@@ -200,6 +201,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1]))
 		flags = 0;
 	dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags);
+	dst_w_slash = add_slash(dest_path[0]);
 	submodule_gitfile = xcalloc(argc, sizeof(char *));
 
 	if (dest_path[0][0] == '\0')
@@ -207,12 +209,20 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
 	else if (!lstat(dest_path[0], &st) &&
 			S_ISDIR(st.st_mode)) {
-		dest_path[0] = add_slash(dest_path[0]);
-		destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME);
+		destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
 	} else {
-		if (argc != 1)
+		if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
+		    empty_dir_has_sparse_contents(dst_w_slash)) {
+			destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
+		} else if (argc != 1) {
 			die(_("destination '%s' is not a directory"), dest_path[0]);
-		destination = dest_path;
+		} else {
+			destination = dest_path;
+		}
+	}
+	if (dst_w_slash != dest_path[0]) {
+		free((char *)dst_w_slash);
+		dst_w_slash = NULL;
 	}
 
 	/* Checking */

From 9284c3ce266fcf9abb0afbc59645c62dd58026dd Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:06 +0800
Subject: [PATCH 05/45] mv: remove BOTH from enum update_mode

Since BOTH is not used anywhere in the code and its meaning is unclear,
remove it.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 11aea7b4db..7ac653be23 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -21,7 +21,6 @@ static const char * const builtin_mv_usage[] = {
 };
 
 enum update_mode {
-	BOTH = 0,
 	WORKING_DIRECTORY = (1 << 1),
 	INDEX = (1 << 2),
 	SPARSE = (1 << 3),

From 5784db1b22feceafc72454dccd9637d19fdd422c Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:07 +0800
Subject: [PATCH 06/45] mv: from in-cone to out-of-cone

Originally, moving an in-cone <source> to an out-of-cone <destination>
was not possible, mainly because such <destination> is a directory that
is not present in the working tree.

Change the behavior so that we can move an in-cone <source> to
out-of-cone <destination> when --sparse is supplied.

Notice that <destination> can also be an out-of-cone file path, rather
than a directory.

Such <source> can be either clean or dirty, and moving it results in
different behaviors:

A clean move should move <source> to <destination> in the index (do
*not* create <destination> in the worktree), then delete <source> from
the worktree.

A dirty move should move the <source> to the <destination>, both in the
working tree and the index, but should *not* remove the resulted path
from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit.

Optional reading
================

We are strict about cone mode when <destination> is a file path.
The reason is that some of the previous tests that use no-cone mode in
t7002 are keep breaking, mainly because the `dst_mode = SPARSE;` line
added in this patch.

Most features developed in both "from-out-to-in" and "from-in-to-out"
only care about cone mode situation, as no-cone mode is becoming
irrelevant. And because assigning `SPARSE` to `dst_mode` when the
repo is in no-cone mode causes miscellaneous bugs, we should just leave
this new functionality to be exclusive cone mode and save some time.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Helped-by: Victoria Dye <vdye@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c                  | 69 +++++++++++++++++++++++++++++++----
 t/t7002-mv-sparse-checkout.sh |  8 ++--
 2 files changed, 65 insertions(+), 12 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 7ac653be23..b64c28bd5b 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -171,12 +171,13 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	};
 	const char **source, **destination, **dest_path, **submodule_gitfile;
 	const char *dst_w_slash;
-	enum update_mode *modes;
+	enum update_mode *modes, dst_mode = 0;
 	struct stat st;
 	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
 	struct lock_file lock_file = LOCK_INIT;
 	struct cache_entry *ce;
 	struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP;
+	struct string_list dirty_paths = STRING_LIST_INIT_NODUP;
 
 	git_config(git_default_config, NULL);
 
@@ -213,10 +214,21 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 		if (!path_in_sparse_checkout(dst_w_slash, &the_index) &&
 		    empty_dir_has_sparse_contents(dst_w_slash)) {
 			destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME);
+			dst_mode = SKIP_WORKTREE_DIR;
 		} else if (argc != 1) {
 			die(_("destination '%s' is not a directory"), dest_path[0]);
 		} else {
 			destination = dest_path;
+			/*
+			 * <destination> is a file outside of sparse-checkout
+			 * cone. Insist on cone mode here for backward
+			 * compatibility. We don't want dst_mode to be assigned
+			 * for a file when the repo is using no-cone mode (which
+			 * is deprecated at this point) sparse-checkout. As
+			 * SPARSE here is only considering cone-mode situation.
+			 */
+			if (!path_in_cone_mode_sparse_checkout(destination[0], &the_index))
+				dst_mode = SPARSE;
 		}
 	}
 	if (dst_w_slash != dest_path[0]) {
@@ -410,6 +422,7 @@ remove_entry:
 		const char *src = source[i], *dst = destination[i];
 		enum update_mode mode = modes[i];
 		int pos;
+		int sparse_and_dirty = 0;
 		struct checkout state = CHECKOUT_INIT;
 		state.istate = &the_index;
 
@@ -420,6 +433,7 @@ remove_entry:
 		if (show_only)
 			continue;
 		if (!(mode & (INDEX | SPARSE | SKIP_WORKTREE_DIR)) &&
+		    !(dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) &&
 		    rename(src, dst) < 0) {
 			if (ignore_errors)
 				continue;
@@ -439,17 +453,55 @@ remove_entry:
 
 		pos = cache_name_pos(src, strlen(src));
 		assert(pos >= 0);
+		if (!(mode & SPARSE) && !lstat(src, &st))
+			sparse_and_dirty = ce_modified(active_cache[pos], &st, 0);
 		rename_cache_entry_at(pos, dst);
 
-		if ((mode & SPARSE) &&
-		    (path_in_sparse_checkout(dst, &the_index))) {
-			int dst_pos;
+		if (ignore_sparse &&
+		    core_apply_sparse_checkout &&
+		    core_sparse_checkout_cone) {
+			/*
+			 * NEEDSWORK: we are *not* paying attention to
+			 * "out-to-out" move (<source> is out-of-cone and
+			 * <destination> is out-of-cone) at this point. It
+			 * should be added in a future patch.
+			 */
+			if ((mode & SPARSE) &&
+			    path_in_sparse_checkout(dst, &the_index)) {
+				/* from out-of-cone to in-cone */
+				int dst_pos = cache_name_pos(dst, strlen(dst));
+				struct cache_entry *dst_ce = active_cache[dst_pos];
 
-			dst_pos = cache_name_pos(dst, strlen(dst));
-			active_cache[dst_pos]->ce_flags &= ~CE_SKIP_WORKTREE;
+				dst_ce->ce_flags &= ~CE_SKIP_WORKTREE;
 
-			if (checkout_entry(active_cache[dst_pos], &state, NULL, NULL))
-				die(_("cannot checkout %s"), active_cache[dst_pos]->name);
+				if (checkout_entry(dst_ce, &state, NULL, NULL))
+					die(_("cannot checkout %s"), dst_ce->name);
+			} else if ((dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) &&
+				   !(mode & SPARSE) &&
+				   !path_in_sparse_checkout(dst, &the_index)) {
+				/* from in-cone to out-of-cone */
+				int dst_pos = cache_name_pos(dst, strlen(dst));
+				struct cache_entry *dst_ce = active_cache[dst_pos];
+
+				/*
+				 * if src is clean, it will suffice to remove it
+				 */
+				if (!sparse_and_dirty) {
+					dst_ce->ce_flags |= CE_SKIP_WORKTREE;
+					unlink_or_warn(src);
+				} else {
+					/*
+					 * if src is dirty, move it to the
+					 * destination and create leading
+					 * dirs if necessary
+					 */
+					char *dst_dup = xstrdup(dst);
+					string_list_append(&dirty_paths, dst);
+					safe_create_leading_directories(dst_dup);
+					FREE_AND_NULL(dst_dup);
+					rename(src, dst);
+				}
+			}
 		}
 	}
 
@@ -461,6 +513,7 @@ remove_entry:
 		die(_("Unable to write new index file"));
 
 	string_list_clear(&src_for_dst, 0);
+	string_list_clear(&dirty_paths, 0);
 	UNLEAK(source);
 	UNLEAK(dest_path);
 	free(submodule_gitfile);
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index 1ac78edde6..d875f492dd 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -290,7 +290,7 @@ test_expect_success 'move sparse file to existing destination with --force and -
 	test_cmp expect sub/file1
 '
 
-test_expect_failure 'move clean path from in-cone to out-of-cone' '
+test_expect_success 'move clean path from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 
@@ -419,7 +419,7 @@ test_expect_failure 'move directory with one of the files overwrite' '
 	test_cmp expect actual
 '
 
-test_expect_failure 'move dirty path from in-cone to out-of-cone' '
+test_expect_success 'move dirty path from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 	echo "modified" >>sub/d &&
@@ -439,7 +439,7 @@ test_expect_failure 'move dirty path from in-cone to out-of-cone' '
 	grep "H folder1/d" actual
 '
 
-test_expect_failure 'move dir from in-cone to out-of-cone' '
+test_expect_success 'move dir from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 
@@ -458,7 +458,7 @@ test_expect_failure 'move dir from in-cone to out-of-cone' '
 	grep "S folder1/dir/e" actual
 '
 
-test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' '
+test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 	touch sub/dir/e2 sub/dir/e3 &&

From b6f51e3db978ae2a72c290a10bd205f9e1d6818e Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:08 +0800
Subject: [PATCH 07/45] mv: cleanup empty WORKING_DIRECTORY

Originally, moving from-in-to-out may leave an empty <source>
directory on-disk (this kind of directory is marked as
WORKING_DIRECTORY).

Cleanup such directories if they are empty (don't have any entries
under them).

Modify two tests that take <source> as WORKING_DIRECTORY to test
this behavior.

Suggested-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c                  | 27 +++++++++++++++++++++++++++
 t/t7002-mv-sparse-checkout.sh |  4 ++++
 2 files changed, 31 insertions(+)

diff --git a/builtin/mv.c b/builtin/mv.c
index b64c28bd5b..f4961c0ffd 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -171,6 +171,9 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 	};
 	const char **source, **destination, **dest_path, **submodule_gitfile;
 	const char *dst_w_slash;
+	const char **src_dir = NULL;
+	int src_dir_nr = 0, src_dir_alloc = 0;
+	struct strbuf a_src_dir = STRBUF_INIT;
 	enum update_mode *modes, dst_mode = 0;
 	struct stat st;
 	struct string_list src_for_dst = STRING_LIST_INIT_NODUP;
@@ -313,6 +316,10 @@ dir_check:
 
 			/* last - first >= 1 */
 			modes[i] |= WORKING_DIRECTORY;
+
+			ALLOC_GROW(src_dir, src_dir_nr + 1, src_dir_alloc);
+			src_dir[src_dir_nr++] = src;
+
 			n = argc + last - first;
 			REALLOC_ARRAY(source, n);
 			REALLOC_ARRAY(destination, n);
@@ -505,6 +512,26 @@ remove_entry:
 		}
 	}
 
+	/*
+	 * cleanup the empty src_dirs
+	 */
+	for (i = 0; i < src_dir_nr; i++) {
+		int dummy;
+		strbuf_addstr(&a_src_dir, src_dir[i]);
+		/*
+		 * if entries under a_src_dir are all moved away,
+		 * recursively remove a_src_dir to cleanup
+		 */
+		if (index_range_of_same_dir(a_src_dir.buf, a_src_dir.len,
+					    &dummy, &dummy) < 1) {
+			remove_dir_recursively(&a_src_dir, 0);
+		}
+		strbuf_reset(&a_src_dir);
+	}
+
+	strbuf_release(&a_src_dir);
+	free(src_dir);
+
 	if (gitmodules_modified)
 		stage_updated_gitmodules(&the_index);
 
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index d875f492dd..df8c0fa572 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -442,6 +442,7 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' '
 test_expect_success 'move dir from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
+	mkdir sub/dir/deep &&
 
 	test_must_fail git mv sub/dir folder1 2>stderr &&
 	cat sparse_error_header >expect &&
@@ -452,6 +453,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' '
 	git mv --sparse sub/dir folder1 2>stderr &&
 	test_must_be_empty stderr &&
 
+	test_path_is_missing sub/dir &&
 	test_path_is_missing folder1 &&
 	git ls-files -t >actual &&
 	! grep "H sub/dir/e" actual &&
@@ -461,6 +463,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' '
 test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
+	mkdir sub/dir/deep &&
 	touch sub/dir/e2 sub/dir/e3 &&
 	git add sub/dir/e2 sub/dir/e3 &&
 	echo "modified" >>sub/dir/e2 &&
@@ -476,6 +479,7 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' '
 
 	git mv --sparse sub/dir folder1 2>stderr &&
 
+	test_path_is_missing sub/dir &&
 	test_path_is_missing folder1/dir/e &&
 	test_path_is_file folder1/dir/e2 &&
 	test_path_is_file folder1/dir/e3 &&

From 5efd533ed8896592740afe22ac07271497d6db36 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:09 +0800
Subject: [PATCH 08/45] advice.h: add advise_on_moving_dirty_path()

Add an advice.

When the user use `git mv --sparse <dirty-path> <destination>`, Git
will warn the user to use `git add --sparse <paths>` then use
`git sparse-checkout reapply` to apply the sparsity rules.

Add a few lines to previous "move dirty path" tests so we can test
this new advice is working.

Suggested-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 advice.c                      | 19 +++++++++++++++++++
 advice.h                      |  1 +
 builtin/mv.c                  |  3 +++
 t/t7002-mv-sparse-checkout.sh | 24 +++++++++++++++++++++++-
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/advice.c b/advice.c
index 6fda9edbc2..fd18968943 100644
--- a/advice.c
+++ b/advice.c
@@ -261,3 +261,22 @@ void detach_advice(const char *new_name)
 
 	fprintf(stderr, fmt, new_name);
 }
+
+void advise_on_moving_dirty_path(struct string_list *pathspec_list)
+{
+	struct string_list_item *item;
+
+	if (!pathspec_list->nr)
+		return;
+
+	fprintf(stderr, _("The following paths have been moved outside the\n"
+			  "sparse-checkout definition but are not sparse due to local\n"
+			  "modifications.\n"));
+	for_each_string_list_item(item, pathspec_list)
+		fprintf(stderr, "%s\n", item->string);
+
+	advise_if_enabled(ADVICE_UPDATE_SPARSE_PATH,
+			  _("To correct the sparsity of these paths, do the following:\n"
+			    "* Use \"git add --sparse <paths>\" to update the index\n"
+			    "* Use \"git sparse-checkout reapply\" to apply the sparsity rules"));
+}
diff --git a/advice.h b/advice.h
index 7ddc6cbc1a..07e0f76833 100644
--- a/advice.h
+++ b/advice.h
@@ -74,5 +74,6 @@ void NORETURN die_conclude_merge(void);
 void NORETURN die_ff_impossible(void);
 void advise_on_updating_sparse_paths(struct string_list *pathspec_list);
 void detach_advice(const char *new_name);
+void advise_on_moving_dirty_path(struct string_list *pathspec_list);
 
 #endif /* ADVICE_H */
diff --git a/builtin/mv.c b/builtin/mv.c
index f4961c0ffd..d80adf8de5 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -532,6 +532,9 @@ remove_entry:
 	strbuf_release(&a_src_dir);
 	free(src_dir);
 
+	if (dirty_paths.nr)
+		advise_on_moving_dirty_path(&dirty_paths);
+
 	if (gitmodules_modified)
 		stage_updated_gitmodules(&the_index);
 
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index df8c0fa572..5e5eb70e7a 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -28,12 +28,25 @@ test_expect_success 'setup' "
 	updated in the index:
 	EOF
 
-	cat >sparse_hint <<-EOF
+	cat >sparse_hint <<-EOF &&
 	hint: If you intend to update such entries, try one of the following:
 	hint: * Use the --sparse option.
 	hint: * Disable or modify the sparsity rules.
 	hint: Disable this message with \"git config advice.updateSparsePath false\"
 	EOF
+
+	cat >dirty_error_header <<-EOF &&
+	The following paths have been moved outside the
+	sparse-checkout definition but are not sparse due to local
+	modifications.
+	EOF
+
+	cat >dirty_hint <<-EOF
+	hint: To correct the sparsity of these paths, do the following:
+	hint: * Use \"git add --sparse <paths>\" to update the index
+	hint: * Use \"git sparse-checkout reapply\" to apply the sparsity rules
+	hint: Disable this message with \"git config advice.updateSparsePath false\"
+	EOF
 "
 
 test_expect_success 'mv refuses to move sparse-to-sparse' '
@@ -431,6 +444,10 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' '
 	test_cmp expect stderr &&
 
 	git mv --sparse sub/d folder1 2>stderr &&
+	cat dirty_error_header >expect &&
+	echo "folder1/d" >>expect &&
+	cat dirty_hint >>expect &&
+	test_cmp expect stderr &&
 
 	test_path_is_missing sub/d &&
 	test_path_is_file folder1/d &&
@@ -478,6 +495,11 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' '
 	test_cmp expect stderr &&
 
 	git mv --sparse sub/dir folder1 2>stderr &&
+	cat dirty_error_header >expect &&
+	echo "folder1/dir/e2" >>expect &&
+	echo "folder1/dir/e3" >>expect &&
+	cat dirty_hint >>expect &&
+	test_cmp expect stderr &&
 
 	test_path_is_missing sub/dir &&
 	test_path_is_missing folder1/dir/e &&

From da6fe05b3d624ad5b40472eebfe0499c15ecc93d Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Tue, 9 Aug 2022 20:09:10 +0800
Subject: [PATCH 09/45] mv: check overwrite for in-to-out move

Add checking logic for overwriting when moving from in-cone to
out-of-cone. It is the index version of the original overwrite logic.

Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c                  | 12 ++++++++++++
 t/t7002-mv-sparse-checkout.sh |  6 +++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index d80adf8de5..4b67bd096a 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -376,6 +376,18 @@ dir_check:
 			goto act_on_entry;
 		}
 
+		if (ignore_sparse &&
+		    (dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) &&
+		    index_entry_exists(&the_index, dst, strlen(dst))) {
+			bad = _("destination exists in the index");
+			if (force) {
+				if (verbose)
+					warning(_("overwriting '%s'"), dst);
+				bad = NULL;
+			} else {
+				goto act_on_entry;
+			}
+		}
 		/*
 		 * We check if the paths are in the sparse-checkout
 		 * definition as a very final check, since that
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index 5e5eb70e7a..26582ae4e5 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -323,7 +323,7 @@ test_expect_success 'move clean path from in-cone to out-of-cone' '
 	grep "S folder1/d" actual
 '
 
-test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' '
+test_expect_success 'move clean path from in-cone to out-of-cone overwrite' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 	echo "sub/file1 overwrite" >sub/file1 &&
@@ -359,7 +359,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' '
 # This test is testing the same behavior as the
 # "move clean path from in-cone to out-of-cone overwrite" above.
 # The only difference is the <destination> changes from "folder1" to "folder1/file1"
-test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' '
+test_expect_success 'move clean path from in-cone to out-of-cone file overwrite' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	setup_sparse_checkout &&
 	echo "sub/file1 overwrite" >sub/file1 &&
@@ -392,7 +392,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite'
 	test_cmp expect actual
 '
 
-test_expect_failure 'move directory with one of the files overwrite' '
+test_expect_success 'move directory with one of the files overwrite' '
 	test_when_finished "cleanup_sparse_checkout" &&
 	mkdir -p folder1/dir &&
 	touch folder1/dir/file1 &&

From b4f25b07c74fc294cab6c12d09faa2021c67f25a Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:39 +0000
Subject: [PATCH 10/45] t: add skeleton chainlint.pl

Although chainlint.sed usefully identifies broken &&-chains in tests, it
has several shortcomings which include:

  * only detects &&-chain breakage in subshells (one-level deep)

  * does not check for broken top-level &&-chains; that task is left to
    the "magic exit code 117" checker built into test-lib.sh, however,
    that detection does not extend to `{...}` blocks, `$(...)`
    expressions, or compound statements such as `if...fi`,
    `while...done`, `case...esac`

  * uses heuristics, which makes it (potentially) fallible and difficult
    to tweak to handle additional real-world cases

  * written in `sed` and employs advanced `sed` operators which are
    probably not well-known to many programmers, thus the pool of people
    who can maintain it is likely small

  * manually simulates recursion into subshells which makes it much more
    difficult to reason about than, say, a traditional top-down parser

  * checks each test as the test is run, which can get expensive for
    tests which are run repeatedly by functions or loops since their
    bodies will be checked over and over (tens or hundreds of times)
    unnecessarily

To address these shortcomings, begin implementing a more functional and
precise test linter which understands shell syntax and semantics rather
than employing heuristics, thus is able to recognize structural problems
with tests beyond broken &&-chains.

The new linter is written in Perl, thus should be more accessible to a
wider audience, and is structured as a traditional top-down parser which
makes it much easier to reason about, and allows it to inspect compound
statements within test bodies to any depth.

Furthermore, it can check all test definitions in the entire project in
a single invocation rather than having to be invoked once per test, and
each test definition is checked only once no matter how many times the
test is actually run.

At this stage, the new linter is just a skeleton containing boilerplate
which handles command-line options, collects and reports statistics, and
feeds its arguments -- paths of test scripts -- to a (presently)
do-nothing script parser for validation. Subsequent changes will flesh
out the functionality.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 115 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100755 t/chainlint.pl

diff --git a/t/chainlint.pl b/t/chainlint.pl
new file mode 100755
index 0000000000..e8ab95c785
--- /dev/null
+++ b/t/chainlint.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/env perl
+#
+# Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
+#
+# This tool scans shell scripts for test definitions and checks those tests for
+# problems, such as broken &&-chains, which might hide bugs in the tests
+# themselves or in behaviors being exercised by the tests.
+#
+# Input arguments are pathnames of shell scripts containing test definitions,
+# or globs referencing a collection of scripts. For each problem discovered,
+# the pathname of the script containing the test is printed along with the test
+# name and the test body with a `?!FOO?!` annotation at the location of each
+# detected problem, where "FOO" is a tag such as "AMP" which indicates a broken
+# &&-chain. Returns zero if no problems are discovered, otherwise non-zero.
+
+use warnings;
+use strict;
+use File::Glob;
+use Getopt::Long;
+
+my $show_stats;
+my $emit_all;
+
+package ScriptParser;
+
+sub new {
+	my $class = shift @_;
+	my $self = bless {} => $class;
+	$self->{output} = [];
+	$self->{ntests} = 0;
+	return $self;
+}
+
+sub parse_cmd {
+	return undef;
+}
+
+# main contains high-level functionality for processing command-line switches,
+# feeding input test scripts to ScriptParser, and reporting results.
+package main;
+
+my $getnow = sub { return time(); };
+my $interval = sub { return time() - shift; };
+if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
+	$getnow = sub { return [Time::HiRes::gettimeofday()]; };
+	$interval = sub { return Time::HiRes::tv_interval(shift); };
+}
+
+sub show_stats {
+	my ($start_time, $stats) = @_;
+	my $walltime = $interval->($start_time);
+	my ($usertime) = times();
+	my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
+	for (@$stats) {
+		my ($worker, $nscripts, $ntests, $nerrs) = @$_;
+		print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
+		$total_workers++;
+		$total_scripts += $nscripts;
+		$total_tests += $ntests;
+		$total_errs += $nerrs;
+	}
+	printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
+}
+
+sub check_script {
+	my ($id, $next_script, $emit) = @_;
+	my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
+	while (my $path = $next_script->()) {
+		$nscripts++;
+		my $fh;
+		unless (open($fh, "<", $path)) {
+			$emit->("?!ERR?! $path: $!\n");
+			next;
+		}
+		my $s = do { local $/; <$fh> };
+		close($fh);
+		my $parser = ScriptParser->new(\$s);
+		1 while $parser->parse_cmd();
+		if (@{$parser->{output}}) {
+			my $s = join('', @{$parser->{output}});
+			$emit->("# chainlint: $path\n" . $s);
+			$nerrs += () = $s =~ /\?![^?]+\?!/g;
+		}
+		$ntests += $parser->{ntests};
+	}
+	return [$id, $nscripts, $ntests, $nerrs];
+}
+
+sub exit_code {
+	my $stats = shift @_;
+	for (@$stats) {
+		my ($worker, $nscripts, $ntests, $nerrs) = @$_;
+		return 1 if $nerrs;
+	}
+	return 0;
+}
+
+Getopt::Long::Configure(qw{bundling});
+GetOptions(
+	"emit-all!" => \$emit_all,
+	"stats|show-stats!" => \$show_stats) or die("option error\n");
+
+my $start_time = $getnow->();
+my @stats;
+
+my @scripts;
+push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
+unless (@scripts) {
+	show_stats($start_time, \@stats) if $show_stats;
+	exit;
+}
+
+push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
+show_stats($start_time, \@stats) if $show_stats;
+exit(exit_code(\@stats));

From 7d4804731ed642b92b516908fb93397b08e986bf Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:40 +0000
Subject: [PATCH 11/45] chainlint.pl: add POSIX shell lexical analyzer

Begin fleshing out chainlint.pl by adding a lexical analyzer for the
POSIX shell command language. The sole entry point Lexer::scan_token()
returns the next token from the input. It will be called by the upcoming
shell language parser.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 177 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index e8ab95c785..81ffbf28bf 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -21,6 +21,183 @@ use Getopt::Long;
 my $show_stats;
 my $emit_all;
 
+# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
+# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
+# similar to lexical analyzers for other languages, this one differs in a few
+# substantial ways due to quirks of the shell command language.
+#
+# For instance, in many languages, newline is just whitespace like space or
+# TAB, but in shell a newline is a command separator, thus a distinct lexical
+# token. A newline is significant and returned as a distinct token even at the
+# end of a shell comment.
+#
+# In other languages, `1+2` would typically be scanned as three tokens
+# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
+# `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well.
+# In shell, several characters with special meaning lose that meaning when not
+# surrounded by whitespace. For instance, the negation operator `!` is special
+# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
+# just a plain character in the longer token "foo!uucp". In many other
+# languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
+# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
+#
+# The lexical analyzer for the shell command language is also somewhat unusual
+# in that it recursively invokes the parser to handle the body of `$(...)`
+# expressions which can contain arbitrary shell code. Such expressions may be
+# encountered both inside and outside of double-quoted strings.
+#
+# The lexical analyzer is responsible for consuming shell here-doc bodies which
+# extend from the line following a `<<TAG` operator until a line consisting
+# solely of `TAG`. Here-doc consumption begins when a newline is encountered.
+# It is legal for multiple here-doc `<<TAG` operators to be present on a single
+# line, in which case their bodies must be present one following the next, and
+# are consumed in the (left-to-right) order the `<<TAG` operators appear on the
+# line. A special complication is that the bodies of all here-docs must be
+# consumed when the newline is encountered even if the parse context depth has
+# changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
+# "A" and "B" must be consumed even though "A" was introduced outside the
+# recursive parse context in which "B" was introduced and in which the newline
+# is encountered.
+package Lexer;
+
+sub new {
+	my ($class, $parser, $s) = @_;
+	bless {
+		parser => $parser,
+		buff => $s,
+		heretags => []
+	} => $class;
+}
+
+sub scan_heredoc_tag {
+	my $self = shift @_;
+	${$self->{buff}} =~ /\G(-?)/gc;
+	my $indented = $1;
+	my $tag = $self->scan_token();
+	$tag =~ s/['"\\]//g;
+	push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag");
+	return "<<$indented$tag";
+}
+
+sub scan_op {
+	my ($self, $c) = @_;
+	my $b = $self->{buff};
+	return $c unless $$b =~ /\G(.)/sgc;
+	my $cc = $c . $1;
+	return scan_heredoc_tag($self) if $cc eq '<<';
+	return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
+	pos($$b)--;
+	return $c;
+}
+
+sub scan_sqstring {
+	my $self = shift @_;
+	${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
+	return "'" . $1;
+}
+
+sub scan_dqstring {
+	my $self = shift @_;
+	my $b = $self->{buff};
+	my $s = '"';
+	while (1) {
+		# slurp up non-special characters
+		$s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
+		# handle special characters
+		last unless $$b =~ /\G(.)/sgc;
+		my $c = $1;
+		$s .= '"', last if $c eq '"';
+		$s .= '$' . $self->scan_dollar(), next if $c eq '$';
+		if ($c eq '\\') {
+			$s .= '\\', last unless $$b =~ /\G(.)/sgc;
+			$c = $1;
+			next if $c eq "\n"; # line splice
+			# backslash escapes only $, `, ", \ in dq-string
+			$s .= '\\' unless $c =~ /^[\$`"\\]$/;
+			$s .= $c;
+			next;
+		}
+		die("internal error scanning dq-string '$c'\n");
+	}
+	return $s;
+}
+
+sub scan_balanced {
+	my ($self, $c1, $c2) = @_;
+	my $b = $self->{buff};
+	my $depth = 1;
+	my $s = $c1;
+	while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
+		$s .= $1;
+		$depth++, next if $s =~ /\Q$c1\E$/;
+		$depth--;
+		last if $depth == 0;
+	}
+	return $s;
+}
+
+sub scan_subst {
+	my $self = shift @_;
+	my @tokens = $self->{parser}->parse(qr/^\)$/);
+	$self->{parser}->next_token(); # closing ")"
+	return @tokens;
+}
+
+sub scan_dollar {
+	my $self = shift @_;
+	my $b = $self->{buff};
+	return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
+	return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
+	return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
+	return $1 if $$b =~ /\G(\w+)/gc; # $var
+	return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
+	return '';
+}
+
+sub swallow_heredocs {
+	my $self = shift @_;
+	my $b = $self->{buff};
+	my $tags = $self->{heretags};
+	while (my $tag = shift @$tags) {
+		my $indent = $tag =~ s/^\t// ? '\\s*' : '';
+		$$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
+	}
+}
+
+sub scan_token {
+	my $self = shift @_;
+	my $b = $self->{buff};
+	my $token = '';
+RESTART:
+	$$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
+	return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
+	while (1) {
+		# slurp up non-special characters
+		$token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
+		# handle special characters
+		last unless $$b =~ /\G(.)/sgc;
+		my $c = $1;
+		last if $c =~ /^[ \t]$/; # whitespace ends token
+		pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
+		$token .= $self->scan_sqstring(), next if $c eq "'";
+		$token .= $self->scan_dqstring(), next if $c eq '"';
+		$token .= $c . $self->scan_dollar(), next if $c eq '$';
+		$self->swallow_heredocs(), $token = $c, last if $c eq "\n";
+		$token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
+		$token = $c, last if $c =~ /^[(){}]$/;
+		if ($c eq '\\') {
+			$token .= '\\', last unless $$b =~ /\G(.)/sgc;
+			$c = $1;
+			next if $c eq "\n" && length($token); # line splice
+			goto RESTART if $c eq "\n"; # line splice
+			$token .= '\\' . $c;
+			next;
+		}
+		die("internal error scanning character '$c'\n");
+	}
+	return length($token) ? $token : undef;
+}
+
 package ScriptParser;
 
 sub new {

From 6594554119811a01888b44112a7daec6fa0312b2 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:41 +0000
Subject: [PATCH 12/45] chainlint.pl: add POSIX shell parser

Continue fleshing out chainlint.pl by adding a general purpose recursive
descent parser for the POSIX shell command language. Although never
invoked directly, upcoming parser subclasses will extend its
functionality for specific purposes, such as plucking test definitions
from input scripts and applying domain-specific knowledge to perform
test validation.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 243 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 243 insertions(+)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 81ffbf28bf..cdf136896b 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -198,6 +198,249 @@ RESTART:
 	return length($token) ? $token : undef;
 }
 
+# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
+# is a recursive descent parser very roughly modeled after section 2.10 "Shell
+# Grammar" of POSIX chapter 2 "Shell Command Language".
+package ShellParser;
+
+sub new {
+	my ($class, $s) = @_;
+	my $self = bless {
+		buff => [],
+		stop => [],
+		output => []
+	} => $class;
+	$self->{lexer} = Lexer->new($self, $s);
+	return $self;
+}
+
+sub next_token {
+	my $self = shift @_;
+	return pop(@{$self->{buff}}) if @{$self->{buff}};
+	return $self->{lexer}->scan_token();
+}
+
+sub untoken {
+	my $self = shift @_;
+	push(@{$self->{buff}}, @_);
+}
+
+sub peek {
+	my $self = shift @_;
+	my $token = $self->next_token();
+	return undef unless defined($token);
+	$self->untoken($token);
+	return $token;
+}
+
+sub stop_at {
+	my ($self, $token) = @_;
+	return 1 unless defined($token);
+	my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
+	return defined($stop) && $token =~ $stop;
+}
+
+sub expect {
+	my ($self, $expect) = @_;
+	my $token = $self->next_token();
+	return $token if defined($token) && $token eq $expect;
+	push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n");
+	$self->untoken($token) if defined($token);
+	return ();
+}
+
+sub optional_newlines {
+	my $self = shift @_;
+	my @tokens;
+	while (my $token = $self->peek()) {
+		last unless $token eq "\n";
+		push(@tokens, $self->next_token());
+	}
+	return @tokens;
+}
+
+sub parse_group {
+	my $self = shift @_;
+	return ($self->parse(qr/^}$/),
+		$self->expect('}'));
+}
+
+sub parse_subshell {
+	my $self = shift @_;
+	return ($self->parse(qr/^\)$/),
+		$self->expect(')'));
+}
+
+sub parse_case_pattern {
+	my $self = shift @_;
+	my @tokens;
+	while (defined(my $token = $self->next_token())) {
+		push(@tokens, $token);
+		last if $token eq ')';
+	}
+	return @tokens;
+}
+
+sub parse_case {
+	my $self = shift @_;
+	my @tokens;
+	push(@tokens,
+	     $self->next_token(), # subject
+	     $self->optional_newlines(),
+	     $self->expect('in'),
+	     $self->optional_newlines());
+	while (1) {
+		my $token = $self->peek();
+		last unless defined($token) && $token ne 'esac';
+		push(@tokens,
+		     $self->parse_case_pattern(),
+		     $self->optional_newlines(),
+		     $self->parse(qr/^(?:;;|esac)$/)); # item body
+		$token = $self->peek();
+		last unless defined($token) && $token ne 'esac';
+		push(@tokens,
+		     $self->expect(';;'),
+		     $self->optional_newlines());
+	}
+	push(@tokens, $self->expect('esac'));
+	return @tokens;
+}
+
+sub parse_for {
+	my $self = shift @_;
+	my @tokens;
+	push(@tokens,
+	     $self->next_token(), # variable
+	     $self->optional_newlines());
+	my $token = $self->peek();
+	if (defined($token) && $token eq 'in') {
+		push(@tokens,
+		     $self->expect('in'),
+		     $self->optional_newlines());
+	}
+	push(@tokens,
+	     $self->parse(qr/^do$/), # items
+	     $self->expect('do'),
+	     $self->optional_newlines(),
+	     $self->parse_loop_body(),
+	     $self->expect('done'));
+	return @tokens;
+}
+
+sub parse_if {
+	my $self = shift @_;
+	my @tokens;
+	while (1) {
+		push(@tokens,
+		     $self->parse(qr/^then$/), # if/elif condition
+		     $self->expect('then'),
+		     $self->optional_newlines(),
+		     $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
+		my $token = $self->peek();
+		last unless defined($token) && $token eq 'elif';
+		push(@tokens, $self->expect('elif'));
+	}
+	my $token = $self->peek();
+	if (defined($token) && $token eq 'else') {
+		push(@tokens,
+		     $self->expect('else'),
+		     $self->optional_newlines(),
+		     $self->parse(qr/^fi$/)); # else body
+	}
+	push(@tokens, $self->expect('fi'));
+	return @tokens;
+}
+
+sub parse_loop_body {
+	my $self = shift @_;
+	return $self->parse(qr/^done$/);
+}
+
+sub parse_loop {
+	my $self = shift @_;
+	return ($self->parse(qr/^do$/), # condition
+		$self->expect('do'),
+		$self->optional_newlines(),
+		$self->parse_loop_body(),
+		$self->expect('done'));
+}
+
+sub parse_func {
+	my $self = shift @_;
+	return ($self->expect('('),
+		$self->expect(')'),
+		$self->optional_newlines(),
+		$self->parse_cmd()); # body
+}
+
+sub parse_bash_array_assignment {
+	my $self = shift @_;
+	my @tokens = $self->expect('(');
+	while (defined(my $token = $self->next_token())) {
+		push(@tokens, $token);
+		last if $token eq ')';
+	}
+	return @tokens;
+}
+
+my %compound = (
+	'{' => \&parse_group,
+	'(' => \&parse_subshell,
+	'case' => \&parse_case,
+	'for' => \&parse_for,
+	'if' => \&parse_if,
+	'until' => \&parse_loop,
+	'while' => \&parse_loop);
+
+sub parse_cmd {
+	my $self = shift @_;
+	my $cmd = $self->next_token();
+	return () unless defined($cmd);
+	return $cmd if $cmd eq "\n";
+
+	my $token;
+	my @tokens = $cmd;
+	if ($cmd eq '!') {
+		push(@tokens, $self->parse_cmd());
+		return @tokens;
+	} elsif (my $f = $compound{$cmd}) {
+		push(@tokens, $self->$f());
+	} elsif (defined($token = $self->peek()) && $token eq '(') {
+		if ($cmd !~ /\w=$/) {
+			push(@tokens, $self->parse_func());
+			return @tokens;
+		}
+		$tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
+	}
+
+	while (defined(my $token = $self->next_token())) {
+		$self->untoken($token), last if $self->stop_at($token);
+		push(@tokens, $token);
+		last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
+	}
+	push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n";
+	return @tokens;
+}
+
+sub accumulate {
+	my ($self, $tokens, $cmd) = @_;
+	push(@$tokens, @$cmd);
+}
+
+sub parse {
+	my ($self, $stop) = @_;
+	push(@{$self->{stop}}, $stop);
+	goto DONE if $self->stop_at($self->peek());
+	my @tokens;
+	while (my @cmd = $self->parse_cmd()) {
+		$self->accumulate(\@tokens, \@cmd);
+		last if $self->stop_at($self->peek());
+	}
+DONE:
+	pop(@{$self->{stop}});
+	return @tokens;
+}
+
 package ScriptParser;
 
 sub new {

From 6d932e92fcb49b59b780bc018fe550d867bb3d84 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:42 +0000
Subject: [PATCH 13/45] chainlint.pl: add parser to validate tests

Continue fleshing out chainlint.pl by adding TestParser, a parser with
special knowledge about how Git tests should be written; for instance,
it knows that commands within a test body should be chained together
with `&&`. An upcoming parser which plucks test definitions from test
scripts will invoke TestParser for each test body it encounters.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index cdf136896b..ad257106e5 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -441,6 +441,52 @@ DONE:
 	return @tokens;
 }
 
+# TestParser is a subclass of ShellParser which, beyond parsing shell script
+# code, is also imbued with semantic knowledge of test construction, and checks
+# tests for common problems (such as broken &&-chains) which might hide bugs in
+# the tests themselves or in behaviors being exercised by the tests. As such,
+# TestParser is only called upon to parse test bodies, not the top-level
+# scripts in which the tests are defined.
+package TestParser;
+
+use base 'ShellParser';
+
+sub find_non_nl {
+	my $tokens = shift @_;
+	my $n = shift @_;
+	$n = $#$tokens if !defined($n);
+	$n-- while $n >= 0 && $$tokens[$n] eq "\n";
+	return $n;
+}
+
+sub ends_with {
+	my ($tokens, $needles) = @_;
+	my $n = find_non_nl($tokens);
+	for my $needle (reverse(@$needles)) {
+		return undef if $n < 0;
+		$n = find_non_nl($tokens, $n), next if $needle eq "\n";
+		return undef if $$tokens[$n] !~ $needle;
+		$n--;
+	}
+	return 1;
+}
+
+sub accumulate {
+	my ($self, $tokens, $cmd) = @_;
+	goto DONE unless @$tokens;
+	goto DONE if @$cmd == 1 && $$cmd[0] eq "\n";
+
+	# did previous command end with "&&", "||", "|"?
+	goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]);
+
+	# flag missing "&&" at end of previous command
+	my $n = find_non_nl($tokens);
+	splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
+
+DONE:
+	$self->SUPER::accumulate($tokens, $cmd);
+}
+
 package ScriptParser;
 
 sub new {

From d99ebd6d2e57baa3ec45b939d40cf939b85301a3 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:43 +0000
Subject: [PATCH 14/45] chainlint.pl: add parser to identify test definitions

Finish fleshing out chainlint.pl by adding ScriptParser, a parser which
scans shell scripts for tests defined by test_expect_success() and
test_expect_failure(), plucks the test body from each definition, and
passes it to TestParser for validation. It recognizes test definitions
not only at the top-level of test scripts but also tests synthesized
within compound commands such as loops and function.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 63 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index ad257106e5..d526723ac0 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -487,18 +487,75 @@ DONE:
 	$self->SUPER::accumulate($tokens, $cmd);
 }
 
+# ScriptParser is a subclass of ShellParser which identifies individual test
+# definitions within test scripts, and passes each test body through TestParser
+# to identify possible problems. ShellParser detects test definitions not only
+# at the top-level of test scripts but also within compound commands such as
+# loops and function definitions.
 package ScriptParser;
 
+use base 'ShellParser';
+
 sub new {
 	my $class = shift @_;
-	my $self = bless {} => $class;
-	$self->{output} = [];
+	my $self = $class->SUPER::new(@_);
 	$self->{ntests} = 0;
 	return $self;
 }
 
+# extract the raw content of a token, which may be a single string or a
+# composition of multiple strings and non-string character runs; for instance,
+# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
+sub unwrap {
+	my $token = @_ ? shift @_ : $_;
+	# simple case: 'sqstring' or "dqstring"
+	return $token if $token =~ s/^'([^']*)'$/$1/;
+	return $token if $token =~ s/^"([^"]*)"$/$1/;
+
+	# composite case
+	my ($s, $q, $escaped);
+	while (1) {
+		# slurp up non-special characters
+		$s .= $1 if $token =~ /\G([^\\'"]*)/gc;
+		# handle special characters
+		last unless $token =~ /\G(.)/sgc;
+		my $c = $1;
+		$q = undef, next if defined($q) && $c eq $q;
+		$q = $c, next if !defined($q) && $c =~ /^['"]$/;
+		if ($c eq '\\') {
+			last unless $token =~ /\G(.)/sgc;
+			$c = $1;
+			$s .= '\\' if $c eq "\n"; # preserve line splice
+		}
+		$s .= $c;
+	}
+	return $s
+}
+
+sub check_test {
+	my $self = shift @_;
+	my ($title, $body) = map(unwrap, @_);
+	$self->{ntests}++;
+	my $parser = TestParser->new(\$body);
+	my @tokens = $parser->parse();
+	return unless $emit_all || grep(/\?![^?]+\?!/, @tokens);
+	my $checked = join(' ', @tokens);
+	$checked =~ s/^\n//;
+	$checked =~ s/^ //mg;
+	$checked =~ s/ $//mg;
+	$checked .= "\n" unless $checked =~ /\n$/;
+	push(@{$self->{output}}, "# chainlint: $title\n$checked");
+}
+
 sub parse_cmd {
-	return undef;
+	my $self = shift @_;
+	my @tokens = $self->SUPER::parse_cmd();
+	return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/;
+	my $n = $#tokens;
+	$n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/;
+	$self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body
+	$self->check_test($tokens[2], $tokens[3]) if $n > 2;  # prereq title body
+	return @tokens;
 }
 
 # main contains high-level functionality for processing command-line switches,

From 29fb2ec384a867ca577335a12f4b45c184e7b642 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:44 +0000
Subject: [PATCH 15/45] chainlint.pl: validate test scripts in parallel

Although chainlint.pl has undergone a good deal of optimization during
its development -- increasing in speed significantly -- parsing and
validating 1050+ scripts and 16500+ tests via Perl is not exactly
instantaneous. However, perceived performance can be improved by taking
advantage of the fact that there is no interdependence between test
scripts or test definitions, thus parsing and validating can be done in
parallel. The number of available cores is determined automatically but
can be overridden via the --jobs option.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index d526723ac0..898573a910 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -15,9 +15,11 @@
 
 use warnings;
 use strict;
+use Config;
 use File::Glob;
 use Getopt::Long;
 
+my $jobs = -1;
 my $show_stats;
 my $emit_all;
 
@@ -569,6 +571,16 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
 	$interval = sub { return Time::HiRes::tv_interval(shift); };
 }
 
+sub ncores {
+	# Windows
+	return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS});
+	# Linux / MSYS2 / Cygwin / WSL
+	do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor\s*:/, <>)); } if -r '/proc/cpuinfo';
+	# macOS & BSD
+	return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/;
+	return 1;
+}
+
 sub show_stats {
 	my ($start_time, $stats) = @_;
 	my $walltime = $interval->($start_time);
@@ -621,7 +633,9 @@ sub exit_code {
 Getopt::Long::Configure(qw{bundling});
 GetOptions(
 	"emit-all!" => \$emit_all,
+	"jobs|j=i" => \$jobs,
 	"stats|show-stats!" => \$show_stats) or die("option error\n");
+$jobs = ncores() if $jobs < 1;
 
 my $start_time = $getnow->();
 my @stats;
@@ -633,6 +647,40 @@ unless (@scripts) {
 	exit;
 }
 
-push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
+unless ($Config{useithreads} && eval {
+	require threads; threads->import();
+	require Thread::Queue; Thread::Queue->import();
+	1;
+	}) {
+	push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
+	show_stats($start_time, \@stats) if $show_stats;
+	exit(exit_code(\@stats));
+}
+
+my $script_queue = Thread::Queue->new();
+my $output_queue = Thread::Queue->new();
+
+sub next_script { return $script_queue->dequeue(); }
+sub emit { $output_queue->enqueue(@_); }
+
+sub monitor {
+	while (my $s = $output_queue->dequeue()) {
+		print($s);
+	}
+}
+
+my $mon = threads->create({'context' => 'void'}, \&monitor);
+threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
+
+$script_queue->enqueue(@scripts);
+$script_queue->end();
+
+for (threads->list()) {
+	push(@stats, $_->join()) unless $_ == $mon;
+}
+
+$output_queue->end();
+$mon->join();
+
 show_stats($start_time, \@stats) if $show_stats;
 exit(exit_code(\@stats));

From 35ebb1e37b25b9d799d1064d36a2ce668ad20264 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:45 +0000
Subject: [PATCH 16/45] chainlint.pl: don't require `return|exit|continue` to
 end with `&&`

In order to check for &&-chain breakage, each time TestParser encounters
a new command, it checks whether the previous command ends with `&&`,
and -- with a couple exceptions -- signals breakage if it does not. The
first exception is that a command may validly end with `||`, which is
commonly employed as `command || return 1` at the very end of a loop
body to terminate the loop early. The second is that piping one
command's output with `|` to another command does not constitute a
&&-chain break (the exit status of the pipe is the exit status of the
final command in the pipe).

However, it turns out that there are a few additional cases found in the
wild in which it is likely safe for `&&` to be missing even when other
commands follow. For instance:

    while {condition-1}
    do
        test {condition-2} || return 1 # or `exit 1` within a subshell
        more-commands
    done

    while {condition-1}
    do
        test {condition-2} || continue
        more-commands
    done

Such cases indicate deliberate thought about failure modes by the test
author, thus flagging them as breaking the &&-chain is not helpful.
Therefore, take these special cases into consideration when checking for
&&-chain breakage.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                             | 20 ++++++++++++++++++--
 t/chainlint/chain-break-continue.expect    | 12 ++++++++++++
 t/chainlint/chain-break-continue.test      | 13 +++++++++++++
 t/chainlint/chain-break-return-exit.expect |  4 ++++
 t/chainlint/chain-break-return-exit.test   |  5 +++++
 t/chainlint/return-loop.expect             |  5 +++++
 t/chainlint/return-loop.test               |  6 ++++++
 7 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 t/chainlint/chain-break-continue.expect
 create mode 100644 t/chainlint/chain-break-continue.test
 create mode 100644 t/chainlint/chain-break-return-exit.expect
 create mode 100644 t/chainlint/chain-break-return-exit.test
 create mode 100644 t/chainlint/return-loop.expect
 create mode 100644 t/chainlint/return-loop.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 898573a910..31c444067c 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -473,13 +473,29 @@ sub ends_with {
 	return 1;
 }
 
+sub match_ending {
+	my ($tokens, $endings) = @_;
+	for my $needles (@$endings) {
+		next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
+		return 1 if ends_with($tokens, $needles);
+	}
+	return undef;
+}
+
+my @safe_endings = (
+	[qr/^(?:&&|\|\||\|)$/],
+	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
+	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
+	[qr/^(?:exit|return|continue)$/],
+	[qr/^(?:exit|return|continue)$/, qr/^;$/]);
+
 sub accumulate {
 	my ($self, $tokens, $cmd) = @_;
 	goto DONE unless @$tokens;
 	goto DONE if @$cmd == 1 && $$cmd[0] eq "\n";
 
-	# did previous command end with "&&", "||", "|"?
-	goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]);
+	# did previous command end with "&&", "|", "|| return" or similar?
+	goto DONE if match_ending($tokens, \@safe_endings);
 
 	# flag missing "&&" at end of previous command
 	my $n = find_non_nl($tokens);
diff --git a/t/chainlint/chain-break-continue.expect b/t/chainlint/chain-break-continue.expect
new file mode 100644
index 0000000000..47a3457710
--- /dev/null
+++ b/t/chainlint/chain-break-continue.expect
@@ -0,0 +1,12 @@
+git ls-tree --name-only -r refs/notes/many_notes |
+while read path
+do
+	test "$path" = "foobar/non-note.txt" && continue
+	test "$path" = "deadbeef" && continue
+	test "$path" = "de/adbeef" && continue
+
+	if test $(expr length "$path") -ne $hexsz
+	then
+		return 1
+	fi
+done
diff --git a/t/chainlint/chain-break-continue.test b/t/chainlint/chain-break-continue.test
new file mode 100644
index 0000000000..f0af71d8bd
--- /dev/null
+++ b/t/chainlint/chain-break-continue.test
@@ -0,0 +1,13 @@
+git ls-tree --name-only -r refs/notes/many_notes |
+while read path
+do
+# LINT: broken &&-chain okay if explicit "continue"
+	test "$path" = "foobar/non-note.txt" && continue
+	test "$path" = "deadbeef" && continue
+	test "$path" = "de/adbeef" && continue
+
+	if test $(expr length "$path") -ne $hexsz
+	then
+		return 1
+	fi
+done
diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect
new file mode 100644
index 0000000000..dba292ee89
--- /dev/null
+++ b/t/chainlint/chain-break-return-exit.expect
@@ -0,0 +1,4 @@
+for i in 1 2 3 4 ; do
+	git checkout main -b $i || return $?
+	test_commit $i $i $i tag$i || return $?
+done
diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test
new file mode 100644
index 0000000000..e2b059933a
--- /dev/null
+++ b/t/chainlint/chain-break-return-exit.test
@@ -0,0 +1,5 @@
+for i in 1 2 3 4 ; do
+# LINT: broken &&-chain okay if explicit "return $?" signals failure
+	git checkout main -b $i || return $?
+	test_commit $i $i $i tag$i || return $?
+done
diff --git a/t/chainlint/return-loop.expect b/t/chainlint/return-loop.expect
new file mode 100644
index 0000000000..cfc0549bef
--- /dev/null
+++ b/t/chainlint/return-loop.expect
@@ -0,0 +1,5 @@
+while test $i -lt $((num - 5))
+do
+	git notes add -m "notes for commit$i" HEAD~$i || return 1
+	i=$((i + 1))
+done
diff --git a/t/chainlint/return-loop.test b/t/chainlint/return-loop.test
new file mode 100644
index 0000000000..f90b171300
--- /dev/null
+++ b/t/chainlint/return-loop.test
@@ -0,0 +1,6 @@
+while test $i -lt $((num - 5))
+do
+# LINT: "|| return {n}" valid loop escape outside subshell; no "&&" needed
+	git notes add -m "notes for commit$i" HEAD~$i || return 1
+	i=$((i + 1))
+done

From d00113ec3474a1652a73c11695c7e7b5182d80a7 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:46 +0000
Subject: [PATCH 17/45] t/Makefile: apply chainlint.pl to existing self-tests

Now that chainlint.pl is functional, take advantage of the existing
chainlint self-tests to validate its operation. (While at it, stop
validating chainlint.sed against the self-tests since it will soon be
retired.)

Due to chainlint.sed implementation limitations leaking into the
self-test "expect" files, a few of them require minor adjustment to make
them compatible with chainlint.pl which does not share those
limitations.

First, because `sed` does not provide any sort of real recursion,
chainlint.sed only emulates recursion into subshells, and each level of
recursion leads to a multiplicative increase in complexity of the `sed`
rules. To avoid substantial complexity, chainlint.sed, therefore, only
emulates subshell recursion one level deep. Any subshell deeper than
that is passed through as-is, which means that &&-chains are not checked
in deeper subshells. chainlint.pl, on the other hand, employs a proper
recursive descent parser, thus checks subshells to any depth and
correctly flags broken &&-chains in deep subshells.

Second, due to sed's line-oriented nature, chainlint.sed, by necessity,
folds multi-line quoted strings into a single line. chainlint.pl, on the
other hand, employs a proper lexical analyzer which preserves quoted
strings as-is, including embedded newlines.

Furthermore, the output of chainlint.sed and chainlint.pl do not match
precisely in terms of whitespace. However, since the purpose of the
self-checks is to verify that the ?!AMP?! annotations are being
correctly added, minor whitespace differences are immaterial. For this
reason, rather than adjusting whitespace in all existing self-test
"expect" files to match the new linter's output, the `check-chainlint`
target ignores whitespace differences. Since `diff -w` is not POSIX,
`check-chainlint` attempts to employ `git diff -w`, and only falls back
to non-POSIX `diff -w` (and `-u`) if `git diff` is not available.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/Makefile                                    | 29 +++++++++++++++----
 t/chainlint/block.expect                      |  2 +-
 t/chainlint/here-doc-multi-line-string.expect |  3 +-
 t/chainlint/multi-line-string.expect          | 11 +++++--
 t/chainlint/nested-subshell.expect            |  2 +-
 t/chainlint/t7900-subtree.expect              | 13 +++++++--
 6 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/t/Makefile b/t/Makefile
index 1c80c0c79a..11f276774e 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -38,7 +38,7 @@ T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh))
 THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh)))
 TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh))
 CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test)))
-CHAINLINT = sed -f chainlint.sed
+CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl
 
 all: $(DEFAULT_TEST_TARGET)
 
@@ -73,10 +73,29 @@ clean-chainlint:
 
 check-chainlint:
 	@mkdir -p '$(CHAINLINTTMP_SQ)' && \
-	sed -e '/^# LINT: /d' $(patsubst %,chainlint/%.test,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/tests && \
-	sed -e '/^[ 	]*$$/d' $(patsubst %,chainlint/%.expect,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/expect && \
-	$(CHAINLINT) '$(CHAINLINTTMP_SQ)'/tests | grep -v '^[	]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \
-	diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
+	for i in $(CHAINLINTTESTS); do \
+		echo "test_expect_success '$$i' '" && \
+		sed -e '/^# LINT: /d' chainlint/$$i.test && \
+		echo "'"; \
+	done >'$(CHAINLINTTMP_SQ)'/tests && \
+	{ \
+		echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \
+		for i in $(CHAINLINTTESTS); do \
+			echo "# chainlint: $$i" && \
+			sed -e '/^[ 	]*$$/d' chainlint/$$i.expect; \
+		done \
+	} >'$(CHAINLINTTMP_SQ)'/expect && \
+	$(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
+		grep -v '^[ 	]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \
+	if test -f ../GIT-BUILD-OPTIONS; then \
+		. ../GIT-BUILD-OPTIONS; \
+	fi && \
+	if test -x ../git$$X; then \
+		DIFFW="../git$$X --no-pager diff -w --no-index"; \
+	else \
+		DIFFW="diff -w -u"; \
+	fi && \
+	$$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
 
 test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \
 	test-lint-filenames
diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect
index da60257ebc..37dbf7d95f 100644
--- a/t/chainlint/block.expect
+++ b/t/chainlint/block.expect
@@ -1,7 +1,7 @@
 (
 	foo &&
 	{
-		echo a
+		echo a ?!AMP?!
 		echo b
 	} &&
 	bar &&
diff --git a/t/chainlint/here-doc-multi-line-string.expect b/t/chainlint/here-doc-multi-line-string.expect
index 2578191ca8..be64b26869 100644
--- a/t/chainlint/here-doc-multi-line-string.expect
+++ b/t/chainlint/here-doc-multi-line-string.expect
@@ -1,4 +1,5 @@
 (
-	cat <<-TXT && echo "multi-line	string" ?!AMP?!
+	cat <<-TXT && echo "multi-line
+	string" ?!AMP?!
 	bap
 )
diff --git a/t/chainlint/multi-line-string.expect b/t/chainlint/multi-line-string.expect
index ab0dadf748..27ff95218e 100644
--- a/t/chainlint/multi-line-string.expect
+++ b/t/chainlint/multi-line-string.expect
@@ -1,9 +1,14 @@
 (
-	x="line 1		line 2		line 3" &&
-	y="line 1		line2" ?!AMP?!
+	x="line 1
+		line 2
+		line 3" &&
+	y="line 1
+		line2" ?!AMP?!
 	foobar
 ) &&
 (
-	echo "xyz" "abc		def		ghi" &&
+	echo "xyz" "abc
+		def
+		ghi" &&
 	barfoo
 )
diff --git a/t/chainlint/nested-subshell.expect b/t/chainlint/nested-subshell.expect
index 41a48adaa2..02e0a9f1bb 100644
--- a/t/chainlint/nested-subshell.expect
+++ b/t/chainlint/nested-subshell.expect
@@ -6,7 +6,7 @@
 	) >file &&
 	cd foo &&
 	(
-		echo a
+		echo a ?!AMP?!
 		echo b
 	) >file
 )
diff --git a/t/chainlint/t7900-subtree.expect b/t/chainlint/t7900-subtree.expect
index 1cccc7bf7e..69167da2f2 100644
--- a/t/chainlint/t7900-subtree.expect
+++ b/t/chainlint/t7900-subtree.expect
@@ -1,10 +1,17 @@
 (
-	chks="sub1sub2sub3sub4" &&
+	chks="sub1
+sub2
+sub3
+sub4" &&
 	chks_sub=$(cat <<TXT | sed "s,^,sub dir/,"
 ) &&
-	chkms="main-sub1main-sub2main-sub3main-sub4" &&
+	chkms="main-sub1
+main-sub2
+main-sub3
+main-sub4" &&
 	chkms_sub=$(cat <<TXT | sed "s,^,sub dir/,"
 ) &&
 	subfiles=$(git ls-files) &&
-	check_equal "$subfiles" "$chkms$chks"
+	check_equal "$subfiles" "$chkms
+$chks"
 )

From aabc3258a1da5f06e0b2492f450d4b8e7e8137e5 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:47 +0000
Subject: [PATCH 18/45] chainlint.pl: don't require `&` background command to
 end with `&&`

The exit status of the `&` asynchronous operator which starts a command
in the background is unconditionally zero, and the few places in the
test scripts which launch commands asynchronously are not interested in
the exit status of the `&` operator (though they often capture the
background command's PID). As such, there is little value in complaining
about broken &&-chain for a command launched in the background, and
doing so would only make busy-work for test authors. Therefore, take
this special case into account when checking for &&-chain breakage.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                            |  2 +-
 t/chainlint/chain-break-background.expect |  9 +++++++++
 t/chainlint/chain-break-background.test   | 10 ++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 t/chainlint/chain-break-background.expect
 create mode 100644 t/chainlint/chain-break-background.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 31c444067c..ba3fcb0c8e 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -483,7 +483,7 @@ sub match_ending {
 }
 
 my @safe_endings = (
-	[qr/^(?:&&|\|\||\|)$/],
+	[qr/^(?:&&|\|\||\||&)$/],
 	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
 	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
 	[qr/^(?:exit|return|continue)$/],
diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect
new file mode 100644
index 0000000000..28f9114f42
--- /dev/null
+++ b/t/chainlint/chain-break-background.expect
@@ -0,0 +1,9 @@
+JGIT_DAEMON_PID= &&
+git init --bare empty.git &&
+> empty.git/git-daemon-export-ok &&
+mkfifo jgit_daemon_output &&
+{
+	jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output &
+	JGIT_DAEMON_PID=$!
+} &&
+test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git
diff --git a/t/chainlint/chain-break-background.test b/t/chainlint/chain-break-background.test
new file mode 100644
index 0000000000..e10f656b05
--- /dev/null
+++ b/t/chainlint/chain-break-background.test
@@ -0,0 +1,10 @@
+JGIT_DAEMON_PID= &&
+git init --bare empty.git &&
+>empty.git/git-daemon-export-ok &&
+mkfifo jgit_daemon_output &&
+{
+# LINT: exit status of "&" is always 0 so &&-chaining immaterial
+	jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output &
+	JGIT_DAEMON_PID=$!
+} &&
+test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git

From a8f30ee0502b89ecb660af36784f653a8c3fb20d Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:48 +0000
Subject: [PATCH 19/45] chainlint.pl: don't flag broken &&-chain if `$?`
 handled explicitly

There are cases in which tests capture and check a command's exit code
explicitly without employing test_expect_code(). They do so by
intentionally breaking the &&-chain since it would be impossible to
capture "$?" in the failing case if the `status=$?` assignment was part
of the &&-chain. Since such constructs are manually checking the exit
code, their &&-chain breakage is legitimate and safe, thus should not be
flagged. Therefore, stop flagging &&-chain breakage in such cases.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                        |  6 ++++++
 t/chainlint/chain-break-status.expect |  9 +++++++++
 t/chainlint/chain-break-status.test   | 11 +++++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 t/chainlint/chain-break-status.expect
 create mode 100644 t/chainlint/chain-break-status.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index ba3fcb0c8e..14e1db3519 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -497,6 +497,12 @@ sub accumulate {
 	# did previous command end with "&&", "|", "|| return" or similar?
 	goto DONE if match_ending($tokens, \@safe_endings);
 
+	# if this command handles "$?" specially, then okay for previous
+	# command to be missing "&&"
+	for my $token (@$cmd) {
+		goto DONE if $token =~ /\$\?/;
+	}
+
 	# flag missing "&&" at end of previous command
 	my $n = find_non_nl($tokens);
 	splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect
new file mode 100644
index 0000000000..f4bada9463
--- /dev/null
+++ b/t/chainlint/chain-break-status.expect
@@ -0,0 +1,9 @@
+OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) &&
+test_match_signal 13 "$OUT" &&
+
+{ test-tool sigchain > actual ; ret=$? ; } &&
+{
+	test_match_signal 15 "$ret" ||
+	test "$ret" = 3
+} &&
+test_cmp expect actual
diff --git a/t/chainlint/chain-break-status.test b/t/chainlint/chain-break-status.test
new file mode 100644
index 0000000000..a6602a7b99
--- /dev/null
+++ b/t/chainlint/chain-break-status.test
@@ -0,0 +1,11 @@
+# LINT: broken &&-chain okay if next command handles "$?" explicitly
+OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) &&
+test_match_signal 13 "$OUT" &&
+
+# LINT: broken &&-chain okay if next command handles "$?" explicitly
+{ test-tool sigchain >actual; ret=$?; } &&
+{
+	test_match_signal 15 "$ret" ||
+	test "$ret" = 3
+} &&
+test_cmp expect actual

From 832c68b3c210267c93e1dcb2f2763372339ca36c Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:49 +0000
Subject: [PATCH 20/45] chainlint.pl: don't flag broken &&-chain if failure
 indicated explicitly

There are quite a few tests which print an error messages and then
explicitly signal failure with `false`, `return 1`, or `exit 1` as the
final command in an `if` branch. In these cases, the tests don't bother
maintaining the &&-chain between `echo` and the explicit "test failed"
indicator. Since such constructs are manually signaling failure, their
&&-chain breakage is legitimate and safe -- both for the command
immediately preceding `false`, `return`, or `exit`, as well as for all
preceding commands in the `if` branch. Therefore, stop flagging &&-chain
breakage in these sorts of cases.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                             |  8 ++++++++
 t/chainlint/chain-break-false.expect       |  9 +++++++++
 t/chainlint/chain-break-false.test         | 10 ++++++++++
 t/chainlint/chain-break-return-exit.expect | 15 +++++++++++++++
 t/chainlint/chain-break-return-exit.test   | 18 ++++++++++++++++++
 t/chainlint/if-in-loop.expect              |  2 +-
 t/chainlint/if-in-loop.test                |  2 +-
 7 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 t/chainlint/chain-break-false.expect
 create mode 100644 t/chainlint/chain-break-false.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 14e1db3519..a76a09ecf5 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -503,6 +503,14 @@ sub accumulate {
 		goto DONE if $token =~ /\$\?/;
 	}
 
+	# if this command is "false", "return 1", or "exit 1" (which signal
+	# failure explicitly), then okay for all preceding commands to be
+	# missing "&&"
+	if ($$cmd[0] =~ /^(?:false|return|exit)$/) {
+		@$tokens = grep(!/^\?!AMP\?!$/, @$tokens);
+		goto DONE;
+	}
+
 	# flag missing "&&" at end of previous command
 	my $n = find_non_nl($tokens);
 	splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
diff --git a/t/chainlint/chain-break-false.expect b/t/chainlint/chain-break-false.expect
new file mode 100644
index 0000000000..989766fb85
--- /dev/null
+++ b/t/chainlint/chain-break-false.expect
@@ -0,0 +1,9 @@
+if condition not satisified
+then
+	echo it did not work...
+	echo failed!
+	false
+else
+	echo it went okay ?!AMP?!
+	congratulate user
+fi
diff --git a/t/chainlint/chain-break-false.test b/t/chainlint/chain-break-false.test
new file mode 100644
index 0000000000..a5aaff8c8a
--- /dev/null
+++ b/t/chainlint/chain-break-false.test
@@ -0,0 +1,10 @@
+# LINT: broken &&-chain okay if explicit "false" signals failure
+if condition not satisified
+then
+	echo it did not work...
+	echo failed!
+	false
+else
+	echo it went okay
+	congratulate user
+fi
diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect
index dba292ee89..1732d221c3 100644
--- a/t/chainlint/chain-break-return-exit.expect
+++ b/t/chainlint/chain-break-return-exit.expect
@@ -1,3 +1,18 @@
+case "$(git ls-files)" in
+one ) echo pass one ;;
+* ) echo bad one ; return 1 ;;
+esac &&
+(
+	case "$(git ls-files)" in
+	two ) echo pass two ;;
+	* ) echo bad two ; exit 1 ;;
+esac
+) &&
+case "$(git ls-files)" in
+dir/two"$LF"one ) echo pass both ;;
+* ) echo bad ; return 1 ;;
+esac &&
+
 for i in 1 2 3 4 ; do
 	git checkout main -b $i || return $?
 	test_commit $i $i $i tag$i || return $?
diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test
index e2b059933a..46542edf88 100644
--- a/t/chainlint/chain-break-return-exit.test
+++ b/t/chainlint/chain-break-return-exit.test
@@ -1,3 +1,21 @@
+case "$(git ls-files)" in
+one) echo pass one ;;
+# LINT: broken &&-chain okay if explicit "return 1" signals failuire
+*) echo bad one; return 1 ;;
+esac &&
+(
+	case "$(git ls-files)" in
+	two) echo pass two ;;
+# LINT: broken &&-chain okay if explicit "exit 1" signals failuire
+	*) echo bad two; exit 1 ;;
+	esac
+) &&
+case "$(git ls-files)" in
+dir/two"$LF"one) echo pass both ;;
+# LINT: broken &&-chain okay if explicit "return 1" signals failuire
+*) echo bad; return 1 ;;
+esac &&
+
 for i in 1 2 3 4 ; do
 # LINT: broken &&-chain okay if explicit "return $?" signals failure
 	git checkout main -b $i || return $?
diff --git a/t/chainlint/if-in-loop.expect b/t/chainlint/if-in-loop.expect
index 03b82a3e58..d6514ae749 100644
--- a/t/chainlint/if-in-loop.expect
+++ b/t/chainlint/if-in-loop.expect
@@ -3,7 +3,7 @@
 	do
 		if false
 		then
-			echo "err" ?!AMP?!
+			echo "err"
 			exit 1
 		fi ?!AMP?!
 		foo
diff --git a/t/chainlint/if-in-loop.test b/t/chainlint/if-in-loop.test
index f0cf19cfad..90c23976fe 100644
--- a/t/chainlint/if-in-loop.test
+++ b/t/chainlint/if-in-loop.test
@@ -3,7 +3,7 @@
 	do
 		if false
 		then
-# LINT: missing "&&" on "echo"
+# LINT: missing "&&" on "echo" okay since "exit 1" signals error explicitly
 			echo "err"
 			exit 1
 # LINT: missing "&&" on "fi"

From fd4094c3cad7c62adb0b7080e0dca37f66bf0c6e Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:50 +0000
Subject: [PATCH 21/45] chainlint.pl: complain about loops lacking explicit
 failure handling

Shell `for` and `while` loops do not terminate automatically just
because a command fails within the loop body. Instead, the loop
continues to iterate and eventually returns the exit status of the final
command of the final iteration, which may not be the command which
failed, thus it is possible for failures to go undetected. Consequently,
it is important for test authors to explicitly handle failure within the
loop body by terminating the loop manually upon failure. This can be
done by returning a non-zero exit code from within the loop body
(i.e. `|| return 1`) or exiting (i.e. `|| exit 1`) if the loop is within
a subshell, or by manually checking `$?` and taking some appropriate
action. Therefore, add logic to detect and complain about loops which
lack explicit `return` or `exit`, or `$?` check.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                                | 11 ++++++
 t/chainlint/complex-if-in-cuddled-loop.expect |  2 +-
 t/chainlint/for-loop.expect                   |  4 +--
 t/chainlint/loop-detect-failure.expect        | 15 ++++++++
 t/chainlint/loop-detect-failure.test          | 17 +++++++++
 t/chainlint/loop-detect-status.expect         | 18 ++++++++++
 t/chainlint/loop-detect-status.test           | 19 ++++++++++
 t/chainlint/loop-in-if.expect                 |  2 +-
 t/chainlint/nested-loop-detect-failure.expect | 31 ++++++++++++++++
 t/chainlint/nested-loop-detect-failure.test   | 35 +++++++++++++++++++
 t/chainlint/semicolon.expect                  |  2 +-
 t/chainlint/while-loop.expect                 |  4 +--
 12 files changed, 153 insertions(+), 7 deletions(-)
 create mode 100644 t/chainlint/loop-detect-failure.expect
 create mode 100644 t/chainlint/loop-detect-failure.test
 create mode 100644 t/chainlint/loop-detect-status.expect
 create mode 100644 t/chainlint/loop-detect-status.test
 create mode 100644 t/chainlint/nested-loop-detect-failure.expect
 create mode 100644 t/chainlint/nested-loop-detect-failure.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index a76a09ecf5..674b3ddf69 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -482,6 +482,17 @@ sub match_ending {
 	return undef;
 }
 
+sub parse_loop_body {
+	my $self = shift @_;
+	my @tokens = $self->SUPER::parse_loop_body(@_);
+	# did loop signal failure via "|| return" or "|| exit"?
+	return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens);
+	# flag missing "return/exit" handling explicit failure in loop body
+	my $n = find_non_nl(\@tokens);
+	splice(@tokens, $n + 1, 0, '?!LOOP?!');
+	return @tokens;
+}
+
 my @safe_endings = (
 	[qr/^(?:&&|\|\||\||&)$/],
 	[qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
diff --git a/t/chainlint/complex-if-in-cuddled-loop.expect b/t/chainlint/complex-if-in-cuddled-loop.expect
index 2fca183409..dac2d0fd1d 100644
--- a/t/chainlint/complex-if-in-cuddled-loop.expect
+++ b/t/chainlint/complex-if-in-cuddled-loop.expect
@@ -4,6 +4,6 @@
      :
    else
      echo >file
-   fi
+   fi ?!LOOP?!
  done) &&
 test ! -f file
diff --git a/t/chainlint/for-loop.expect b/t/chainlint/for-loop.expect
index 6671b8cd84..a5810c9bdd 100644
--- a/t/chainlint/for-loop.expect
+++ b/t/chainlint/for-loop.expect
@@ -2,10 +2,10 @@
 	for i in a b c
 	do
 		echo $i ?!AMP?!
-		cat <<-EOF
+		cat <<-EOF ?!LOOP?!
 	done ?!AMP?!
 	for i in a b c; do
 		echo $i &&
-		cat $i
+		cat $i ?!LOOP?!
 	done
 )
diff --git a/t/chainlint/loop-detect-failure.expect b/t/chainlint/loop-detect-failure.expect
new file mode 100644
index 0000000000..a66025c39d
--- /dev/null
+++ b/t/chainlint/loop-detect-failure.expect
@@ -0,0 +1,15 @@
+git init r1 &&
+for n in 1 2 3 4 5
+do
+	echo "This is file: $n" > r1/file.$n &&
+	git -C r1 add file.$n &&
+	git -C r1 commit -m "$n" || return 1
+done &&
+
+git init r2 &&
+for n in 1000 10000
+do
+	printf "%"$n"s" X > r2/large.$n &&
+	git -C r2 add large.$n &&
+	git -C r2 commit -m "$n" ?!LOOP?!
+done
diff --git a/t/chainlint/loop-detect-failure.test b/t/chainlint/loop-detect-failure.test
new file mode 100644
index 0000000000..b9791cc802
--- /dev/null
+++ b/t/chainlint/loop-detect-failure.test
@@ -0,0 +1,17 @@
+git init r1 &&
+# LINT: loop handles failure explicitly with "|| return 1"
+for n in 1 2 3 4 5
+do
+	echo "This is file: $n" > r1/file.$n &&
+	git -C r1 add file.$n &&
+	git -C r1 commit -m "$n" || return 1
+done &&
+
+git init r2 &&
+# LINT: loop fails to handle failure explicitly with "|| return 1"
+for n in 1000 10000
+do
+	printf "%"$n"s" X > r2/large.$n &&
+	git -C r2 add large.$n &&
+	git -C r2 commit -m "$n"
+done
diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect
new file mode 100644
index 0000000000..0ad23bb35e
--- /dev/null
+++ b/t/chainlint/loop-detect-status.expect
@@ -0,0 +1,18 @@
+( while test $i -le $blobcount
+do
+	printf "Generating blob $i/$blobcount\r" >& 2 &&
+	printf "blob\nmark :$i\ndata $blobsize\n" &&
+
+	printf "%-${blobsize}s" $i &&
+	echo "M 100644 :$i $i" >> commit &&
+	i=$(($i+1)) ||
+	echo $? > exit-status
+done &&
+echo "commit refs/heads/main" &&
+echo "author A U Thor <author@email.com> 123456789 +0000" &&
+echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
+echo "data 5" &&
+echo ">2gb" &&
+cat commit ) |
+git fast-import --big-file-threshold=2 &&
+test ! -f exit-status
diff --git a/t/chainlint/loop-detect-status.test b/t/chainlint/loop-detect-status.test
new file mode 100644
index 0000000000..1c6c23cfc9
--- /dev/null
+++ b/t/chainlint/loop-detect-status.test
@@ -0,0 +1,19 @@
+# LINT: "$?" handled explicitly within loop body
+(while test $i -le $blobcount
+ do
+	printf "Generating blob $i/$blobcount\r" >&2 &&
+	printf "blob\nmark :$i\ndata $blobsize\n" &&
+	#test-tool genrandom $i $blobsize &&
+	printf "%-${blobsize}s" $i &&
+	echo "M 100644 :$i $i" >> commit &&
+	i=$(($i+1)) ||
+	echo $? > exit-status
+ done &&
+ echo "commit refs/heads/main" &&
+ echo "author A U Thor <author@email.com> 123456789 +0000" &&
+ echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
+ echo "data 5" &&
+ echo ">2gb" &&
+ cat commit) |
+git fast-import --big-file-threshold=2 &&
+test ! -f exit-status
diff --git a/t/chainlint/loop-in-if.expect b/t/chainlint/loop-in-if.expect
index e1be42376c..6c5d6e5b24 100644
--- a/t/chainlint/loop-in-if.expect
+++ b/t/chainlint/loop-in-if.expect
@@ -4,7 +4,7 @@
 		while true
 		do
 			echo "pop" ?!AMP?!
-			echo "glup"
+			echo "glup" ?!LOOP?!
 		done ?!AMP?!
 		foo
 	fi ?!AMP?!
diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect
new file mode 100644
index 0000000000..4793a0e8e1
--- /dev/null
+++ b/t/chainlint/nested-loop-detect-failure.expect
@@ -0,0 +1,31 @@
+for i in 0 1 2 3 4 5 6 7 8 9 ;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	do
+		echo "$i$j" > "path$i$j" ?!LOOP?!
+	done ?!LOOP?!
+done &&
+
+for i in 0 1 2 3 4 5 6 7 8 9 ;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	do
+		echo "$i$j" > "path$i$j" || return 1
+	done
+done &&
+
+for i in 0 1 2 3 4 5 6 7 8 9 ;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	do
+		echo "$i$j" > "path$i$j" ?!LOOP?!
+	done || return 1
+done &&
+
+for i in 0 1 2 3 4 5 6 7 8 9 ;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9 ;
+	do
+		echo "$i$j" > "path$i$j" || return 1
+	done || return 1
+done
diff --git a/t/chainlint/nested-loop-detect-failure.test b/t/chainlint/nested-loop-detect-failure.test
new file mode 100644
index 0000000000..e6f0c1acfb
--- /dev/null
+++ b/t/chainlint/nested-loop-detect-failure.test
@@ -0,0 +1,35 @@
+# LINT: neither loop handles failure explicitly with "|| return 1"
+for i in 0 1 2 3 4 5 6 7 8 9;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9;
+	do
+		echo "$i$j" >"path$i$j"
+	done
+done &&
+
+# LINT: inner loop handles failure explicitly with "|| return 1"
+for i in 0 1 2 3 4 5 6 7 8 9;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9;
+	do
+		echo "$i$j" >"path$i$j" || return 1
+	done
+done &&
+
+# LINT: outer loop handles failure explicitly with "|| return 1"
+for i in 0 1 2 3 4 5 6 7 8 9;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9;
+	do
+		echo "$i$j" >"path$i$j"
+	done || return 1
+done &&
+
+# LINT: inner & outer loops handles failure explicitly with "|| return 1"
+for i in 0 1 2 3 4 5 6 7 8 9;
+do
+	for j in 0 1 2 3 4 5 6 7 8 9;
+	do
+		echo "$i$j" >"path$i$j" || return 1
+	done || return 1
+done
diff --git a/t/chainlint/semicolon.expect b/t/chainlint/semicolon.expect
index ed0b3707ae..3aa2259f36 100644
--- a/t/chainlint/semicolon.expect
+++ b/t/chainlint/semicolon.expect
@@ -15,5 +15,5 @@
 ) &&
 (cd foo &&
 	for i in a b c; do
-		echo;
+		echo; ?!LOOP?!
 	done)
diff --git a/t/chainlint/while-loop.expect b/t/chainlint/while-loop.expect
index 0d3a9b3d12..f272aa21fe 100644
--- a/t/chainlint/while-loop.expect
+++ b/t/chainlint/while-loop.expect
@@ -2,10 +2,10 @@
 	while true
 	do
 		echo foo ?!AMP?!
-		cat <<-EOF
+		cat <<-EOF ?!LOOP?!
 	done ?!AMP?!
 	while true; do
 		echo foo &&
-		cat bar
+		cat bar ?!LOOP?!
 	done
 )

From ae0c55abf8217bb06422f9eafcd7a30b2c8f9e8b Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:51 +0000
Subject: [PATCH 22/45] chainlint.pl: allow `|| echo` to signal failure
 upstream of a pipe

The use of `|| return` (or `|| exit`) to signal failure within a loop
isn't effective when the loop is upstream of a pipe since the pipe
swallows all upstream exit codes and returns only the exit code of the
final command in the pipeline.

To work around this limitation, tests may adopt an alternative strategy
of signaling failure by emitting text which would never be emitted in
the non-failing case. For instance:

    while condition
    do
        command1 &&
        command2 ||
        echo "impossible text"
    done |
    sort >actual &&

Such usage indicates deliberate thought about failure cases by the test
author, thus flagging them as missing `|| return` (or `|| exit`) is not
helpful. Therefore, take this case into consideration when checking for
explicit loop termination.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl                        |  3 +++
 t/chainlint/loop-upstream-pipe.expect | 10 ++++++++++
 t/chainlint/loop-upstream-pipe.test   | 11 +++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 t/chainlint/loop-upstream-pipe.expect
 create mode 100644 t/chainlint/loop-upstream-pipe.test

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 674b3ddf69..386999ce65 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -487,6 +487,9 @@ sub parse_loop_body {
 	my @tokens = $self->SUPER::parse_loop_body(@_);
 	# did loop signal failure via "|| return" or "|| exit"?
 	return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens);
+	# did loop upstream of a pipe signal failure via "|| echo 'impossible
+	# text'" as the final command in the loop body?
+	return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
 	# flag missing "return/exit" handling explicit failure in loop body
 	my $n = find_non_nl(\@tokens);
 	splice(@tokens, $n + 1, 0, '?!LOOP?!');
diff --git a/t/chainlint/loop-upstream-pipe.expect b/t/chainlint/loop-upstream-pipe.expect
new file mode 100644
index 0000000000..0b82ecc4b9
--- /dev/null
+++ b/t/chainlint/loop-upstream-pipe.expect
@@ -0,0 +1,10 @@
+(
+	git rev-list --objects --no-object-names base..loose |
+	while read oid
+	do
+		path="$objdir/$(test_oid_to_path "$oid")" &&
+		printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" ||
+		echo "object list generation failed for $oid"
+	done |
+	sort -k1
+) >expect &&
diff --git a/t/chainlint/loop-upstream-pipe.test b/t/chainlint/loop-upstream-pipe.test
new file mode 100644
index 0000000000..efb77da897
--- /dev/null
+++ b/t/chainlint/loop-upstream-pipe.test
@@ -0,0 +1,11 @@
+(
+	git rev-list --objects --no-object-names base..loose |
+	while read oid
+	do
+# LINT: "|| echo" signals failure in loop upstream of a pipe
+		path="$objdir/$(test_oid_to_path "$oid")" &&
+		printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" ||
+		echo "object list generation failed for $oid"
+	done |
+	sort -k1
+) >expect &&

From 56066523ed3ebd16b455e99ce954ec19b6ac5ada Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:52 +0000
Subject: [PATCH 23/45] t/chainlint: add more chainlint.pl self-tests

During the development of chainlint.pl, numerous new self-tests were
created to verify correct functioning beyond the checks already
represented by the existing self-tests. The new checks fall into several
categories:

* behavior of the lexical analyzer for complex cases, such as line
  splicing, token pasting, entering and exiting string contexts inside
  and outside of test script bodies; for instance:

    test_expect_success 'title' '
      x=$(echo "something" |
        sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\''
    '

* behavior of the parser for all compound grammatical constructs, such
  as `if...fi`, `case...esac`, `while...done`, `{...}`, etc., and for
  other legal shell grammatical constructs not covered by existing
  chainlint.sed self-tests, as well as complex cases, such as:

    OUT=$( ((large_git 1>&3) | :) 3>&1 ) &&

* detection of problems, such as &&-chain breakage, from top-level to
  any depth since the existing self-tests do not cover any top-level
  context and only cover subshells one level deep due to limitations of
  chainlint.sed

* address blind spots in chainlint.sed (such as not detecting a broken
  &&-chain on a one-line for-loop in a subshell[1]) which chainlint.pl
  correctly detects

* real-world cases which tripped up chainlint.pl during its development

[1]: https://lore.kernel.org/git/dce35a47012fecc6edc11c68e91dbb485c5bc36f.1661663880.git.gitgitgadget@gmail.com/

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint/blank-line-before-esac.expect     | 18 +++++++++++
 t/chainlint/blank-line-before-esac.test       | 19 +++++++++++
 t/chainlint/block.expect                      | 13 +++++++-
 t/chainlint/block.test                        | 15 ++++++++-
 t/chainlint/chained-block.expect              |  9 ++++++
 t/chainlint/chained-block.test                | 11 +++++++
 t/chainlint/chained-subshell.expect           | 10 ++++++
 t/chainlint/chained-subshell.test             | 13 ++++++++
 .../command-substitution-subsubshell.expect   |  2 ++
 .../command-substitution-subsubshell.test     |  3 ++
 t/chainlint/double-here-doc.expect            |  2 ++
 t/chainlint/double-here-doc.test              | 12 +++++++
 t/chainlint/dqstring-line-splice.expect       |  3 ++
 t/chainlint/dqstring-line-splice.test         |  7 ++++
 t/chainlint/dqstring-no-interpolate.expect    | 11 +++++++
 t/chainlint/dqstring-no-interpolate.test      | 15 +++++++++
 t/chainlint/empty-here-doc.expect             |  3 ++
 t/chainlint/empty-here-doc.test               |  5 +++
 t/chainlint/exclamation.expect                |  4 +++
 t/chainlint/exclamation.test                  |  8 +++++
 t/chainlint/for-loop-abbreviated.expect       |  5 +++
 t/chainlint/for-loop-abbreviated.test         |  6 ++++
 t/chainlint/function.expect                   | 11 +++++++
 t/chainlint/function.test                     | 13 ++++++++
 t/chainlint/here-doc-indent-operator.expect   |  5 +++
 t/chainlint/here-doc-indent-operator.test     | 13 ++++++++
 t/chainlint/if-condition-split.expect         |  7 ++++
 t/chainlint/if-condition-split.test           |  8 +++++
 t/chainlint/one-liner-for-loop.expect         |  9 ++++++
 t/chainlint/one-liner-for-loop.test           | 10 ++++++
 t/chainlint/sqstring-in-sqstring.expect       |  4 +++
 t/chainlint/sqstring-in-sqstring.test         |  5 +++
 t/chainlint/token-pasting.expect              | 27 ++++++++++++++++
 t/chainlint/token-pasting.test                | 32 +++++++++++++++++++
 34 files changed, 336 insertions(+), 2 deletions(-)
 create mode 100644 t/chainlint/blank-line-before-esac.expect
 create mode 100644 t/chainlint/blank-line-before-esac.test
 create mode 100644 t/chainlint/chained-block.expect
 create mode 100644 t/chainlint/chained-block.test
 create mode 100644 t/chainlint/chained-subshell.expect
 create mode 100644 t/chainlint/chained-subshell.test
 create mode 100644 t/chainlint/command-substitution-subsubshell.expect
 create mode 100644 t/chainlint/command-substitution-subsubshell.test
 create mode 100644 t/chainlint/double-here-doc.expect
 create mode 100644 t/chainlint/double-here-doc.test
 create mode 100644 t/chainlint/dqstring-line-splice.expect
 create mode 100644 t/chainlint/dqstring-line-splice.test
 create mode 100644 t/chainlint/dqstring-no-interpolate.expect
 create mode 100644 t/chainlint/dqstring-no-interpolate.test
 create mode 100644 t/chainlint/empty-here-doc.expect
 create mode 100644 t/chainlint/empty-here-doc.test
 create mode 100644 t/chainlint/exclamation.expect
 create mode 100644 t/chainlint/exclamation.test
 create mode 100644 t/chainlint/for-loop-abbreviated.expect
 create mode 100644 t/chainlint/for-loop-abbreviated.test
 create mode 100644 t/chainlint/function.expect
 create mode 100644 t/chainlint/function.test
 create mode 100644 t/chainlint/here-doc-indent-operator.expect
 create mode 100644 t/chainlint/here-doc-indent-operator.test
 create mode 100644 t/chainlint/if-condition-split.expect
 create mode 100644 t/chainlint/if-condition-split.test
 create mode 100644 t/chainlint/one-liner-for-loop.expect
 create mode 100644 t/chainlint/one-liner-for-loop.test
 create mode 100644 t/chainlint/sqstring-in-sqstring.expect
 create mode 100644 t/chainlint/sqstring-in-sqstring.test
 create mode 100644 t/chainlint/token-pasting.expect
 create mode 100644 t/chainlint/token-pasting.test

diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect
new file mode 100644
index 0000000000..48ed4eb124
--- /dev/null
+++ b/t/chainlint/blank-line-before-esac.expect
@@ -0,0 +1,18 @@
+test_done ( ) {
+	case "$test_failure" in
+	0 )
+		test_at_end_hook_
+
+		exit 0 ;;
+
+	* )
+		if test $test_external_has_tap -eq 0
+		then
+			say_color error "# failed $test_failure among $msg"
+			say "1..$test_count"
+		fi
+
+		exit 1 ;;
+
+		esac
+}
diff --git a/t/chainlint/blank-line-before-esac.test b/t/chainlint/blank-line-before-esac.test
new file mode 100644
index 0000000000..cecccad19f
--- /dev/null
+++ b/t/chainlint/blank-line-before-esac.test
@@ -0,0 +1,19 @@
+# LINT: blank line before "esac"
+test_done () {
+	case "$test_failure" in
+	0)
+		test_at_end_hook_
+
+		exit 0 ;;
+
+	*)
+		if test $test_external_has_tap -eq 0
+		then
+			say_color error "# failed $test_failure among $msg"
+			say "1..$test_count"
+		fi
+
+		exit 1 ;;
+
+	esac
+}
diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect
index 37dbf7d95f..a3bcea492a 100644
--- a/t/chainlint/block.expect
+++ b/t/chainlint/block.expect
@@ -9,4 +9,15 @@
 		echo c
 	} ?!AMP?!
 	baz
-)
+) &&
+
+{
+	echo a ; ?!AMP?! echo b
+} &&
+{ echo a ; ?!AMP?! echo b ; } &&
+
+{
+	echo "${var}9" &&
+	echo "done"
+} &&
+finis
diff --git a/t/chainlint/block.test b/t/chainlint/block.test
index 0a82fd579f..4ab69a4afc 100644
--- a/t/chainlint/block.test
+++ b/t/chainlint/block.test
@@ -11,4 +11,17 @@
 		echo c
 	}
 	baz
-)
+) &&
+
+# LINT: ";" not allowed in place of "&&"
+{
+	echo a; echo b
+} &&
+{ echo a; echo b; } &&
+
+# LINT: "}" inside string not mistaken as end of block
+{
+	echo "${var}9" &&
+	echo "done"
+} &&
+finis
diff --git a/t/chainlint/chained-block.expect b/t/chainlint/chained-block.expect
new file mode 100644
index 0000000000..574cdceb07
--- /dev/null
+++ b/t/chainlint/chained-block.expect
@@ -0,0 +1,9 @@
+echo nobody home && {
+	test the doohicky ?!AMP?!
+	right now
+} &&
+
+GIT_EXTERNAL_DIFF=echo git diff | {
+	read path oldfile oldhex oldmode newfile newhex newmode &&
+	test "z$oh" = "z$oldhex"
+}
diff --git a/t/chainlint/chained-block.test b/t/chainlint/chained-block.test
new file mode 100644
index 0000000000..86f81ece63
--- /dev/null
+++ b/t/chainlint/chained-block.test
@@ -0,0 +1,11 @@
+# LINT: start of block chained to preceding command
+echo nobody home && {
+	test the doohicky
+	right now
+} &&
+
+# LINT: preceding command pipes to block on same line
+GIT_EXTERNAL_DIFF=echo git diff | {
+	read path oldfile oldhex oldmode newfile newhex newmode &&
+	test "z$oh" = "z$oldhex"
+}
diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect
new file mode 100644
index 0000000000..af0369d328
--- /dev/null
+++ b/t/chainlint/chained-subshell.expect
@@ -0,0 +1,10 @@
+mkdir sub && (
+	cd sub &&
+	foo the bar ?!AMP?!
+	nuff said
+) &&
+
+cut "-d " -f actual | ( read s1 s2 s3 &&
+test -f $s1 ?!AMP?!
+test $(cat $s2) = tree2path1 &&
+test $(cat $s3) = tree3path1 )
diff --git a/t/chainlint/chained-subshell.test b/t/chainlint/chained-subshell.test
new file mode 100644
index 0000000000..4ff6ddd8cb
--- /dev/null
+++ b/t/chainlint/chained-subshell.test
@@ -0,0 +1,13 @@
+# LINT: start of subshell chained to preceding command
+mkdir sub && (
+	cd sub &&
+	foo the bar
+	nuff said
+) &&
+
+# LINT: preceding command pipes to subshell on same line
+cut "-d " -f actual | (read s1 s2 s3 &&
+test -f $s1
+test $(cat $s2) = tree2path1 &&
+# LINT: closing subshell ")" correctly detected on same line as "$(...)"
+test $(cat $s3) = tree3path1)
diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect
new file mode 100644
index 0000000000..ab2f79e845
--- /dev/null
+++ b/t/chainlint/command-substitution-subsubshell.expect
@@ -0,0 +1,2 @@
+OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) &&
+test_match_signal 13 "$OUT"
diff --git a/t/chainlint/command-substitution-subsubshell.test b/t/chainlint/command-substitution-subsubshell.test
new file mode 100644
index 0000000000..321de2951c
--- /dev/null
+++ b/t/chainlint/command-substitution-subsubshell.test
@@ -0,0 +1,3 @@
+# LINT: subshell nested in subshell nested in command substitution
+OUT=$( ((large_git 1>&3) | :) 3>&1 ) &&
+test_match_signal 13 "$OUT"
diff --git a/t/chainlint/double-here-doc.expect b/t/chainlint/double-here-doc.expect
new file mode 100644
index 0000000000..75477bb1ad
--- /dev/null
+++ b/t/chainlint/double-here-doc.expect
@@ -0,0 +1,2 @@
+run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF &&
+check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR
diff --git a/t/chainlint/double-here-doc.test b/t/chainlint/double-here-doc.test
new file mode 100644
index 0000000000..cd584a4357
--- /dev/null
+++ b/t/chainlint/double-here-doc.test
@@ -0,0 +1,12 @@
+run_sub_test_lib_test_err run-inv-range-start \
+	"--run invalid range start" \
+	--run="a-5" <<-\EOF &&
+test_expect_success "passing test #1" "true"
+test_done
+EOF
+check_sub_test_lib_test_err run-inv-range-start \
+	<<-\EOF_OUT 3<<-EOF_ERR
+> FATAL: Unexpected exit with code 1
+EOF_OUT
+> error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ}
+EOF_ERR
diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect
new file mode 100644
index 0000000000..bf9ced60d4
--- /dev/null
+++ b/t/chainlint/dqstring-line-splice.expect
@@ -0,0 +1,3 @@
+echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect &&
+test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual &&
+test_cmp expect actual
diff --git a/t/chainlint/dqstring-line-splice.test b/t/chainlint/dqstring-line-splice.test
new file mode 100644
index 0000000000..b40714439f
--- /dev/null
+++ b/t/chainlint/dqstring-line-splice.test
@@ -0,0 +1,7 @@
+# LINT: line-splice within DQ-string
+'"
+echo 'fatal: reword option of --fixup is mutually exclusive with'\
+	'--patch/--interactive/--all/--include/--only' >expect &&
+test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual &&
+test_cmp expect actual
+"'
diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect
new file mode 100644
index 0000000000..10724987a5
--- /dev/null
+++ b/t/chainlint/dqstring-no-interpolate.expect
@@ -0,0 +1,11 @@
+grep "^ ! [rejected][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out &&
+
+grep "^\.git$" output.txt &&
+
+
+(
+	cd client$version &&
+	GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input)
+) > output &&
+	cut -d ' ' -f 2 < output | sort > actual &&
+	test_cmp expect actual
diff --git a/t/chainlint/dqstring-no-interpolate.test b/t/chainlint/dqstring-no-interpolate.test
new file mode 100644
index 0000000000..d2f4219cbb
--- /dev/null
+++ b/t/chainlint/dqstring-no-interpolate.test
@@ -0,0 +1,15 @@
+# LINT: regex dollar-sign eol anchor in double-quoted string not special
+grep "^ ! \[rejected\][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out &&
+
+# LINT: escaped "$" not mistaken for variable expansion
+grep "^\\.git\$" output.txt &&
+
+'"
+(
+	cd client$version &&
+# LINT: escaped dollar-sign in double-quoted test body
+	GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. \$(cat ../input)
+) >output &&
+	cut -d ' ' -f 2 <output | sort >actual &&
+	test_cmp expect actual
+"'
diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect
new file mode 100644
index 0000000000..f42f2d41ba
--- /dev/null
+++ b/t/chainlint/empty-here-doc.expect
@@ -0,0 +1,3 @@
+git ls-tree $tree path > current &&
+cat > expected <<EOF &&
+test_output
diff --git a/t/chainlint/empty-here-doc.test b/t/chainlint/empty-here-doc.test
new file mode 100644
index 0000000000..24fc165de3
--- /dev/null
+++ b/t/chainlint/empty-here-doc.test
@@ -0,0 +1,5 @@
+git ls-tree $tree path >current &&
+# LINT: empty here-doc
+cat >expected <<\EOF &&
+EOF
+test_output
diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect
new file mode 100644
index 0000000000..2d961a58c6
--- /dev/null
+++ b/t/chainlint/exclamation.expect
@@ -0,0 +1,4 @@
+if ! condition ; then echo nope ; else yep ; fi &&
+test_prerequisite !MINGW &&
+mail uucp!address &&
+echo !whatever!
diff --git a/t/chainlint/exclamation.test b/t/chainlint/exclamation.test
new file mode 100644
index 0000000000..323595b5bd
--- /dev/null
+++ b/t/chainlint/exclamation.test
@@ -0,0 +1,8 @@
+# LINT: "! word" is two tokens
+if ! condition; then echo nope; else yep; fi &&
+# LINT: "!word" is single token, not two tokens "!" and "word"
+test_prerequisite !MINGW &&
+# LINT: "word!word" is single token, not three tokens "word", "!", and "word"
+mail uucp!address &&
+# LINT: "!word!" is single token, not three tokens "!", "word", and "!"
+echo !whatever!
diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect
new file mode 100644
index 0000000000..a21007a63f
--- /dev/null
+++ b/t/chainlint/for-loop-abbreviated.expect
@@ -0,0 +1,5 @@
+for it
+do
+	path=$(expr "$it" : ( [^:]*) ) &&
+	git update-index --add "$path" || exit
+done
diff --git a/t/chainlint/for-loop-abbreviated.test b/t/chainlint/for-loop-abbreviated.test
new file mode 100644
index 0000000000..1084eccb89
--- /dev/null
+++ b/t/chainlint/for-loop-abbreviated.test
@@ -0,0 +1,6 @@
+# LINT: for-loop lacking optional "in [word...]" before "do"
+for it
+do
+	path=$(expr "$it" : '\([^:]*\)') &&
+	git update-index --add "$path" || exit
+done
diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect
new file mode 100644
index 0000000000..a14388e6b9
--- /dev/null
+++ b/t/chainlint/function.expect
@@ -0,0 +1,11 @@
+sha1_file ( ) {
+	echo "$*" | sed "s#..#.git/objects/&/#"
+} &&
+
+remove_object ( ) {
+	file=$(sha1_file "$*") &&
+	test -e "$file" ?!AMP?!
+	rm -f "$file"
+} ?!AMP?!
+
+sha1_file arg && remove_object arg
diff --git a/t/chainlint/function.test b/t/chainlint/function.test
new file mode 100644
index 0000000000..5ee59562c9
--- /dev/null
+++ b/t/chainlint/function.test
@@ -0,0 +1,13 @@
+# LINT: "()" in function definition not mistaken for subshell
+sha1_file() {
+	echo "$*" | sed "s#..#.git/objects/&/#"
+} &&
+
+# LINT: broken &&-chain in function and after function
+remove_object() {
+	file=$(sha1_file "$*") &&
+	test -e "$file"
+	rm -f "$file"
+}
+
+sha1_file arg && remove_object arg
diff --git a/t/chainlint/here-doc-indent-operator.expect b/t/chainlint/here-doc-indent-operator.expect
new file mode 100644
index 0000000000..fb6cf7285d
--- /dev/null
+++ b/t/chainlint/here-doc-indent-operator.expect
@@ -0,0 +1,5 @@
+cat > expect <<-EOF &&
+
+cat > expect <<-EOF ?!AMP?!
+
+cleanup
diff --git a/t/chainlint/here-doc-indent-operator.test b/t/chainlint/here-doc-indent-operator.test
new file mode 100644
index 0000000000..c8a6f18eb4
--- /dev/null
+++ b/t/chainlint/here-doc-indent-operator.test
@@ -0,0 +1,13 @@
+# LINT: whitespace between operator "<<-" and tag legal
+cat >expect <<- EOF &&
+header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
+num_commits: $1
+chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
+EOF
+
+# LINT: not an indented here-doc; just a plain here-doc with tag named "-EOF"
+cat >expect << -EOF
+this is not indented
+-EOF
+
+cleanup
diff --git a/t/chainlint/if-condition-split.expect b/t/chainlint/if-condition-split.expect
new file mode 100644
index 0000000000..ee745ef8d7
--- /dev/null
+++ b/t/chainlint/if-condition-split.expect
@@ -0,0 +1,7 @@
+if bob &&
+   marcia ||
+   kevin
+then
+	echo "nomads" ?!AMP?!
+	echo "for sure"
+fi
diff --git a/t/chainlint/if-condition-split.test b/t/chainlint/if-condition-split.test
new file mode 100644
index 0000000000..240daa9fd5
--- /dev/null
+++ b/t/chainlint/if-condition-split.test
@@ -0,0 +1,8 @@
+# LINT: "if" condition split across multiple lines at "&&" or "||"
+if bob &&
+   marcia ||
+   kevin
+then
+	echo "nomads"
+	echo "for sure"
+fi
diff --git a/t/chainlint/one-liner-for-loop.expect b/t/chainlint/one-liner-for-loop.expect
new file mode 100644
index 0000000000..51a3dc7c54
--- /dev/null
+++ b/t/chainlint/one-liner-for-loop.expect
@@ -0,0 +1,9 @@
+git init dir-rename-and-content &&
+(
+	cd dir-rename-and-content &&
+	test_write_lines 1 2 3 4 5 >foo &&
+	mkdir olddir &&
+	for i in a b c; do echo $i >olddir/$i; ?!LOOP?! done ?!AMP?!
+	git add foo olddir &&
+	git commit -m "original" &&
+)
diff --git a/t/chainlint/one-liner-for-loop.test b/t/chainlint/one-liner-for-loop.test
new file mode 100644
index 0000000000..4bd8c066c7
--- /dev/null
+++ b/t/chainlint/one-liner-for-loop.test
@@ -0,0 +1,10 @@
+git init dir-rename-and-content &&
+(
+	cd dir-rename-and-content &&
+	test_write_lines 1 2 3 4 5 >foo &&
+	mkdir olddir &&
+# LINT: one-liner for-loop missing "|| exit"; also broken &&-chain
+	for i in a b c; do echo $i >olddir/$i; done
+	git add foo olddir &&
+	git commit -m "original" &&
+)
diff --git a/t/chainlint/sqstring-in-sqstring.expect b/t/chainlint/sqstring-in-sqstring.expect
new file mode 100644
index 0000000000..cf0b591cf7
--- /dev/null
+++ b/t/chainlint/sqstring-in-sqstring.expect
@@ -0,0 +1,4 @@
+perl -e '
+	defined($_ = -s $_) or die for @ARGV;
+	exit 1 if $ARGV[0] <= $ARGV[1];
+' test-2-$packname_2.pack test-3-$packname_3.pack
diff --git a/t/chainlint/sqstring-in-sqstring.test b/t/chainlint/sqstring-in-sqstring.test
new file mode 100644
index 0000000000..77a425e0c7
--- /dev/null
+++ b/t/chainlint/sqstring-in-sqstring.test
@@ -0,0 +1,5 @@
+# LINT: SQ-string Perl code fragment within SQ-string
+perl -e '\''
+	defined($_ = -s $_) or die for @ARGV;
+	exit 1 if $ARGV[0] <= $ARGV[1];
+'\'' test-2-$packname_2.pack test-3-$packname_3.pack
diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect
new file mode 100644
index 0000000000..342360bcd0
--- /dev/null
+++ b/t/chainlint/token-pasting.expect
@@ -0,0 +1,27 @@
+git config filter.rot13.smudge ./rot13.sh &&
+git config filter.rot13.clean ./rot13.sh &&
+
+{
+    echo "*.t filter=rot13" ?!AMP?!
+    echo "*.i ident"
+} > .gitattributes &&
+
+{
+    echo a b c d e f g h i j k l m ?!AMP?!
+    echo n o p q r s t u v w x y z ?!AMP?!
+    echo '$Id$'
+} > test &&
+cat test > test.t &&
+cat test > test.o &&
+cat test > test.i &&
+git add test test.t test.i &&
+rm -f test test.t test.i &&
+git checkout -- test test.t test.i &&
+
+echo "content-test2" > test2.o &&
+echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?!
+
+downstream_url_for_sed=$(
+	printf "%sn" "$downstream_url" |
+	sed -e 's/\/\\/g' -e 's/[[/.*^$]/\&/g'
+)
diff --git a/t/chainlint/token-pasting.test b/t/chainlint/token-pasting.test
new file mode 100644
index 0000000000..b4610ce815
--- /dev/null
+++ b/t/chainlint/token-pasting.test
@@ -0,0 +1,32 @@
+# LINT: single token; composite of multiple strings
+git config filter.rot13.smudge ./rot13.sh &&
+git config filter.rot13.clean ./rot13.sh &&
+
+{
+    echo "*.t filter=rot13"
+    echo "*.i ident"
+} >.gitattributes &&
+
+{
+    echo a b c d e f g h i j k l m
+    echo n o p q r s t u v w x y z
+# LINT: exit/enter string context and escaped-quote outside of string
+    echo '\''$Id$'\''
+} >test &&
+cat test >test.t &&
+cat test >test.o &&
+cat test >test.i &&
+git add test test.t test.i &&
+rm -f test test.t test.i &&
+git checkout -- test test.t test.i &&
+
+echo "content-test2" >test2.o &&
+# LINT: exit/enter string context and escaped-quote outside of string
+echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o"
+
+# LINT: single token; composite of multiple strings
+downstream_url_for_sed=$(
+	printf "%s\n" "$downstream_url" |
+# LINT: exit/enter string context; "&" inside string not command terminator
+	sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\''
+)

From 9fd911237f94680e0d1985e1f2fba751b16f5a94 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:53 +0000
Subject: [PATCH 24/45] test-lib: retire "lint harder" optimization hack

`test_run_` in test-lib.sh "lints" the body of a test by sending it down
a `sed chainlint.sed | grep` pipeline; this happens once for each test
run by a test script. Although this pipeline may seem relatively cheap
in isolation, it can become expensive when invoked 26800+ times by `make
test`, once for each test run, despite the existence of only 16500+ test
definitions across all tests scripts.

This difference in the number of tests defined in the scripts (16500+)
and the number of tests actually run by `make test` (26800+) is
explained by the fact that some test scripts run a very large number of
small tests, all driven by a series of functions/loops which fill in the
test bodies. This means that certain test definitions are being linted
repeatedly (tens or hundreds of times) unnecessarily. To avoid such
unnecessary work, 2d86a96220 (t: avoid sed-based chain-linting in some
expensive cases, 2021-05-13) added an optimization hack which allows
individual scripts to manually suppress the unnecessary repeated linting
of the same test definition.

However, unlike chainlint.sed which checks a test body as the test is
run, chainlint.pl checks each test definition just once, no matter how
many times the test is run, thus the sort of optimization hack
introduced by 2d86a96220 is no longer needed and can be retired.
Therefore, revert 2d86a96220.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/README             | 5 -----
 t/t0027-auto-crlf.sh | 7 +------
 t/t3070-wildmatch.sh | 5 -----
 t/test-lib.sh        | 7 ++-----
 4 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/t/README b/t/README
index 2f439f9658..979b2d4833 100644
--- a/t/README
+++ b/t/README
@@ -196,11 +196,6 @@ appropriately before running "make". Short options can be bundled, i.e.
 	this feature by setting the GIT_TEST_CHAIN_LINT environment
 	variable to "1" or "0", respectively.
 
-	A few test scripts disable some of the more advanced
-	chain-linting detection in the name of efficiency. You can
-	override this by setting the GIT_TEST_CHAIN_LINT_HARDER
-	environment variable to "1".
-
 --stress::
 	Run the test script repeatedly in multiple parallel jobs until
 	one of them fails.  Useful for reproducing rare failures in
diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh
index a22e0e1382..a94ac1eae3 100755
--- a/t/t0027-auto-crlf.sh
+++ b/t/t0027-auto-crlf.sh
@@ -387,9 +387,7 @@ test_expect_success 'setup main' '
 	test_tick
 '
 
-# Disable extra chain-linting for the next set of tests. There are many
-# auto-generated ones that are not worth checking over and over.
-GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0
+
 
 warn_LF_CRLF="LF will be replaced by CRLF"
 warn_CRLF_LF="CRLF will be replaced by LF"
@@ -606,9 +604,6 @@ do
 	checkout_files     ""    "$id" "crlf" true    ""       CRLF  CRLF  CRLF         CRLF_mix_CR  CRLF_nul
 done
 
-# The rest of the tests are unique; do the usual linting.
-unset GIT_TEST_CHAIN_LINT_HARDER_DEFAULT
-
 # Should be the last test case: remove some files from the worktree
 test_expect_success 'ls-files --eol -d -z' '
 	rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes &&
diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
index f9539968e4..5d871fde96 100755
--- a/t/t3070-wildmatch.sh
+++ b/t/t3070-wildmatch.sh
@@ -5,11 +5,6 @@ test_description='wildmatch tests'
 TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
-# Disable expensive chain-lint tests; all of the tests in this script
-# are variants of a few trivial test-tool invocations, and there are a lot of
-# them.
-GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0
-
 should_create_test_file() {
 	file=$1
 
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 377cc1c120..dc0d059109 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1091,11 +1091,8 @@ test_run_ () {
 		trace=
 		# 117 is magic because it is unlikely to match the exit
 		# code of other programs
-		if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" ||
-		   {
-			test "${GIT_TEST_CHAIN_LINT_HARDER:-${GIT_TEST_CHAIN_LINT_HARDER_DEFAULT:-1}}" != 0 &&
-			$(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!')
-		   }
+		if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') ||
+			test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)"
 		then
 			BUG "broken &&-chain or run-away HERE-DOC: $1"
 		fi

From 23a14f301662df6d003b5bf4dc598f02311c6b30 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:54 +0000
Subject: [PATCH 25/45] test-lib: replace chainlint.sed with chainlint.pl

By automatically invoking chainlint.sed upon each test it runs,
`test_run_` in test-lib.sh ensures that broken &&-chains will be
detected early as tests are modified or new are tests created since it
is typical to run a test script manually (i.e. `./t1234-test-script.sh`)
during test development. Now that the implementation of chainlint.pl is
complete, modify test-lib.sh to invoke it automatically instead of
chainlint.sed each time a test script is run.

This change reduces the number of "linter" invocations from 26800+ (once
per test run) down to 1050+ (once per test script), however, a
subsequent change will drop the number of invocations to 1 per `make
test`, thus fully realizing the benefit of the new linter.

Note that the "magic exit code 117" &&-chain checker added by bb79af9d09
(t/test-lib: introduce --chain-lint option, 2015-03-20) which is built
into t/test-lib.sh is retained since it has near zero-cost and
(theoretically) may catch a broken &&-chain not caught by chainlint.pl.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/buildsystems/CMakeLists.txt | 2 +-
 t/test-lib.sh                       | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 2237109b57..ca358a21a5 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -1076,7 +1076,7 @@ if(NOT ${CMAKE_BINARY_DIR}/CMakeCache.txt STREQUAL ${CACHE_PATH})
 		"string(REPLACE \"\${GIT_BUILD_DIR_REPL}\" \"GIT_BUILD_DIR=\\\"$TEST_DIRECTORY/../${BUILD_DIR_RELATIVE}\\\"\" content \"\${content}\")\n"
 		"file(WRITE ${CMAKE_SOURCE_DIR}/t/test-lib.sh \${content})")
 	#misc copies
-	file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.sed DESTINATION ${CMAKE_BINARY_DIR}/t/)
+	file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.pl DESTINATION ${CMAKE_BINARY_DIR}/t/)
 	file(COPY ${CMAKE_SOURCE_DIR}/po/is.po DESTINATION ${CMAKE_BINARY_DIR}/po/)
 	file(COPY ${CMAKE_SOURCE_DIR}/mergetools/tkdiff DESTINATION ${CMAKE_BINARY_DIR}/mergetools/)
 	file(COPY ${CMAKE_SOURCE_DIR}/contrib/completion/git-prompt.sh DESTINATION ${CMAKE_BINARY_DIR}/contrib/completion/)
diff --git a/t/test-lib.sh b/t/test-lib.sh
index dc0d059109..a65df2fd22 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1091,8 +1091,7 @@ test_run_ () {
 		trace=
 		# 117 is magic because it is unlikely to match the exit
 		# code of other programs
-		if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') ||
-			test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)"
+		if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)"
 		then
 			BUG "broken &&-chain or run-away HERE-DOC: $1"
 		fi
@@ -1588,6 +1587,12 @@ then
 	BAIL_OUT_ENV_NEEDS_SANITIZE_LEAK "GIT_TEST_SANITIZE_LEAK_LOG=true"
 fi
 
+if test "${GIT_TEST_CHAIN_LINT:-1}" != 0
+then
+	"$PERL_PATH" "$TEST_DIRECTORY/chainlint.pl" "$0" ||
+		BUG "lint error (see '?!...!? annotations above)"
+fi
+
 # Last-minute variable setup
 USER_HOME="$HOME"
 HOME="$TRASH_DIRECTORY"

From 69b9924b875079babb1d3f665bdc719c4871ba73 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:55 +0000
Subject: [PATCH 26/45] t/Makefile: teach `make test` and `make prove` to run
 chainlint.pl

Unlike chainlint.sed which "lints" a single test body at a time, thus is
invoked once per test, chainlint.pl can check all test bodies in all
test scripts with a single invocation. As such, it is akin to other bulk
"linters" run by the Makefile, such as `test-lint-shell-syntax`,
`test-lint-duplicates`, etc.

Therefore, teach `make test` and `make prove` to invoke chainlint.pl
along with the other bulk linters. Also, since the single chainlint.pl
invocation by `make test` or `make prove` has already checked all tests
in all scripts, instruct the individual test scripts not to run
chainlint.pl on themselves unnecessarily.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/Makefile | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/t/Makefile b/t/Makefile
index 11f276774e..3db48c0cb6 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -36,14 +36,21 @@ CHAINLINTTMP_SQ = $(subst ','\'',$(CHAINLINTTMP))
 
 T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh))
 THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh)))
+TLIBS = $(sort $(wildcard lib-*.sh)) annotate-tests.sh
 TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh))
+TINTEROP = $(sort $(wildcard interop/i[0-9][0-9][0-9][0-9]-*.sh))
 CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test)))
 CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl
 
+# `test-chainlint` (which is a dependency of `test-lint`, `test` and `prove`)
+# checks all tests in all scripts via a single invocation, so tell individual
+# scripts not to "chainlint" themselves
+CHAINLINTSUPPRESS = GIT_TEST_CHAIN_LINT=0 && export GIT_TEST_CHAIN_LINT &&
+
 all: $(DEFAULT_TEST_TARGET)
 
 test: pre-clean check-chainlint $(TEST_LINT)
-	$(MAKE) aggregate-results-and-cleanup
+	$(CHAINLINTSUPPRESS) $(MAKE) aggregate-results-and-cleanup
 
 failed:
 	@failed=$$(cd '$(TEST_RESULTS_DIRECTORY_SQ)' && \
@@ -52,7 +59,7 @@ failed:
 	test -z "$$failed" || $(MAKE) $$failed
 
 prove: pre-clean check-chainlint $(TEST_LINT)
-	@echo "*** prove ***"; $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS)
+	@echo "*** prove ***"; $(CHAINLINTSUPPRESS) $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS)
 	$(MAKE) clean-except-prove-cache
 
 $(T):
@@ -99,6 +106,9 @@ check-chainlint:
 
 test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \
 	test-lint-filenames
+ifneq ($(GIT_TEST_CHAIN_LINT),0)
+test-lint: test-chainlint
+endif
 
 test-lint-duplicates:
 	@dups=`echo $(T) $(TPERF) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \
@@ -121,6 +131,9 @@ test-lint-filenames:
 		test -z "$$bad" || { \
 		echo >&2 "non-portable file name(s): $$bad"; exit 1; }
 
+test-chainlint:
+	@$(CHAINLINT) $(T) $(TLIBS) $(TPERF) $(TINTEROP)
+
 aggregate-results-and-cleanup: $(T)
 	$(MAKE) aggregate-results
 	$(MAKE) clean
@@ -136,4 +149,5 @@ valgrind:
 perf:
 	$(MAKE) -C perf/ all
 
-.PHONY: pre-clean $(T) aggregate-results clean valgrind perf check-chainlint clean-chainlint
+.PHONY: pre-clean $(T) aggregate-results clean valgrind perf \
+	check-chainlint clean-chainlint test-chainlint

From fb41727b7ed7f62d121cd846f826fb1c62d1bc6a Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Thu, 1 Sep 2022 00:29:56 +0000
Subject: [PATCH 27/45] t: retire unused chainlint.sed

Retire chainlint.sed since it has been replaced by a more accurate and
functional &&-chain "linter", thus is no longer used.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.sed | 399 ------------------------------------------------
 1 file changed, 399 deletions(-)
 delete mode 100644 t/chainlint.sed

diff --git a/t/chainlint.sed b/t/chainlint.sed
deleted file mode 100644
index dc4ce37cb5..0000000000
--- a/t/chainlint.sed
+++ /dev/null
@@ -1,399 +0,0 @@
-#------------------------------------------------------------------------------
-# Detect broken &&-chains in tests.
-#
-# At present, only &&-chains in subshells are examined by this linter;
-# top-level &&-chains are instead checked directly by the test framework. Like
-# the top-level &&-chain linter, the subshell linter (intentionally) does not
-# check &&-chains within {...} blocks.
-#
-# Checking for &&-chain breakage is done line-by-line by pure textual
-# inspection.
-#
-# Incomplete lines (those ending with "\") are stitched together with following
-# lines to simplify processing, particularly of "one-liner" statements.
-# Top-level here-docs are swallowed to avoid false positives within the
-# here-doc body, although the statement to which the here-doc is attached is
-# retained.
-#
-# Heuristics are used to detect end-of-subshell when the closing ")" is cuddled
-# with the final subshell statement on the same line:
-#
-#    (cd foo &&
-#        bar)
-#
-# in order to avoid misinterpreting the ")" in constructs such as "x=$(...)"
-# and "case $x in *)" as ending the subshell.
-#
-# Lines missing a final "&&" are flagged with "?!AMP?!", as are lines which
-# chain commands with ";" internally rather than "&&". A line may be flagged
-# for both violations.
-#
-# Detection of a missing &&-link in a multi-line subshell is complicated by the
-# fact that the last statement before the closing ")" must not end with "&&".
-# Since processing is line-by-line, it is not known whether a missing "&&" is
-# legitimate or not until the _next_ line is seen. To accommodate this, within
-# multi-line subshells, each line is stored in sed's "hold" area until after
-# the next line is seen and processed. If the next line is a stand-alone ")",
-# then a missing "&&" on the previous line is legitimate; otherwise a missing
-# "&&" is a break in the &&-chain.
-#
-#    (
-#         cd foo &&
-#         bar
-#    )
-#
-# In practical terms, when "bar" is encountered, it is flagged with "?!AMP?!",
-# but when the stand-alone ")" line is seen which closes the subshell, the
-# "?!AMP?!" violation is removed from the "bar" line (retrieved from the "hold"
-# area) since the final statement of a subshell must not end with "&&". The
-# final line of a subshell may still break the &&-chain by using ";" internally
-# to chain commands together rather than "&&", but an internal "?!AMP?!" is
-# never removed from a line even though a line-ending "?!AMP?!" might be.
-#
-# Care is taken to recognize the last _statement_ of a multi-line subshell, not
-# necessarily the last textual _line_ within the subshell, since &&-chaining
-# applies to statements, not to lines. Consequently, blank lines, comment
-# lines, and here-docs are swallowed (but not the command to which the here-doc
-# is attached), leaving the last statement in the "hold" area, not the last
-# line, thus simplifying &&-link checking.
-#
-# The final statement before "done" in for- and while-loops, and before "elif",
-# "else", and "fi" in if-then-else likewise must not end with "&&", thus
-# receives similar treatment.
-#
-# Swallowing here-docs with arbitrary tags requires a bit of finesse. When a
-# line such as "cat <<EOF" is seen, the here-doc tag is copied to the front of
-# the line enclosed in angle brackets as a sentinel, giving "<EOF>cat <<EOF".
-# As each subsequent line is read, it is appended to the target line and a
-# (whitespace-loose) back-reference match /^<(.*)>\n\1$/ is attempted to see if
-# the content inside "<...>" matches the entirety of the newly-read line. For
-# instance, if the next line read is "some data", when concatenated with the
-# target line, it becomes "<EOF>cat <<EOF\nsome data", and a match is attempted
-# to see if "EOF" matches "some data". Since it doesn't, the next line is
-# attempted. When a line consisting of only "EOF" (and possible whitespace) is
-# encountered, it is appended to the target line giving "<EOF>cat <<EOF\nEOF",
-# in which case the "EOF" inside "<...>" does match the text following the
-# newline, thus the closing here-doc tag has been found. The closing tag line
-# and the "<...>" prefix on the target line are then discarded, leaving just
-# the target line "cat <<EOF".
-#------------------------------------------------------------------------------
-
-# incomplete line -- slurp up next line
-:squash
-/\\$/ {
-	N
-	s/\\\n//
-	bsquash
-}
-
-# here-doc -- swallow it to avoid false hits within its body (but keep the
-# command to which it was attached)
-/<<-*[ 	]*[\\'"]*[A-Za-z0-9_]/ {
-	/"[^"]*<<[^"]*"/bnotdoc
-	s/^\(.*<<-*[ 	]*\)[\\'"]*\([A-Za-z0-9_][A-Za-z0-9_]*\)['"]*/<\2>\1\2/
-	:hered
-	N
-	/^<\([^>]*\)>.*\n[ 	]*\1[ 	]*$/!{
-		s/\n.*$//
-		bhered
-	}
-	s/^<[^>]*>//
-	s/\n.*$//
-}
-:notdoc
-
-# one-liner "(...) &&"
-/^[ 	]*!*[ 	]*(..*)[ 	]*&&[ 	]*$/boneline
-
-# same as above but without trailing "&&"
-/^[ 	]*!*[ 	]*(..*)[ 	]*$/boneline
-
-# one-liner "(...) >x" (or "2>x" or "<x" or "|x" or "&"
-/^[ 	]*!*[ 	]*(..*)[ 	]*[0-9]*[<>|&]/boneline
-
-# multi-line "(...\n...)"
-/^[ 	]*(/bsubsh
-
-# innocuous line -- print it and advance to next line
-b
-
-# found one-liner "(...)" -- mark suspect if it uses ";" internally rather than
-# "&&" (but not ";" in a string)
-:oneline
-/;/{
-	/"[^"]*;[^"]*"/!s/;/; ?!AMP?!/
-}
-b
-
-:subsh
-# bare "(" line? -- stash for later printing
-/^[ 	]*([	]*$/ {
-	h
-	bnextln
-}
-# "(..." line -- "(" opening subshell cuddled with command; temporarily replace
-# "(" with sentinel "^" and process the line as if "(" had been seen solo on
-# the preceding line; this temporary replacement prevents several rules from
-# accidentally thinking "(" introduces a nested subshell; "^" is changed back
-# to "(" at output time
-x
-s/.*//
-x
-s/(/^/
-bslurp
-
-:nextln
-N
-s/.*\n//
-
-:slurp
-# incomplete line "...\"
-/\\$/bicmplte
-# multi-line quoted string "...\n..."?
-/"/bdqstr
-# multi-line quoted string '...\n...'? (but not contraction in string "it's")
-/'/{
-	/"[^'"]*'[^'"]*"/!bsqstr
-}
-:folded
-# here-doc -- swallow it (but not "<<" in a string)
-/<<-*[ 	]*[\\'"]*[A-Za-z0-9_]/{
-	/"[^"]*<<[^"]*"/!bheredoc
-}
-# comment or empty line -- discard since final non-comment, non-empty line
-# before closing ")", "done", "elsif", "else", or "fi" will need to be
-# re-visited to drop "suspect" marking since final line of those constructs
-# legitimately lacks "&&", so "suspect" mark must be removed
-/^[ 	]*#/bnextln
-/^[ 	]*$/bnextln
-# in-line comment -- strip it (but not "#" in a string, Bash ${#...} array
-# length, or Perforce "//depot/path#42" revision in filespec)
-/[ 	]#/{
-	/"[^"]*#[^"]*"/!s/[ 	]#.*$//
-}
-# one-liner "case ... esac"
-/^[ 	^]*case[ 	]*..*esac/bchkchn
-# multi-line "case ... esac"
-/^[ 	^]*case[ 	]..*[ 	]in/bcase
-# multi-line "for ... done" or "while ... done"
-/^[ 	^]*for[ 	]..*[ 	]in/bcont
-/^[ 	^]*while[ 	]/bcont
-/^[ 	]*do[ 	]/bcont
-/^[ 	]*do[ 	]*$/bcont
-/;[ 	]*do/bcont
-/^[ 	]*done[ 	]*&&[ 	]*$/bdone
-/^[ 	]*done[ 	]*$/bdone
-/^[ 	]*done[ 	]*[<>|]/bdone
-/^[ 	]*done[ 	]*)/bdone
-/||[ 	]*exit[ 	]/bcont
-/||[ 	]*exit[ 	]*$/bcont
-# multi-line "if...elsif...else...fi"
-/^[ 	^]*if[ 	]/bcont
-/^[ 	]*then[ 	]/bcont
-/^[ 	]*then[ 	]*$/bcont
-/;[ 	]*then/bcont
-/^[ 	]*elif[ 	]/belse
-/^[ 	]*elif[ 	]*$/belse
-/^[ 	]*else[ 	]/belse
-/^[ 	]*else[ 	]*$/belse
-/^[ 	]*fi[ 	]*&&[ 	]*$/bdone
-/^[ 	]*fi[ 	]*$/bdone
-/^[ 	]*fi[ 	]*[<>|]/bdone
-/^[ 	]*fi[ 	]*)/bdone
-# nested one-liner "(...) &&"
-/^[ 	^]*(.*)[ 	]*&&[ 	]*$/bchkchn
-# nested one-liner "(...)"
-/^[ 	^]*(.*)[ 	]*$/bchkchn
-# nested one-liner "(...) >x" (or "2>x" or "<x" or "|x")
-/^[ 	^]*(.*)[ 	]*[0-9]*[<>|]/bchkchn
-# nested multi-line "(...\n...)"
-/^[ 	^]*(/bnest
-# multi-line "{...\n...}"
-/^[ 	^]*{/bblock
-# closing ")" on own line -- exit subshell
-/^[ 	]*)/bclssolo
-# "$((...))" -- arithmetic expansion; not closing ")"
-/\$(([^)][^)]*))[^)]*$/bchkchn
-# "$(...)" -- command substitution; not closing ")"
-/\$([^)][^)]*)[^)]*$/bchkchn
-# multi-line "$(...\n...)" -- command substitution; treat as nested subshell
-/\$([^)]*$/bnest
-# "=(...)" -- Bash array assignment; not closing ")"
-/=(/bchkchn
-# closing "...) &&"
-/)[ 	]*&&[ 	]*$/bclose
-# closing "...)"
-/)[ 	]*$/bclose
-# closing "...) >x" (or "2>x" or "<x" or "|x")
-/)[ 	]*[<>|]/bclose
-:chkchn
-# mark suspect if line uses ";" internally rather than "&&" (but not ";" in a
-# string and not ";;" in one-liner "case...esac")
-/;/{
-	/;;/!{
-		/"[^"]*;[^"]*"/!s/;/; ?!AMP?!/
-	}
-}
-# line ends with pipe "...|" -- valid; not missing "&&"
-/|[ 	]*$/bcont
-# missing end-of-line "&&" -- mark suspect
-/&&[ 	]*$/!s/$/ ?!AMP?!/
-:cont
-# retrieve and print previous line
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-n
-bslurp
-
-# found incomplete line "...\" -- slurp up next line
-:icmplte
-N
-s/\\\n//
-bslurp
-
-# check for multi-line double-quoted string "...\n..." -- fold to one line
-:dqstr
-# remove all quote pairs
-s/"\([^"]*\)"/@!\1@!/g
-# done if no dangling quote
-/"/!bdqdone
-# otherwise, slurp next line and try again
-N
-s/\n//
-bdqstr
-:dqdone
-s/@!/"/g
-bfolded
-
-# check for multi-line single-quoted string '...\n...' -- fold to one line
-:sqstr
-# remove all quote pairs
-s/'\([^']*\)'/@!\1@!/g
-# done if no dangling quote
-/'/!bsqdone
-# otherwise, slurp next line and try again
-N
-s/\n//
-bsqstr
-:sqdone
-s/@!/'/g
-bfolded
-
-# found here-doc -- swallow it to avoid false hits within its body (but keep
-# the command to which it was attached)
-:heredoc
-s/^\(.*\)<<\(-*[ 	]*\)[\\'"]*\([A-Za-z0-9_][A-Za-z0-9_]*\)['"]*/<\3>\1?!HERE?!\2\3/
-:hdocsub
-N
-/^<\([^>]*\)>.*\n[ 	]*\1[ 	]*$/!{
-	s/\n.*$//
-	bhdocsub
-}
-s/^<[^>]*>//
-s/\n.*$//
-bfolded
-
-# found "case ... in" -- pass through untouched
-:case
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-n
-:cascom
-/^[ 	]*#/{
-	N
-	s/.*\n//
-	bcascom
-}
-/^[ 	]*esac/bslurp
-bcase
-
-# found "else" or "elif" -- drop "suspect" from final line before "else" since
-# that line legitimately lacks "&&"
-:else
-x
-s/\( ?!AMP?!\)* ?!AMP?!$//
-x
-bcont
-
-# found "done" closing for-loop or while-loop, or "fi" closing if-then -- drop
-# "suspect" from final contained line since that line legitimately lacks "&&"
-:done
-x
-s/\( ?!AMP?!\)* ?!AMP?!$//
-x
-# is 'done' or 'fi' cuddled with ")" to close subshell?
-/done.*)/bclose
-/fi.*)/bclose
-bchkchn
-
-# found nested multi-line "(...\n...)" -- pass through untouched
-:nest
-x
-:nstslrp
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-n
-:nstcom
-# comment -- not closing ")" if in comment
-/^[ 	]*#/{
-	N
-	s/.*\n//
-	bnstcom
-}
-# closing ")" on own line -- stop nested slurp
-/^[ 	]*)/bnstcl
-# "$((...))" -- arithmetic expansion; not closing ")"
-/\$(([^)][^)]*))[^)]*$/bnstcnt
-# "$(...)" -- command substitution; not closing ")"
-/\$([^)][^)]*)[^)]*$/bnstcnt
-# closing "...)" -- stop nested slurp
-/)/bnstcl
-:nstcnt
-x
-bnstslrp
-:nstcl
-# is it "))" which closes nested and parent subshells?
-/)[ 	]*)/bslurp
-bchkchn
-
-# found multi-line "{...\n...}" block -- pass through untouched
-:block
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-n
-:blkcom
-/^[ 	]*#/{
-	N
-	s/.*\n//
-	bblkcom
-}
-# closing "}" -- stop block slurp
-/}/bchkchn
-bblock
-
-# found closing ")" on own line -- drop "suspect" from final line of subshell
-# since that line legitimately lacks "&&" and exit subshell loop
-:clssolo
-x
-s/\( ?!AMP?!\)* ?!AMP?!$//
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-p
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-b
-
-# found closing "...)" -- exit subshell loop
-:close
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-p
-x
-s/^\([ 	]*\)^/\1(/
-s/?!HERE?!/<</g
-b

From b6faef396f0380237deae144ee48caf5ee20ae1e Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:42 +0000
Subject: [PATCH 28/45] scalar: fix command documentation section header

Rename the last section header in 'contrib/scalar/scalar.txt' from "Scalar"
to "GIT". The linting rules of the 'documentation' CI build enforce the
existence of a "GIT" section in command documentation. Although 'scalar.txt'
is not yet checked, it will be in a future patch.

Here, changing the header name is more appropriate than making a
Scalar-specific exception to the linting rule. The existing "Scalar" section
contains only a link back to the main Git documentation, essentially the
same as the "GIT" section in builtin documentation. Changing the section
name further clarifies the Scalar-Git association and maintains consistency
with the rest of Git.

Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 contrib/scalar/scalar.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt
index 1a12dc4507..f33436c7f6 100644
--- a/contrib/scalar/scalar.txt
+++ b/contrib/scalar/scalar.txt
@@ -161,6 +161,6 @@ SEE ALSO
 --------
 linkgit:git-clone[1], linkgit:git-maintenance[1].
 
-Scalar
+GIT
 ---
-Associated with the linkgit:git[1] suite
+Part of the linkgit:git[1] suite

From 7b5c93c6c6847b4b6037e38418bc8bbb8c2eada8 Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:43 +0000
Subject: [PATCH 29/45] scalar: include in standard Git build & installation

Move 'scalar' out of 'contrib/' and into the root of the Git tree. The goal
of this change is to build 'scalar' as part of the standard Git build &
install processes.

This patch includes both the physical move of Scalar's files out of
'contrib/' ('scalar.c', 'scalar.txt', and 't9xxx-scalar.sh'), and the
changes to the build definitions in 'Makefile' and 'CMakelists.txt' to
accommodate the new program.

At a high level, Scalar is built so that:
- there is a 'scalar-objs' target (similar to those created in 029bac01a8
  (Makefile: add {program,xdiff,test,git,fuzz}-objs & objects targets,
  2021-02-23)) for debugging purposes.
- it appears in the root of the install directory (rather than the
  gitexecdir).
- it is included in the 'bin-wrappers/' directory for use in tests.
- it receives a platform-specific executable suffix (e.g., '.exe'), if
  applicable.
- 'scalar.txt' is installed as 'man1' documentation.
- the 'clean' target removes the 'scalar' executable.

Additionally, update the root level '.gitignore' file to ignore the Scalar
executable.

Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .gitignore                                    |  1 +
 Documentation/Makefile                        |  1 +
 {contrib/scalar => Documentation}/scalar.txt  |  0
 Makefile                                      | 31 ++++---
 contrib/buildsystems/CMakeLists.txt           |  9 ++-
 contrib/scalar/.gitignore                     |  2 -
 contrib/scalar/Makefile                       | 35 --------
 contrib/scalar/t/Makefile                     | 81 -------------------
 contrib/scalar/scalar.c => scalar.c           |  0
 .../t/t9099-scalar.sh => t/t9210-scalar.sh    | 10 +--
 10 files changed, 28 insertions(+), 142 deletions(-)
 rename {contrib/scalar => Documentation}/scalar.txt (100%)
 delete mode 100644 contrib/scalar/.gitignore
 delete mode 100644 contrib/scalar/Makefile
 delete mode 100644 contrib/scalar/t/Makefile
 rename contrib/scalar/scalar.c => scalar.c (100%)
 rename contrib/scalar/t/t9099-scalar.sh => t/t9210-scalar.sh (96%)

diff --git a/.gitignore b/.gitignore
index 80b530bbed..3d1b880101 100644
--- a/.gitignore
+++ b/.gitignore
@@ -185,6 +185,7 @@
 /git-whatchanged
 /git-worktree
 /git-write-tree
+/scalar
 /git-core-*/?*
 /git.res
 /gitweb/GITWEB-BUILD-OPTIONS
diff --git a/Documentation/Makefile b/Documentation/Makefile
index bd6b6fcb93..16c9e06239 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -21,6 +21,7 @@ MAN1_TXT += $(filter-out \
 MAN1_TXT += git.txt
 MAN1_TXT += gitk.txt
 MAN1_TXT += gitweb.txt
+MAN1_TXT += scalar.txt
 
 # man5 / man7 guides (note: new guides should also be added to command-list.txt)
 MAN5_TXT += gitattributes.txt
diff --git a/contrib/scalar/scalar.txt b/Documentation/scalar.txt
similarity index 100%
rename from contrib/scalar/scalar.txt
rename to Documentation/scalar.txt
diff --git a/Makefile b/Makefile
index eac30126e2..e03f32ec1e 100644
--- a/Makefile
+++ b/Makefile
@@ -608,7 +608,9 @@ FUZZ_OBJS =
 FUZZ_PROGRAMS =
 GIT_OBJS =
 LIB_OBJS =
+SCALAR_OBJS =
 OBJECTS =
+OTHER_PROGRAMS =
 PROGRAM_OBJS =
 PROGRAMS =
 EXCLUDED_PROGRAMS =
@@ -821,10 +823,12 @@ BUILT_INS += git-switch$X
 BUILT_INS += git-whatchanged$X
 
 # what 'all' will build but not install in gitexecdir
-OTHER_PROGRAMS = git$X
+OTHER_PROGRAMS += git$X
+OTHER_PROGRAMS += scalar$X
 
 # what test wrappers are needed and 'install' will install, in bindir
 BINDIR_PROGRAMS_NEED_X += git
+BINDIR_PROGRAMS_NEED_X += scalar
 BINDIR_PROGRAMS_NEED_X += git-receive-pack
 BINDIR_PROGRAMS_NEED_X += git-shell
 BINDIR_PROGRAMS_NEED_X += git-upload-archive
@@ -2222,7 +2226,7 @@ profile-fast: profile-clean
 
 all:: $(ALL_COMMANDS_TO_INSTALL) $(SCRIPT_LIB) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS
 ifneq (,$X)
-	$(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';)
+	$(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';)
 endif
 
 all::
@@ -2545,7 +2549,12 @@ GIT_OBJS += git.o
 .PHONY: git-objs
 git-objs: $(GIT_OBJS)
 
+SCALAR_OBJS += scalar.o
+.PHONY: scalar-objs
+scalar-objs: $(SCALAR_OBJS)
+
 OBJECTS += $(GIT_OBJS)
+OBJECTS += $(SCALAR_OBJS)
 OBJECTS += $(PROGRAM_OBJS)
 OBJECTS += $(TEST_OBJS)
 OBJECTS += $(XDIFF_OBJS)
@@ -2556,10 +2565,6 @@ ifndef NO_CURL
 	OBJECTS += http.o http-walker.o remote-curl.o
 endif
 
-SCALAR_SOURCES := contrib/scalar/scalar.c
-SCALAR_OBJECTS := $(SCALAR_SOURCES:c=o)
-OBJECTS += $(SCALAR_OBJECTS)
-
 .PHONY: objects
 objects: $(OBJECTS)
 
@@ -2691,7 +2696,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o GIT-LDFLAGS $(GITLIBS
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS)
 
-contrib/scalar/scalar$X: $(SCALAR_OBJECTS) GIT-LDFLAGS $(GITLIBS)
+scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \
 		$(filter %.o,$^) $(LIBS)
 
@@ -2747,8 +2752,7 @@ XGETTEXT_FLAGS_SH = $(XGETTEXT_FLAGS) --language=Shell \
 XGETTEXT_FLAGS_PERL = $(XGETTEXT_FLAGS) --language=Perl \
 	--keyword=__ --keyword=N__ --keyword="__n:1,2"
 MSGMERGE_FLAGS = --add-location --backup=off --update
-LOCALIZED_C = $(sort $(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(SCALAR_SOURCES) \
-	        $(GENERATED_H))
+LOCALIZED_C = $(sort $(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(GENERATED_H))
 LOCALIZED_SH = $(sort $(SCRIPT_SH) git-sh-setup.sh)
 LOCALIZED_PERL = $(sort $(SCRIPT_PERL))
 
@@ -3062,7 +3066,7 @@ bin-wrappers/%: wrap-for-bin.sh
 	$(call mkdir_p_parent_template)
 	$(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
 	     -e 's|@@BUILD_DIR@@|$(shell pwd)|' \
-	     -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%$(X),$(@F))$(patsubst git%,$(X),$(filter $(@F),$(BINDIR_PROGRAMS_NEED_X)))|' < $< > $@ && \
+	     -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%,$(@F))$(if $(filter-out $(BINDIR_PROGRAMS_NO_X),$(@F)),$(X),)|' < $< > $@ && \
 	chmod +x $@
 
 # GNU make supports exporting all variables by "export" without parameters.
@@ -3276,14 +3280,14 @@ ifndef NO_TCLTK
 	$(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install
 endif
 ifneq (,$X)
-	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';)
+	$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';)
 endif
 
 	bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \
 	execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \
 	destdir_from_execdir_SQ=$$(echo '$(gitexecdir_relative_SQ)' | sed -e 's|[^/][^/]*|..|g') && \
 	{ test "$$bindir/" = "$$execdir/" || \
-	  for p in git$X $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \
+	  for p in $(OTHER_PROGRAMS) $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \
 		$(RM) "$$execdir/$$p" && \
 		test -n "$(INSTALL_SYMLINKS)" && \
 		ln -s "$$destdir_from_execdir_SQ/$(bindir_relative_SQ)/$$p" "$$execdir/$$p" || \
@@ -3458,7 +3462,7 @@ clean: profile-clean coverage-clean cocciclean
 	$(RM) git.res
 	$(RM) $(OBJECTS)
 	$(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB)
-	$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
+	$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS)
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) $(FUZZ_PROGRAMS)
 	$(RM) $(SP_OBJ)
@@ -3509,6 +3513,7 @@ ALL_COMMANDS += git-citool
 ALL_COMMANDS += git-gui
 ALL_COMMANDS += gitk
 ALL_COMMANDS += gitweb
+ALL_COMMANDS += scalar
 
 .PHONY: check-docs
 check-docs::
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 2237109b57..bae203c1fb 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -610,7 +610,7 @@ unset(CMAKE_REQUIRED_INCLUDES)
 #programs
 set(PROGRAMS_BUILT
 	git git-daemon git-http-backend git-sh-i18n--envsubst
-	git-shell)
+	git-shell scalar)
 
 if(NOT CURL_FOUND)
 	list(APPEND excluded_progs git-http-fetch git-http-push)
@@ -757,6 +757,9 @@ target_link_libraries(git-sh-i18n--envsubst common-main)
 add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c)
 target_link_libraries(git-shell common-main)
 
+add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c)
+target_link_libraries(scalar common-main)
+
 if(CURL_FOUND)
 	add_library(http_obj OBJECT ${CMAKE_SOURCE_DIR}/http.c)
 
@@ -903,7 +906,7 @@ list(TRANSFORM git_perl_scripts PREPEND "${CMAKE_BINARY_DIR}/")
 
 #install
 foreach(program ${PROGRAMS_BUILT})
-if(program STREQUAL "git" OR program STREQUAL "git-shell")
+if(program MATCHES "^(git|git-shell|scalar)$")
 install(TARGETS ${program}
 	RUNTIME DESTINATION bin)
 else()
@@ -977,7 +980,7 @@ endif()
 
 #wrapper scripts
 set(wrapper_scripts
-	git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext)
+	git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext scalar)
 
 set(wrapper_test_scripts
 	test-fake-ssh test-tool)
diff --git a/contrib/scalar/.gitignore b/contrib/scalar/.gitignore
deleted file mode 100644
index ff3d47e84d..0000000000
--- a/contrib/scalar/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-/*.exe
-/scalar
diff --git a/contrib/scalar/Makefile b/contrib/scalar/Makefile
deleted file mode 100644
index 37f283f35d..0000000000
--- a/contrib/scalar/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-# The default target of this Makefile is...
-all::
-
-# Import tree-wide shared Makefile behavior and libraries
-include ../../shared.mak
-
-include ../../config.mak.uname
--include ../../config.mak.autogen
--include ../../config.mak
-
-TARGETS = scalar$(X) scalar.o
-GITLIBS = ../../common-main.o ../../libgit.a ../../xdiff/lib.a
-
-all:: scalar$(X) ../../bin-wrappers/scalar
-
-$(GITLIBS):
-	$(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(subst ../../,,$@)
-
-$(TARGETS): $(GITLIBS) scalar.c
-	$(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(patsubst %,contrib/scalar/%,$@)
-
-clean:
-	$(RM) $(TARGETS) ../../bin-wrappers/scalar
-
-../../bin-wrappers/scalar: ../../wrap-for-bin.sh Makefile
-	@mkdir -p ../../bin-wrappers
-	$(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-	     -e 's|@@BUILD_DIR@@|$(shell cd ../.. && pwd)|' \
-	     -e 's|@@PROG@@|contrib/scalar/scalar$(X)|' < $< > $@ && \
-	chmod +x $@
-
-test: all
-	$(MAKE) -C t
-
-.PHONY: $(GITLIBS) all clean test FORCE
diff --git a/contrib/scalar/t/Makefile b/contrib/scalar/t/Makefile
deleted file mode 100644
index 1ed174a8cf..0000000000
--- a/contrib/scalar/t/Makefile
+++ /dev/null
@@ -1,81 +0,0 @@
-# Import tree-wide shared Makefile behavior and libraries
-include ../../../shared.mak
-
-# Run scalar tests
-#
-# Copyright (c) 2005,2021 Junio C Hamano, Johannes Schindelin
-#
-
--include ../../../config.mak.autogen
--include ../../../config.mak
-
-SHELL_PATH ?= $(SHELL)
-PERL_PATH ?= /usr/bin/perl
-RM ?= rm -f
-PROVE ?= prove
-DEFAULT_TEST_TARGET ?= test
-TEST_LINT ?= test-lint
-
-ifdef TEST_OUTPUT_DIRECTORY
-TEST_RESULTS_DIRECTORY = $(TEST_OUTPUT_DIRECTORY)/test-results
-else
-TEST_RESULTS_DIRECTORY = ../../../t/test-results
-endif
-
-# Shell quote;
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
-TEST_RESULTS_DIRECTORY_SQ = $(subst ','\'',$(TEST_RESULTS_DIRECTORY))
-
-T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh))
-
-all: $(DEFAULT_TEST_TARGET)
-
-test: $(TEST_LINT)
-	$(MAKE) aggregate-results-and-cleanup
-
-prove: $(TEST_LINT)
-	@echo "*** prove ***"; GIT_CONFIG=.git/config $(PROVE) --exec '$(SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS)
-	$(MAKE) clean-except-prove-cache
-
-$(T):
-	@echo "*** $@ ***"; GIT_CONFIG=.git/config '$(SHELL_PATH_SQ)' $@ $(GIT_TEST_OPTS)
-
-clean-except-prove-cache:
-	$(RM) -r 'trash directory'.*
-	$(RM) -r valgrind/bin
-
-clean: clean-except-prove-cache
-	$(RM) .prove
-
-test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax
-
-test-lint-duplicates:
-	@dups=`echo $(T) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \
-		test -z "$$dups" || { \
-		echo >&2 "duplicate test numbers:" $$dups; exit 1; }
-
-test-lint-executable:
-	@bad=`for i in $(T); do test -x "$$i" || echo $$i; done` && \
-		test -z "$$bad" || { \
-		echo >&2 "non-executable tests:" $$bad; exit 1; }
-
-test-lint-shell-syntax:
-	@'$(PERL_PATH_SQ)' ../../../t/check-non-portable-shell.pl $(T)
-
-aggregate-results-and-cleanup: $(T)
-	$(MAKE) aggregate-results
-	$(MAKE) clean
-
-aggregate-results:
-	for f in '$(TEST_RESULTS_DIRECTORY_SQ)'/t*-*.counts; do \
-		echo "$$f"; \
-	done | '$(SHELL_PATH_SQ)' ../../../t/aggregate-results.sh
-
-valgrind:
-	$(MAKE) GIT_TEST_OPTS="$(GIT_TEST_OPTS) --valgrind"
-
-test-results:
-	mkdir -p test-results
-
-.PHONY: $(T) aggregate-results clean valgrind
diff --git a/contrib/scalar/scalar.c b/scalar.c
similarity index 100%
rename from contrib/scalar/scalar.c
rename to scalar.c
diff --git a/contrib/scalar/t/t9099-scalar.sh b/t/t9210-scalar.sh
similarity index 96%
rename from contrib/scalar/t/t9099-scalar.sh
rename to t/t9210-scalar.sh
index dfb949f52e..14ca575a21 100755
--- a/contrib/scalar/t/t9099-scalar.sh
+++ b/t/t9210-scalar.sh
@@ -2,15 +2,9 @@
 
 test_description='test the `scalar` command'
 
-TEST_DIRECTORY=$PWD/../../../t
-export TEST_DIRECTORY
+. ./test-lib.sh
 
-# Make it work with --no-bin-wrappers
-PATH=$PWD/..:$PATH
-
-. ../../../t/test-lib.sh
-
-GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab ../cron.txt,launchctl:true,schtasks:true"
+GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true"
 export GIT_TEST_MAINT_SCHEDULER
 
 test_expect_success 'scalar shows a usage' '

From dd9603e22822fab19c0557dad0cf1825de325403 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 2 Sep 2022 15:56:44 +0000
Subject: [PATCH 30/45] git help: special-case `scalar`

With this commit, `git help scalar` will open the appropriate manual
or HTML page (instead of looking for `gitscalar`).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/help.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/builtin/help.c b/builtin/help.c
index 09ac4289f1..6f2796f211 100644
--- a/builtin/help.c
+++ b/builtin/help.c
@@ -440,6 +440,8 @@ static const char *cmd_to_page(const char *git_cmd)
 		return git_cmd;
 	else if (is_git_command(git_cmd))
 		return xstrfmt("git-%s", git_cmd);
+	else if (!strcmp("scalar", git_cmd))
+		return xstrdup(git_cmd);
 	else
 		return xstrfmt("git%s", git_cmd);
 }

From 951759d3a5cb20ffeff4836aa0c4b793fa142b51 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Fri, 2 Sep 2022 15:56:45 +0000
Subject: [PATCH 31/45] scalar: implement the `help` subcommand

It is merely handing off to `git help scalar`.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 scalar.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/scalar.c b/scalar.c
index 642d16124e..c5c1ce6891 100644
--- a/scalar.c
+++ b/scalar.c
@@ -819,6 +819,25 @@ static int cmd_delete(int argc, const char **argv)
 	return res;
 }
 
+static int cmd_help(int argc, const char **argv)
+{
+	struct option options[] = {
+		OPT_END(),
+	};
+	const char * const usage[] = {
+		"scalar help",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, NULL, options,
+			     usage, 0);
+
+	if (argc != 0)
+		usage_with_options(usage, options);
+
+	return run_git("help", "scalar", NULL);
+}
+
 static int cmd_version(int argc, const char **argv)
 {
 	int verbose = 0, build_options = 0;
@@ -858,6 +877,7 @@ static struct {
 	{ "run", cmd_run },
 	{ "reconfigure", cmd_reconfigure },
 	{ "delete", cmd_delete },
+	{ "help", cmd_help },
 	{ "version", cmd_version },
 	{ "diagnose", cmd_diagnose },
 	{ NULL, NULL},

From cc75e556a9de5d62c1ca52db900b729fc830f378 Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:46 +0000
Subject: [PATCH 32/45] scalar: add to 'git help -a' command list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add 'scalar' as a 'mainporcelain' command in the Git command list. Update
the regex in 'cmd-list.perl' used to match the first line of command
documentation to find 'scalar(1)'.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/cmd-list.perl | 2 +-
 command-list.txt            | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/cmd-list.perl b/Documentation/cmd-list.perl
index af5da45d28..9515a499a3 100755
--- a/Documentation/cmd-list.perl
+++ b/Documentation/cmd-list.perl
@@ -10,7 +10,7 @@ sub format_one {
 	$state = 0;
 	open I, '<', "$name.txt" or die "No such file $name.txt";
 	while (<I>) {
-		if (/^git[a-z0-9-]*\(([0-9])\)$/) {
+		if (/^(git|scalar)[a-z0-9-]*\(([0-9])\)$/) {
 			$mansection = $1;
 			next;
 		}
diff --git a/command-list.txt b/command-list.txt
index f96bdabd7d..93f94e42ab 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -235,3 +235,4 @@ gittutorial                             guide
 gittutorial-2                           guide
 gitweb                                  ancillaryinterrogators
 gitworkflows                            guide
+scalar                                  mainporcelain

From 14b4e7e5a455b771f9dee74a5b4eb9a10f2a1cd4 Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:47 +0000
Subject: [PATCH 33/45] scalar-clone: add test coverage

Create a new test file ('t9211-scalar-clone.sh') to exercise the options and
behavior of the 'scalar clone' command. Each test clones to a unique target
location and cleans up the cloned repo only when the test passes. This
ensures that failed tests' artifacts are captured in CI artifacts for
further debugging.

Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t9211-scalar-clone.sh | 151 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100755 t/t9211-scalar-clone.sh

diff --git a/t/t9211-scalar-clone.sh b/t/t9211-scalar-clone.sh
new file mode 100755
index 0000000000..dd33d87e9b
--- /dev/null
+++ b/t/t9211-scalar-clone.sh
@@ -0,0 +1,151 @@
+#!/bin/sh
+
+test_description='test the `scalar clone` subcommand'
+
+. ./test-lib.sh
+
+GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true"
+export GIT_TEST_MAINT_SCHEDULER
+
+test_expect_success 'set up repository to clone' '
+	rm -rf .git &&
+	git init to-clone &&
+	(
+		cd to-clone &&
+		git branch -m base &&
+
+		test_commit first &&
+		test_commit second &&
+		test_commit third &&
+
+		git switch -c parallel first &&
+		mkdir -p 1/2 &&
+		test_commit 1/2/3 &&
+
+		git switch base &&
+
+		# By default, permit
+		git config uploadpack.allowfilter true &&
+		git config uploadpack.allowanysha1inwant true
+	)
+'
+
+cleanup_clone () {
+	rm -rf "$1"
+}
+
+test_expect_success 'creates content in enlistment root' '
+	enlistment=cloned &&
+
+	scalar clone "file://$(pwd)/to-clone" $enlistment &&
+	ls -A $enlistment >enlistment-root &&
+	test_line_count = 1 enlistment-root &&
+	test_path_is_dir $enlistment/src &&
+	test_path_is_dir $enlistment/src/.git &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success 'with spaces' '
+	enlistment="cloned with space" &&
+
+	scalar clone "file://$(pwd)/to-clone" "$enlistment" &&
+	test_path_is_dir "$enlistment" &&
+	test_path_is_dir "$enlistment/src" &&
+	test_path_is_dir "$enlistment/src/.git" &&
+
+	cleanup_clone "$enlistment"
+'
+
+test_expect_success 'partial clone if supported by server' '
+	enlistment=partial-clone &&
+
+	scalar clone "file://$(pwd)/to-clone" $enlistment &&
+
+	(
+		cd $enlistment/src &&
+
+		# Two promisor packs: one for refs, the other for blobs
+		ls .git/objects/pack/pack-*.promisor >promisorlist &&
+		test_line_count = 2 promisorlist
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success 'fall back on full clone if partial unsupported' '
+	enlistment=no-partial-support &&
+
+	test_config -C to-clone uploadpack.allowfilter false &&
+	test_config -C to-clone uploadpack.allowanysha1inwant false &&
+
+	scalar clone "file://$(pwd)/to-clone" $enlistment 2>err &&
+	grep "filtering not recognized by server, ignoring" err &&
+
+	(
+		cd $enlistment/src &&
+
+		# Still get a refs promisor file, but none for blobs
+		ls .git/objects/pack/pack-*.promisor >promisorlist &&
+		test_line_count = 1 promisorlist
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success 'initializes sparse-checkout by default' '
+	enlistment=sparse &&
+
+	scalar clone "file://$(pwd)/to-clone" $enlistment &&
+	(
+		cd $enlistment/src &&
+		test_cmp_config true core.sparseCheckout &&
+		test_cmp_config true core.sparseCheckoutCone
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success '--full-clone does not create sparse-checkout' '
+	enlistment=full-clone &&
+
+	scalar clone --full-clone "file://$(pwd)/to-clone" $enlistment &&
+	(
+		cd $enlistment/src &&
+		test_cmp_config "" --default "" core.sparseCheckout &&
+		test_cmp_config "" --default "" core.sparseCheckoutCone
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success '--single-branch clones HEAD only' '
+	enlistment=single-branch &&
+
+	scalar clone --single-branch "file://$(pwd)/to-clone" $enlistment &&
+	(
+		cd $enlistment/src &&
+		git for-each-ref refs/remotes/origin >out &&
+		test_line_count = 1 out &&
+		grep "refs/remotes/origin/base" out
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_expect_success '--no-single-branch clones all branches' '
+	enlistment=no-single-branch &&
+
+	scalar clone --no-single-branch "file://$(pwd)/to-clone" $enlistment &&
+	(
+		cd $enlistment/src &&
+		git for-each-ref refs/remotes/origin >out &&
+		test_line_count = 2 out &&
+		grep "refs/remotes/origin/base" out &&
+		grep "refs/remotes/origin/parallel" out
+	) &&
+
+	cleanup_clone $enlistment
+'
+
+test_done

From e2809233d19e0faacedf59f229ec292cb9e7c7ef Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:48 +0000
Subject: [PATCH 34/45] t/perf: add Scalar performance tests

Create 'p9210-scalar.sh' for testing Scalar performance and comparing
performance of Git operations in Scalar registrations and standard
repositories. Example results:

Test                                                   this tree
------------------------------------------------------------------------
9210.2: scalar clone                                   14.82(18.00+3.63)
9210.3: git clone                                      26.15(36.67+6.90)
9210.4: git status (scalar)                            0.04(0.01+0.01)
9210.5: git status (non-scalar)                        0.10(0.02+0.11)
9210.6: test_commit --append --no-tag A (scalar)       0.08(0.02+0.03)
9210.7: test_commit --append --no-tag A (non-scalar)   0.13(0.03+0.11)

Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/p9210-scalar.sh | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100755 t/perf/p9210-scalar.sh

diff --git a/t/perf/p9210-scalar.sh b/t/perf/p9210-scalar.sh
new file mode 100755
index 0000000000..265f7cd1fe
--- /dev/null
+++ b/t/perf/p9210-scalar.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+test_description='test scalar performance'
+. ./perf-lib.sh
+
+test_perf_large_repo "$TRASH_DIRECTORY/to-clone"
+
+test_expect_success 'enable server-side partial clone' '
+	git -C to-clone config uploadpack.allowFilter true &&
+	git -C to-clone config uploadpack.allowAnySHA1InWant true
+'
+
+test_perf 'scalar clone' '
+	rm -rf scalar-clone &&
+	scalar clone "file://$(pwd)/to-clone" scalar-clone
+'
+
+test_perf 'git clone' '
+	rm -rf git-clone &&
+	git clone "file://$(pwd)/to-clone" git-clone
+'
+
+test_compare_perf () {
+	command=$1
+	shift
+	args=$*
+	test_perf "$command $args (scalar)" "
+		$command -C scalar-clone/src $args
+	"
+
+	test_perf "$command $args (non-scalar)" "
+		$command -C git-clone $args
+	"
+}
+
+test_compare_perf git status
+test_compare_perf test_commit --append --no-tag A
+
+test_done

From ba1b117eec2f73c84f73f827e6f3ac8b82b35585 Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:49 +0000
Subject: [PATCH 35/45] t/perf: add 'GIT_PERF_USE_SCALAR' run option

Add a 'GIT_PERF_USE_SCALAR' environment variable (and corresponding perf
config 'useScalar') to register a repository created with any of:

* test_perf_fresh_repo
* test_perf_default_repo
* test_perf_large_repo

as a Scalar enlistment. This is intended to allow a developer to test the
impact of Scalar on already-defined performance scenarios.

Suggested-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/perf/README      |  4 ++++
 t/perf/perf-lib.sh | 13 ++++++++++++-
 t/perf/run         |  3 +++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/t/perf/README b/t/perf/README
index fb9127a66f..8f217d7be7 100644
--- a/t/perf/README
+++ b/t/perf/README
@@ -95,6 +95,10 @@ You can set the following variables (also in your config.mak):
 	Git (e.g., performance of index-pack as the number of threads
 	changes). These can be enabled with GIT_PERF_EXTRA.
 
+    GIT_PERF_USE_SCALAR
+	Boolean indicating whether to register test repo(s) with Scalar
+	before executing tests.
+
 You can also pass the options taken by ordinary git tests; the most
 useful one is:
 
diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh
index 27c2801792..e7786775a9 100644
--- a/t/perf/perf-lib.sh
+++ b/t/perf/perf-lib.sh
@@ -49,6 +49,9 @@ export TEST_DIRECTORY TRASH_DIRECTORY GIT_BUILD_DIR GIT_TEST_CMP
 MODERN_GIT=$GIT_BUILD_DIR/bin-wrappers/git
 export MODERN_GIT
 
+MODERN_SCALAR=$GIT_BUILD_DIR/bin-wrappers/scalar
+export MODERN_SCALAR
+
 perf_results_dir=$TEST_RESULTS_DIR
 test -n "$GIT_PERF_SUBSECTION" && perf_results_dir="$perf_results_dir/$GIT_PERF_SUBSECTION"
 mkdir -p "$perf_results_dir"
@@ -120,6 +123,10 @@ test_perf_create_repo_from () {
 			# status" due to a locked index. Since we have
 			# a copy it's fine to remove the lock.
 			rm .git/index.lock
+		fi &&
+		if test_bool_env GIT_PERF_USE_SCALAR false
+		then
+			"$MODERN_SCALAR" register
 		fi
 	) || error "failed to copy repository '$source' to '$repo'"
 }
@@ -130,7 +137,11 @@ test_perf_fresh_repo () {
 	"$MODERN_GIT" init -q "$repo" &&
 	(
 		cd "$repo" &&
-		test_perf_do_repo_symlink_config_
+		test_perf_do_repo_symlink_config_ &&
+		if test_bool_env GIT_PERF_USE_SCALAR false
+		then
+			"$MODERN_SCALAR" register
+		fi
 	)
 }
 
diff --git a/t/perf/run b/t/perf/run
index 55219aa405..33da4d2aba 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -171,6 +171,9 @@ run_subsection () {
 	get_var_from_env_or_config "GIT_PERF_MAKE_COMMAND" "perf" "makeCommand"
 	get_var_from_env_or_config "GIT_PERF_MAKE_OPTS" "perf" "makeOpts"
 
+	get_var_from_env_or_config "GIT_PERF_USE_SCALAR" "perf" "useScalar" "--bool"
+	export GIT_PERF_USE_SCALAR
+
 	get_var_from_env_or_config "GIT_PERF_REPO_NAME" "perf" "repoName"
 	export GIT_PERF_REPO_NAME
 

From 9eb7a73158bdc91892a6b9a0b43b8f954b1e39e2 Mon Sep 17 00:00:00 2001
From: Victoria Dye <vdye@github.com>
Date: Fri, 2 Sep 2022 15:56:50 +0000
Subject: [PATCH 36/45] Documentation/technical: include Scalar technical doc

Include 'Documentation/technical/scalar.txt' alongside the other HTML
technical docs when installing them.

Now that the document is intended as a widely-accessible reference, remove
the internal work-in-progress roadmap from the document. Those details
should no longer be needed to guide Scalar's development and, if they were
left, they could fall out-of-date and be misleading to readers.

Signed-off-by: Victoria Dye <vdye@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile             |  1 +
 Documentation/technical/scalar.txt | 61 ------------------------------
 2 files changed, 1 insertion(+), 61 deletions(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 16c9e06239..9ec53afdf1 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -116,6 +116,7 @@ TECH_DOCS += technical/parallel-checkout
 TECH_DOCS += technical/partial-clone
 TECH_DOCS += technical/racy-git
 TECH_DOCS += technical/reftable
+TECH_DOCS += technical/scalar
 TECH_DOCS += technical/send-pack-pipeline
 TECH_DOCS += technical/shallow
 TECH_DOCS += technical/trivial-merge
diff --git a/Documentation/technical/scalar.txt b/Documentation/technical/scalar.txt
index 0600150b3a..921cb104c3 100644
--- a/Documentation/technical/scalar.txt
+++ b/Documentation/technical/scalar.txt
@@ -64,64 +64,3 @@ some "global" `git` options (e.g., `-c` and `-C`).
 Because `scalar` is not invoked as a Git subcommand (like `git scalar`), it is
 built and installed as its own executable in the `bin/` directory, alongside
 `git`, `git-gui`, etc.
-
-Roadmap
--------
-
-NOTE: this section will be removed once the remaining tasks outlined in this
-roadmap are complete.
-
-Scalar is a large enough project that it is being upstreamed incrementally,
-living in `contrib/` until it is feature-complete. So far, the following patch
-series have been accepted:
-
-- `scalar-the-beginning`: The initial patch series which sets up
-  `contrib/scalar/` and populates it with a minimal `scalar` command that
-  demonstrates the fundamental ideas.
-
-- `scalar-c-and-C`: The `scalar` command learns about two options that can be
-  specified before the command, `-c <key>=<value>` and `-C <directory>`.
-
-- `scalar-diagnose`: The `scalar` command is taught the `diagnose` subcommand.
-
-- `scalar-generalize-diagnose`: Move the functionality of `scalar diagnose`
-  into `git diagnose` and `git bugreport --diagnose`.
-
-- 'scalar-add-fsmonitor: Enable the built-in FSMonitor in Scalar
-  enlistments. At the end of this series, Scalar should be feature-complete
-  from the perspective of a user.
-
-Roughly speaking (and subject to change), the following series are needed to
-"finish" this initial version of Scalar:
-
-- Move Scalar to toplevel: Move Scalar out of `contrib/` and into the root of
-  `git`. This includes a variety of related updates, including:
-    - building & installing Scalar in the Git root-level 'make [install]'.
-    - builing & testing Scalar as part of CI.
-    - moving and expanding test coverage of Scalar (including perf tests).
-    - implementing 'scalar help'/'git help scalar' to display scalar
-      documentation.
-
-Finally, there are two additional patch series that exist in Microsoft's fork of
-Git, but there is no current plan to upstream them. There are some interesting
-ideas there, but the implementation is too specific to Azure Repos and/or VFS
-for Git to be of much help in general.
-
-These still exist mainly because the GVFS protocol is what Azure Repos has
-instead of partial clone, while Git is focused on improving partial clone:
-
-- `scalar-with-gvfs`: The primary purpose of this patch series is to support
-  existing Scalar users whose repositories are hosted in Azure Repos (which does
-  not support Git's partial clones, but supports its predecessor, the GVFS
-  protocol, which is used by Scalar to emulate the partial clone).
-
-  Since the GVFS protocol will never be supported by core Git, this patch series
-  will remain in Microsoft's fork of Git.
-
-- `run-scalar-functional-tests`: The Scalar project developed a quite
-  comprehensive set of integration tests (or, "Functional Tests"). They are the
-  sole remaining part of the original C#-based Scalar project, and this patch
-  adds a GitHub workflow that runs them all.
-
-  Since the tests partially depend on features that are only provided in the
-  `scalar-with-gvfs` patch series, this patch cannot be upstreamed.

From 7ead46810b507828c1481eaea6d64b9ed635b8b7 Mon Sep 17 00:00:00 2001
From: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Date: Fri, 9 Sep 2022 15:27:36 -0700
Subject: [PATCH 37/45] builtin/mv.c: fix possible segfault in add_slash()

A possible segfault was introduced in c08830de41 (mv: check if
<destination> is a SKIP_WORKTREE_DIR, 2022-08-09).

When running t7001 with SANITIZE=address, problem appears when running:

	git mv path1/path2/ .
or
	git mv directory ../
or
	any <destination> that makes dest_path[0] an empty string.

The add_slash() call could segfault when path argument to it is an empty
string, because it makes an out-of-bounds read to decide if an extra
slash '/' needs to be appended to it.

As add_slash() is used to make sure that a valid pathname to a file in
the given directory can be made by appending a filename after the value
returned from it, if path is an empty string, we want to return it
as-is.  The path to a file "F" in the top-level of the working tree
(i.e. path=="") is formed by appending "F" after "" (i.e. path) without
any slash in between.

So, just like the case where a non-empty path already ends with a slash,
return an empty path as-is.

Reported-by: Jeff King <peff@peff.net>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/mv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/mv.c b/builtin/mv.c
index 4b67bd096a..35bf1e9c71 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -71,7 +71,7 @@ static const char **internal_prefix_pathspec(const char *prefix,
 static const char *add_slash(const char *path)
 {
 	size_t len = strlen(path);
-	if (path[len - 1] != '/') {
+	if (len && path[len - 1] != '/') {
 		char *with_slash = xmalloc(st_add(len, 2));
 		memcpy(with_slash, path, len);
 		with_slash[len++] = '/';

From e40d906449950c140ba1e081b647c708d6d2979e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 11 Sep 2022 00:58:09 -0400
Subject: [PATCH 38/45] list-objects-filter: don't memset after releasing
 filter struct

If we see an error while parsing a "combine" filter, we call
list_objects_filter_release() to free any allocated memory,
and then use memset() to return the struct to a known state. But the
release function already does that reinitializing. Doing it again is
pointless.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 list-objects-filter-options.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 6cc4eb8e1c..ea989db260 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -187,10 +187,8 @@ static int parse_combine_filter(
 
 cleanup:
 	strbuf_list_free(subspecs);
-	if (result) {
+	if (result)
 		list_objects_filter_release(filter_options);
-		memset(filter_options, 0, sizeof(*filter_options));
-	}
 	return result;
 }
 

From aff4bfcf0a51a26d069ccc3a29e643a112867b27 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 11 Sep 2022 01:00:45 -0400
Subject: [PATCH 39/45] list-objects-filter: handle null default filter spec

When we have a remote.*.promisor config variable, we know that we're in
a partial clone. Usually there's a matching remote.*.partialclonefilter
option, which tells us which filter to use with the remote. If that
option is missing, we skip setting up the filter at all. But something
funny happens: we stick a NULL entry into the string_list storing the
text filter spec.

This is a weird state, and could possibly segfault if anybody called
called list_objects_filter_spec(), etc. In practice, nobody does,
because filter->choice will still be LOFC_DISABLED, so code generally
realizes there's no filter to use. And the string_list itself is OK,
because it starts in non-dup mode until we actually parse a filter spec.
So it blindly stores the NULL without even looking at it.

But it's probably worth avoiding this confused state. It's an accident
waiting to happen, and it will be a problem if we replace the lazy
initialization from 7e2619d8ff (list_objects_filter_options: plug leak
of filter_spec strings, 2022-09-08) with a real initialization function.

The history is a little interesting here, as the bug was introduced
during the merge resolution in 627b826834 (Merge branch
'md/list-objects-filter-combo', 2019-09-18).

The original logic comes from cac1137dc4 (list-objects: check if filter
is NULL before using, 2018-06-11), where we had a single string via
core.partialCloneFilter, and a simple NULL check was sufficient. And it
even added a test in t0410 that covers this situation.

Later, that was expanded to allow per-remote filters in fa3d1b63e8
(promisor-remote: parse remote.*.partialclonefilter, 2019-06-25). After
that commit, we get a promisor struct with a partial_clone_filter
string, which could be NULL. The commit checks only that the struct
pointer is non-NULL, which is enough. It may pass NULL to
gently_parse_list_objects_filter(), but that function is smart enough to
consider it a noop.

But in parallel, cf9ceb5a12 (list-objects-filter-options: make
filter_spec a string_list, 2019-06-27) added a new line of code: before
we call gently_parse_list_objets_filter(), we append the filter spec to
the string_list. By itself that was OK, since we'd have returned early
if the string was NULL.

When the two were merged in 627b826834, the result is that we return
early only if the struct is NULL, but not the string. And we append to
the string_list, meaning we may append NULL.

The solution is to return early if either is NULL, as it would mean we
don't have a configured filter.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 list-objects-filter-options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index ea989db260..18c51001dc 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -399,7 +399,7 @@ void partial_clone_get_default_filter_spec(
 	/*
 	 * Parse default value, but silently ignore it if it is invalid.
 	 */
-	if (!promisor)
+	if (!promisor || !promisor->partial_clone_filter)
 		return;
 
 	string_list_append(&filter_options->filter_spec,

From 2a01bdedf87d7cbfc4411ff5059cfe406e1637db Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 11 Sep 2022 01:03:07 -0400
Subject: [PATCH 40/45] list-objects-filter: add and use initializers

In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec
strings, 2022-09-08), we noted that the filter_spec string_list was
inconsistent in how it handled memory ownership of strings stored in the
list. The fix there was a bit of a band-aid to set the "strdup_strings"
variable right before adding anything.

That works OK, and it lets the users of the API continue to
zero-initialize the struct. But it makes the code a bit hard to follow
and accident-prone, as any other spots appending the filter_spec need to
think about whether to set the strdup_strings value, too (there's one
such spot in partial_clone_get_default_filter_spec(), which is probably
a possible memory leak).

So let's do that full cleanup now. We'll introduce a
LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as
appropriate (though it is for the "_options" struct, this matches the
corresponding list_objects_filter_release() function).

This is harder than it seems! Many other structs, like
git_transport_data, embed the filter struct. So they need to initialize
it themselves even if the rest of the enclosing struct is OK with
zero-initialization. I found all of the relevant spots by grepping
manually for declarations of list_objects_filter_options. And then doing
so recursively for structs which embed it, and ones which embed those,
and so on.

I'm pretty sure I got everything, but there's no change that would alert
the compiler if any topics in flight added new declarations. To catch
this case, we now double-check in the parsing function that things were
initialized as expected and BUG() if appropriate.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c               |  2 +-
 builtin/fetch-pack.c          |  1 +
 builtin/fetch.c               |  2 +-
 builtin/submodule--helper.c   |  8 ++++----
 bundle.h                      |  1 +
 list-objects-filter-options.c | 20 +++++++++++---------
 list-objects-filter-options.h |  3 +++
 revision.c                    |  1 +
 transport-helper.c            |  2 ++
 transport.c                   |  1 +
 upload-pack.c                 |  1 +
 11 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 9e0b2b45ca..78fb80eab6 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -72,7 +72,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP;
 static int option_dissociate;
 static int max_jobs = -1;
 static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP;
-static struct list_objects_filter_options filter_options;
+static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT;
 static int option_filter_submodules = -1;    /* unspecified */
 static int config_filter_submodules = -1;    /* unspecified */
 static struct string_list server_options = STRING_LIST_INIT_NODUP;
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index f045bbbe94..afe679368d 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -62,6 +62,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 	packet_trace_identity("fetch-pack");
 
 	memset(&args, 0, sizeof(args));
+	list_objects_filter_init(&args.filter_options);
 	args.uploadpack = "git-upload-pack";
 
 	for (i = 1; i < argc && *argv[i] == '-'; i++) {
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ac29c2b1ae..0e6238d283 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -80,7 +80,7 @@ static int recurse_submodules_cli = RECURSE_SUBMODULES_DEFAULT;
 static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND;
 static int shown_url = 0;
 static struct refspec refmap = REFSPEC_INIT_FETCH;
-static struct list_objects_filter_options filter_options;
+static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT;
 static struct string_list server_options = STRING_LIST_INIT_DUP;
 static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index c597df7528..f5dc63fab4 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1754,7 +1754,8 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 {
 	int dissociate = 0, quiet = 0, progress = 0, require_init = 0;
 	struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT;
-	struct list_objects_filter_options filter_options;
+	struct list_objects_filter_options filter_options =
+		LIST_OBJECTS_FILTER_INIT;
 
 	struct option module_clone_options[] = {
 		OPT_STRING(0, "prefix", &clone_data.prefix,
@@ -1796,7 +1797,6 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 		NULL
 	};
 
-	memset(&filter_options, 0, sizeof(filter_options));
 	argc = parse_options(argc, argv, prefix, module_clone_options,
 			     git_submodule_helper_usage, 0);
 
@@ -2581,7 +2581,8 @@ static int module_update(int argc, const char **argv, const char *prefix)
 {
 	struct pathspec pathspec;
 	struct update_data opt = UPDATE_DATA_INIT;
-	struct list_objects_filter_options filter_options;
+	struct list_objects_filter_options filter_options =
+		LIST_OBJECTS_FILTER_INIT;
 	int ret;
 
 	struct option module_update_options[] = {
@@ -2639,7 +2640,6 @@ static int module_update(int argc, const char **argv, const char *prefix)
 	update_clone_config_from_gitmodules(&opt.max_jobs);
 	git_config(git_update_clone_config, &opt.max_jobs);
 
-	memset(&filter_options, 0, sizeof(filter_options));
 	argc = parse_options(argc, argv, prefix, module_update_options,
 			     git_submodule_helper_usage, 0);
 
diff --git a/bundle.h b/bundle.h
index 0c052f5496..68ff39a0a7 100644
--- a/bundle.h
+++ b/bundle.h
@@ -18,6 +18,7 @@ struct bundle_header {
 { \
 	.prerequisites = STRING_LIST_INIT_DUP, \
 	.references = STRING_LIST_INIT_DUP, \
+	.filter = LIST_OBJECTS_FILTER_INIT, \
 }
 void bundle_header_init(struct bundle_header *header);
 void bundle_header_release(struct bundle_header *header);
diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 18c51001dc..56a1933a50 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -108,7 +108,7 @@ int gently_parse_list_objects_filter(
 
 	strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg);
 
-	memset(filter_options, 0, sizeof(*filter_options));
+	list_objects_filter_init(filter_options);
 	return 1;
 }
 
@@ -223,8 +223,7 @@ static void transform_to_combine_type(
 		struct list_objects_filter_options *sub_array =
 			xcalloc(initial_sub_alloc, sizeof(*sub_array));
 		sub_array[0] = *filter_options;
-		memset(filter_options, 0, sizeof(*filter_options));
-		string_list_init_dup(&filter_options->filter_spec);
+		list_objects_filter_init(filter_options);
 		filter_options->sub = sub_array;
 		filter_options->sub_alloc = initial_sub_alloc;
 	}
@@ -255,11 +254,8 @@ void parse_list_objects_filter(
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
-	if (!filter_options->filter_spec.strdup_strings) {
-		if (filter_options->filter_spec.nr)
-			BUG("unexpected non-allocated string in filter_spec");
-		filter_options->filter_spec.strdup_strings = 1;
-	}
+	if (!filter_options->filter_spec.strdup_strings)
+		BUG("filter_options not properly initialized");
 
 	if (!filter_options->choice) {
 		string_list_append(&filter_options->filter_spec, arg);
@@ -346,7 +342,7 @@ void list_objects_filter_release(
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
 	free(filter_options->sub);
-	memset(filter_options, 0, sizeof(*filter_options));
+	list_objects_filter_init(filter_options);
 }
 
 void partial_clone_register(
@@ -429,3 +425,9 @@ void list_objects_filter_copy(
 	for (i = 0; i < src->sub_nr; i++)
 		list_objects_filter_copy(&dest->sub[i], &src->sub[i]);
 }
+
+void list_objects_filter_init(struct list_objects_filter_options *filter_options)
+{
+	struct list_objects_filter_options blank = LIST_OBJECTS_FILTER_INIT;
+	memcpy(filter_options, &blank, sizeof(*filter_options));
+}
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index ffc02d77e7..2720f7dba8 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -69,6 +69,9 @@ struct list_objects_filter_options {
 	 */
 };
 
+#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP }
+void list_objects_filter_init(struct list_objects_filter_options *filter_options);
+
 /*
  * Parse value of the argument to the "filter" keyword.
  * On the command line this looks like:
diff --git a/revision.c b/revision.c
index 0c6e26cd9c..fbd89ef8e7 100644
--- a/revision.c
+++ b/revision.c
@@ -1900,6 +1900,7 @@ void repo_init_revisions(struct repository *r,
 	}
 
 	init_display_notes(&revs->notes_opt);
+	list_objects_filter_init(&revs->filter);
 }
 
 static void add_pending_commit_list(struct rev_info *revs,
diff --git a/transport-helper.c b/transport-helper.c
index 322c722478..e95267a4ab 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -1286,6 +1286,8 @@ int transport_helper_init(struct transport *transport, const char *name)
 	if (getenv("GIT_TRANSPORT_HELPER_DEBUG"))
 		debug = 1;
 
+	list_objects_filter_init(&data->transport_options.filter_options);
+
 	transport->data = data;
 	transport->vtable = &vtable;
 	transport->smart_options = &(data->transport_options);
diff --git a/transport.c b/transport.c
index 6ec6130852..a14179684b 100644
--- a/transport.c
+++ b/transport.c
@@ -1113,6 +1113,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
 		 * will be checked individually in git_connect.
 		 */
 		struct git_transport_data *data = xcalloc(1, sizeof(*data));
+		list_objects_filter_init(&data->options.filter_options);
 		ret->data = data;
 		ret->vtable = &builtin_smart_vtable;
 		ret->smart_options = &(data->options);
diff --git a/upload-pack.c b/upload-pack.c
index 3a851b3606..cbd373f2e5 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -141,6 +141,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	data->allow_filter_fallback = 1;
 	data->tree_filter_max_depth = ULONG_MAX;
 	packet_writer_init(&data->writer, 1);
+	list_objects_filter_init(&data->filter_options);
 
 	data->keepalive = 5;
 	data->advertise_sid = 0;

From c54980ab83661e8e290003fbd5ab44b12e4e77b1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sun, 11 Sep 2022 01:03:31 -0400
Subject: [PATCH 41/45] list-objects-filter: convert filter_spec to a strbuf

Originally, the filter_spec field was just a string pointer. In
cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list,
2019-06-27) it became a string_list, but that commit notes:

    A strbuf would seem to be a more natural choice for this object, but
    it unfortunately requires initialization besides just zero'ing out
    the memory.  This results in all container structs, and all
    containers of those structs, etc., to also require initialization.
    Initializing them all would be more cumbersome that simply using a
    string_list, which behaves properly when its contents are zero'd.

Now that we've changed the struct to require non-zero initialization
anyway (ironically, because string_list also needed non-zero
initialization to avoid leaks), we can now convert to that more natural
type.

This makes the list_objects_filter_spec() function much less awkward, as
it had to collapse the string_list to a single-entry list on the fly.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 list-objects-filter-options.c | 51 +++++++++++++----------------------
 list-objects-filter-options.h |  4 +--
 2 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c
index 56a1933a50..d46ce4acc4 100644
--- a/list-objects-filter-options.c
+++ b/list-objects-filter-options.c
@@ -202,10 +202,10 @@ static int allow_unencoded(char ch)
 static void filter_spec_append_urlencode(
 	struct list_objects_filter_options *filter, const char *raw)
 {
-	struct strbuf buf = STRBUF_INIT;
-	strbuf_addstr_urlencode(&buf, raw, allow_unencoded);
-	trace_printf("Add to combine filter-spec: %s\n", buf.buf);
-	string_list_append_nodup(&filter->filter_spec, strbuf_detach(&buf, NULL));
+	size_t orig_len = filter->filter_spec.len;
+	strbuf_addstr_urlencode(&filter->filter_spec, raw, allow_unencoded);
+	trace_printf("Add to combine filter-spec: %s\n",
+		     filter->filter_spec.buf + orig_len);
 }
 
 /*
@@ -229,7 +229,7 @@ static void transform_to_combine_type(
 	}
 	filter_options->sub_nr = 1;
 	filter_options->choice = LOFC_COMBINE;
-	string_list_append(&filter_options->filter_spec, "combine:");
+	strbuf_addstr(&filter_options->filter_spec, "combine:");
 	filter_spec_append_urlencode(
 		filter_options,
 		list_objects_filter_spec(&filter_options->sub[0]));
@@ -237,7 +237,7 @@ static void transform_to_combine_type(
 	 * We don't need the filter_spec strings for subfilter specs, only the
 	 * top level.
 	 */
-	string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0);
+	strbuf_release(&filter_options->sub[0].filter_spec);
 }
 
 void list_objects_filter_die_if_populated(
@@ -254,11 +254,11 @@ void parse_list_objects_filter(
 	struct strbuf errbuf = STRBUF_INIT;
 	int parse_error;
 
-	if (!filter_options->filter_spec.strdup_strings)
+	if (!filter_options->filter_spec.buf)
 		BUG("filter_options not properly initialized");
 
 	if (!filter_options->choice) {
-		string_list_append(&filter_options->filter_spec, arg);
+		strbuf_addstr(&filter_options->filter_spec, arg);
 
 		parse_error = gently_parse_list_objects_filter(
 			filter_options, arg, &errbuf);
@@ -269,7 +269,7 @@ void parse_list_objects_filter(
 		 */
 		transform_to_combine_type(filter_options);
 
-		string_list_append(&filter_options->filter_spec, "+");
+		strbuf_addch(&filter_options->filter_spec, '+');
 		filter_spec_append_urlencode(filter_options, arg);
 		ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1,
 			      filter_options->sub_alloc);
@@ -300,31 +300,18 @@ int opt_parse_list_objects_filter(const struct option *opt,
 
 const char *list_objects_filter_spec(struct list_objects_filter_options *filter)
 {
-	if (!filter->filter_spec.nr)
+	if (!filter->filter_spec.len)
 		BUG("no filter_spec available for this filter");
-	if (filter->filter_spec.nr != 1) {
-		struct strbuf concatted = STRBUF_INIT;
-		strbuf_add_separated_string_list(
-			&concatted, "", &filter->filter_spec);
-		string_list_clear(&filter->filter_spec, /*free_util=*/0);
-		string_list_append_nodup(
-			&filter->filter_spec, strbuf_detach(&concatted, NULL));
-	}
-
-	return filter->filter_spec.items[0].string;
+	return filter->filter_spec.buf;
 }
 
 const char *expand_list_objects_filter_spec(
 	struct list_objects_filter_options *filter)
 {
 	if (filter->choice == LOFC_BLOB_LIMIT) {
-		struct strbuf expanded_spec = STRBUF_INIT;
-		strbuf_addf(&expanded_spec, "blob:limit=%lu",
+		strbuf_release(&filter->filter_spec);
+		strbuf_addf(&filter->filter_spec, "blob:limit=%lu",
 			    filter->blob_limit_value);
-		string_list_clear(&filter->filter_spec, /*free_util=*/0);
-		string_list_append_nodup(
-			&filter->filter_spec,
-			strbuf_detach(&expanded_spec, NULL));
 	}
 
 	return list_objects_filter_spec(filter);
@@ -337,7 +324,7 @@ void list_objects_filter_release(
 
 	if (!filter_options)
 		return;
-	string_list_clear(&filter_options->filter_spec, /*free_util=*/0);
+	strbuf_release(&filter_options->filter_spec);
 	free(filter_options->sparse_oid_name);
 	for (sub = 0; sub < filter_options->sub_nr; sub++)
 		list_objects_filter_release(&filter_options->sub[sub]);
@@ -398,8 +385,8 @@ void partial_clone_get_default_filter_spec(
 	if (!promisor || !promisor->partial_clone_filter)
 		return;
 
-	string_list_append(&filter_options->filter_spec,
-			   promisor->partial_clone_filter);
+	strbuf_addstr(&filter_options->filter_spec,
+		      promisor->partial_clone_filter);
 	gently_parse_list_objects_filter(filter_options,
 					 promisor->partial_clone_filter,
 					 &errbuf);
@@ -411,14 +398,12 @@ void list_objects_filter_copy(
 	const struct list_objects_filter_options *src)
 {
 	int i;
-	struct string_list_item *item;
 
 	/* Copy everything. We will overwrite the pointers shortly. */
 	memcpy(dest, src, sizeof(struct list_objects_filter_options));
 
-	string_list_init_dup(&dest->filter_spec);
-	for_each_string_list_item(item, &src->filter_spec)
-		string_list_append(&dest->filter_spec, item->string);
+	strbuf_init(&dest->filter_spec, 0);
+	strbuf_addbuf(&dest->filter_spec, &src->filter_spec);
 	dest->sparse_oid_name = xstrdup_or_null(src->sparse_oid_name);
 
 	ALLOC_ARRAY(dest->sub, dest->sub_alloc);
diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h
index 2720f7dba8..7eeadab2dd 100644
--- a/list-objects-filter-options.h
+++ b/list-objects-filter-options.h
@@ -35,7 +35,7 @@ struct list_objects_filter_options {
 	 * To get the raw filter spec given by the user, use the result of
 	 * list_objects_filter_spec().
 	 */
-	struct string_list filter_spec;
+	struct strbuf filter_spec;
 
 	/*
 	 * 'choice' is determined by parsing the filter-spec.  This indicates
@@ -69,7 +69,7 @@ struct list_objects_filter_options {
 	 */
 };
 
-#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP }
+#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRBUF_INIT }
 void list_objects_filter_init(struct list_objects_filter_options *filter_options);
 
 /*

From 746aae3dd1a8e8dba31797ac237916d9533e4254 Mon Sep 17 00:00:00 2001
From: ZheNing Hu <adlternative@gmail.com>
Date: Sun, 11 Sep 2022 14:03:17 +0000
Subject: [PATCH 42/45] ls-files: fix black space in error message

ce74de9(ls-files: introduce "--format" option) miss
a space between two words incorrectly, it leads to
wrong i10n messages. So fix it by adding a space at
the end of the error message.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/ls-files.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 779dc18e59..4cf8a23648 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -257,7 +257,7 @@ static size_t expand_show_index(struct strbuf *sb, const char *start,
 
 	end = strchr(start + 1, ')');
 	if (!end)
-		die(_("bad ls-files format: element '%s'"
+		die(_("bad ls-files format: element '%s' "
 		      "does not end in ')'"), start);
 
 	len = end - start + 1;

From 7c04aa73906b9186c9d46010227d4437fd534d93 Mon Sep 17 00:00:00 2001
From: Eric Sunshine <sunshine@sunshineco.com>
Date: Tue, 13 Sep 2022 04:01:47 +0000
Subject: [PATCH 43/45] chainlint: colorize problem annotations and test
 delimiters

When `chainlint.pl` detects problems in a test definition, it emits the
test definition with "?!FOO?!" annotations highlighting the problems it
discovered. For instance, given this problematic test:

    test_expect_success 'discombobulate frobnitz' '
        git frob babble &&
        (echo balderdash; echo gnabgib) >expect &&
        for i in three two one
        do
            git nitfol $i
        done >actual
        test_cmp expect actual
    '

chainlint.pl will output:

    # chainlint: t1234-confusing.sh
    # chainlint: discombobulate frobnitz
    git frob babble &&
    (echo balderdash ; ?!AMP?! echo gnabgib) >expect &&
    for i in three two one
    do
    git nitfol $i ?!LOOP?!
    done >actual ?!AMP?!
    test_cmp expect actual

in which it may be difficult to spot the "?!FOO?!" annotations. The
problem is compounded when multiple tests, possibly in multiple
scripts, fail "linting", in which case it may be difficult to spot the
"# chainlint:" lines which delimit one problematic test from another.

To ameliorate this potential problem, colorize the "?!FOO?!" annotations
in order to quickly draw the test author's attention to the problem
spots, and colorize the "# chainlint:" lines to help the author identify
the name of each script and each problematic test.

Colorization is disabled automatically if output is not directed to a
terminal or if NO_COLOR environment variable is set. The implementation
is specific to Unix (it employs `tput` if available) but works equally
well in the Git for Windows development environment which emulates Unix
sufficiently.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/chainlint.pl | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/t/chainlint.pl b/t/chainlint.pl
index 386999ce65..976db4b8a0 100755
--- a/t/chainlint.pl
+++ b/t/chainlint.pl
@@ -585,12 +585,14 @@ sub check_test {
 	my $parser = TestParser->new(\$body);
 	my @tokens = $parser->parse();
 	return unless $emit_all || grep(/\?![^?]+\?!/, @tokens);
+	my $c = main::fd_colors(1);
 	my $checked = join(' ', @tokens);
 	$checked =~ s/^\n//;
 	$checked =~ s/^ //mg;
 	$checked =~ s/ $//mg;
+	$checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
 	$checked .= "\n" unless $checked =~ /\n$/;
-	push(@{$self->{output}}, "# chainlint: $title\n$checked");
+	push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
 }
 
 sub parse_cmd {
@@ -615,6 +617,41 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
 	$interval = sub { return Time::HiRes::tv_interval(shift); };
 }
 
+# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
+# outside of get_colors() since under 'ithreads' all threads use %ENV of main
+# thread and ignore %ENV changes in subthreads.
+$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
+
+my @NOCOLORS = (bold => '', rev => '', reset => '', blue => '', green => '', red => '');
+my %COLORS = ();
+sub get_colors {
+	return \%COLORS if %COLORS;
+	if (exists($ENV{NO_COLOR}) ||
+	    system("tput sgr0 >/dev/null 2>&1") != 0 ||
+	    system("tput bold >/dev/null 2>&1") != 0 ||
+	    system("tput rev  >/dev/null 2>&1") != 0 ||
+	    system("tput setaf 1 >/dev/null 2>&1") != 0) {
+		%COLORS = @NOCOLORS;
+		return \%COLORS;
+	}
+	%COLORS = (bold  => `tput bold`,
+		   rev   => `tput rev`,
+		   reset => `tput sgr0`,
+		   blue  => `tput setaf 4`,
+		   green => `tput setaf 2`,
+		   red   => `tput setaf 1`);
+	chomp(%COLORS);
+	return \%COLORS;
+}
+
+my %FD_COLORS = ();
+sub fd_colors {
+	my $fd = shift;
+	return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
+	$FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
+	return $FD_COLORS{$fd};
+}
+
 sub ncores {
 	# Windows
 	return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS});
@@ -630,6 +667,8 @@ sub show_stats {
 	my $walltime = $interval->($start_time);
 	my ($usertime) = times();
 	my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
+	my $c = fd_colors(2);
+	print(STDERR $c->{green});
 	for (@$stats) {
 		my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 		print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
@@ -638,7 +677,7 @@ sub show_stats {
 		$total_tests += $ntests;
 		$total_errs += $nerrs;
 	}
-	printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
+	printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
 }
 
 sub check_script {
@@ -656,8 +695,9 @@ sub check_script {
 		my $parser = ScriptParser->new(\$s);
 		1 while $parser->parse_cmd();
 		if (@{$parser->{output}}) {
+			my $c = fd_colors(1);
 			my $s = join('', @{$parser->{output}});
-			$emit->("# chainlint: $path\n" . $s);
+			$emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
 			$nerrs += () = $s =~ /\?![^?]+\?!/g;
 		}
 		$ntests += $parser->{ntests};

From 255a6f91ae4600ee2d257670477caf97b7986470 Mon Sep 17 00:00:00 2001
From: Adam Dinwoodie <adam@dinwoodie.org>
Date: Thu, 15 Sep 2022 08:57:17 +0100
Subject: [PATCH 44/45] t1800: correct test to handle Cygwin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Cygwin, when failing to spawn a process using start_command, Git
outputs the same error as on Linux systems, rather than using the
GIT_WINDOWS_NATIVE-specific error output.  The WINDOWS test prerequisite
is set in both Cygwin and native Windows environments, which means it's
not appropriate to use to anticipate the error output from
start_command.  Instead, use the MINGW test prerequisite, which is only
set for Git in native Windows environments, and not for Cygwin.

Signed-off-by: Adam Dinwoodie <adam@dinwoodie.org>
Helped-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t1800-hook.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh
index 64096adac7..43fcb7c0bf 100755
--- a/t/t1800-hook.sh
+++ b/t/t1800-hook.sh
@@ -157,9 +157,9 @@ test_expect_success 'git hook run a hook with a bad shebang' '
 	write_script bad-hooks/test-hook "/bad/path/no/spaces" </dev/null &&
 
 	# TODO: We should emit the same (or at least a more similar)
-	# error on Windows and !Windows. See the OS-specific code in
-	# start_command()
-	if test_have_prereq !WINDOWS
+	# error on MINGW (essentially Git for Windows) and all other
+	# platforms.. See the OS-specific code in start_command()
+	if test_have_prereq !MINGW
 	then
 		cat >expect <<-\EOF
 		fatal: cannot run bad-hooks/test-hook: ...

From dda7228a83e2e9ff584bf6adbf55910565b41e14 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 19 Sep 2022 12:55:59 -0700
Subject: [PATCH 45/45] A bit more of remaining topics before -rc1

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes/2.38.0.txt | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/Documentation/RelNotes/2.38.0.txt b/Documentation/RelNotes/2.38.0.txt
index 01617baa98..5d9bd8c295 100644
--- a/Documentation/RelNotes/2.38.0.txt
+++ b/Documentation/RelNotes/2.38.0.txt
@@ -6,7 +6,7 @@ UI, Workflows & Features
  * "git remote show [-n] frotz" now pays attention to negative
    pathspec.
 
- * "git push" sometimes perform poorly when reachability bitmaps are
+ * "git push" sometimes performs poorly when reachability bitmaps are
    used, even in a repository where other operations are helped by
    bitmaps.  The push.useBitmaps configuration variable is introduced
    to allow disabling use of reachability bitmaps only for "git push".
@@ -27,7 +27,7 @@ UI, Workflows & Features
    what locale they are in by sending Accept-Language HTTP header, but
    this was done only for some requests but not others.
 
- * Introduce a discovery.barerepository configuration variable that
+ * Introduce a safe.barerepository configuration variable that
    allows users to forbid discovery of bare repositories.
 
  * Various messages that come from the pack-bitmap codepaths have been
@@ -79,12 +79,15 @@ UI, Workflows & Features
 
  * "git format-patch --from=<ident>" can be told to add an in-body
    "From:" line even for commits that are authored by the given
-   <ident> with "--force-in-body-from"option.
+   <ident> with "--force-in-body-from" option.
 
  * The built-in fsmonitor refuses to work on a network mounted
    repositories; a configuration knob for users to override this has
    been introduced.
 
+ * The "scalar" addition from Microsoft is now part of the core Git
+   installation.
+
 
 Performance, Internal Implementation, Development Support etc.
 
@@ -127,7 +130,7 @@ Performance, Internal Implementation, Development Support etc.
 
  * The way "git multi-pack" uses parse-options API has been improved.
 
- * A coccinelle rule (in contrib/) to encourage use of COPY_ARRAY
+ * A Coccinelle rule (in contrib/) to encourage use of COPY_ARRAY
    macro has been improved.
 
  * API tweak to make it easier to run fuzz testing on commit-graph parser.
@@ -172,6 +175,12 @@ Performance, Internal Implementation, Development Support etc.
  * Share the text used to explain configuration variables used by "git
    <subcmd>" in "git help <subcmd>" with the text from "git help config".
 
+ * "git mv A B" in a sparsely populated working tree can be asked to
+   move a path from a directory that is "in cone" to another directory
+   that is "out of cone".  Handling of such a case has been improved.
+
+ * The chainlint script for our tests has been revamped.
+
 
 Fixes since v2.37
 -----------------
@@ -297,7 +306,7 @@ Fixes since v2.37
  * "git fsck" reads mode from tree objects but canonicalizes the mode
    before passing it to the logic to check object sanity, which has
    hid broken tree objects from the checking logic.  This has been
-   corrected, but to help exiting projects with broken tree objects
+   corrected, but to help existing projects with broken tree objects
    that they cannot fix retroactively, the severity of anomalies this
    code detects has been demoted to "info" for now.
 
@@ -306,12 +315,10 @@ Fixes since v2.37
 
  * An earlier optimization discarded a tree-object buffer that is
    still in use, which has been corrected.
-   (merge 1490d7d82d jk/is-promisor-object-keep-tree-in-use later to maint).
 
  * Fix deadlocks between main Git process and subprocess spawned via
    the pipe_command() API, that can kill "git add -p" that was
    reimplemented in C recently.
-   (merge 716c1f649e jk/pipe-command-nonblock later to maint).
 
  * The sequencer machinery translated messages left in the reflog by
    mistake, which has been corrected.
@@ -319,20 +326,16 @@ Fixes since v2.37
  * xcalloc(), imitating calloc(), takes "number of elements of the
    array", and "size of a single element", in this order.  A call that
    does not follow this ordering has been corrected.
-   (merge c4bbd9bb8f sg/xcalloc-cocci-fix later to maint).
 
  * The preload-index codepath made copies of pathspec to give to
    multiple threads, which were left leaked.
-   (merge 23578904da ad/preload-plug-memleak later to maint).
 
  * Update the version of Ubuntu used for GitHub Actions CI from 18.04
    to 22.04.
-   (merge ef46584831 ds/github-actions-use-newer-ubuntu later to maint).
 
  * The auto-stashed local changes created by "git merge --autostash"
    was mixed into a conflicted state left in the working tree, which
    has been corrected.
-   (merge d3a9295ada en/merge-unstash-only-on-clean-merge later to maint).
 
  * Multi-pack index got corrupted when preferred pack changed from one
    pack to another in a certain way, which has been corrected.