Merge branch 'jk/chunk-bounds'
The codepaths that read "chunk" formatted files have been corrected to pay attention to the chunk size and notice broken files. * jk/chunk-bounds: (21 commits) t5319: make corrupted large-offset test more robust chunk-format: drop pair_chunk_unsafe() commit-graph: detect out-of-order BIDX offsets commit-graph: check bounds when accessing BIDX chunk commit-graph: check bounds when accessing BDAT chunk commit-graph: bounds-check generation overflow chunk commit-graph: check size of generations chunk commit-graph: bounds-check base graphs chunk commit-graph: detect out-of-bounds extra-edges pointers commit-graph: check size of commit data chunk midx: check size of revindex chunk midx: bounds-check large offset chunk midx: check size of object offset chunk midx: enforce chunk alignment on reading midx: check size of pack names chunk commit-graph: check consistency of fanout table midx: check size of oid lookup chunk commit-graph: check size of oid fanout chunk midx: stop ignoring malformed oid fanout chunk t: add library for munging chunk-format files ...
This commit is contained in:
17
t/lib-chunk.sh
Normal file
17
t/lib-chunk.sh
Normal file
@@ -0,0 +1,17 @@
|
||||
# Shell library for working with "chunk" files (commit-graph, midx, etc).
|
||||
|
||||
# corrupt_chunk_file <fn> <chunk> <offset> <bytes>
|
||||
#
|
||||
# Corrupt a chunk-based file (like a commit-graph) by overwriting the bytes
|
||||
# found in the chunk specified by the 4-byte <chunk> identifier. If <offset> is
|
||||
# "clear", replace the chunk entirely. Otherwise, overwrite data <offset> bytes
|
||||
# into the chunk.
|
||||
#
|
||||
# The <bytes> are interpreted as pairs of hex digits (so "000000FE" would be
|
||||
# big-endian 254).
|
||||
corrupt_chunk_file () {
|
||||
fn=$1; shift
|
||||
perl "$TEST_DIRECTORY"/lib-chunk/corrupt-chunk-file.pl \
|
||||
"$@" <"$fn" >"$fn.tmp" &&
|
||||
mv "$fn.tmp" "$fn"
|
||||
}
|
||||
66
t/lib-chunk/corrupt-chunk-file.pl
Normal file
66
t/lib-chunk/corrupt-chunk-file.pl
Normal file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
my ($chunk, $seek, $bytes) = @ARGV;
|
||||
$bytes =~ s/../chr(hex($&))/ge;
|
||||
|
||||
binmode STDIN;
|
||||
binmode STDOUT;
|
||||
|
||||
# A few helpers to read bytes, or read and copy them to the
|
||||
# output.
|
||||
sub get {
|
||||
my $n = shift;
|
||||
return unless $n;
|
||||
read(STDIN, my $buf, $n)
|
||||
or die "read error or eof: $!\n";
|
||||
return $buf;
|
||||
}
|
||||
sub copy {
|
||||
my $buf = get(@_);
|
||||
print $buf;
|
||||
return $buf;
|
||||
}
|
||||
|
||||
# read until we find table-of-contents entry for chunk;
|
||||
# note that we cheat a bit by assuming 4-byte alignment and
|
||||
# that no ToC entry will accidentally look like a header.
|
||||
#
|
||||
# If we don't find the entry, copy() will hit EOF and exit
|
||||
# (which should cause the caller to fail the test).
|
||||
while (copy(4) ne $chunk) { }
|
||||
my $offset = unpack("Q>", copy(8));
|
||||
|
||||
# In clear mode, our length will change. So figure out
|
||||
# the length by comparing to the offset of the next chunk, and
|
||||
# then adjust that offset (and all subsequent) ones.
|
||||
my $len;
|
||||
if ($seek eq "clear") {
|
||||
my $id;
|
||||
do {
|
||||
$id = copy(4);
|
||||
my $next = unpack("Q>", get(8));
|
||||
if (!defined $len) {
|
||||
$len = $next - $offset;
|
||||
}
|
||||
print pack("Q>", $next - $len + length($bytes));
|
||||
} while (unpack("N", $id));
|
||||
}
|
||||
|
||||
# and now copy up to our existing chunk data
|
||||
copy($offset - tell(STDIN));
|
||||
if ($seek eq "clear") {
|
||||
# if clearing, skip past existing data
|
||||
get($len);
|
||||
} else {
|
||||
# otherwise, copy up to the requested offset,
|
||||
# and skip past the overwritten bytes
|
||||
copy($seek);
|
||||
get(length($bytes));
|
||||
}
|
||||
|
||||
# now write out the requested bytes, along
|
||||
# with any other remaining data
|
||||
print $bytes;
|
||||
while (read(STDIN, my $buf, 4096)) {
|
||||
print $buf;
|
||||
}
|
||||
@@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
|
||||
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
|
||||
|
||||
. ./test-lib.sh
|
||||
. "$TEST_DIRECTORY"/lib-chunk.sh
|
||||
|
||||
GIT_TEST_COMMIT_GRAPH=0
|
||||
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
|
||||
@@ -404,4 +405,53 @@ test_expect_success 'Bloom generation backfills empty commits' '
|
||||
)
|
||||
'
|
||||
|
||||
corrupt_graph () {
|
||||
graph=.git/objects/info/commit-graph &&
|
||||
test_when_finished "rm -rf $graph" &&
|
||||
git commit-graph write --reachable --changed-paths &&
|
||||
corrupt_chunk_file $graph "$@"
|
||||
}
|
||||
|
||||
check_corrupt_graph () {
|
||||
corrupt_graph "$@" &&
|
||||
git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
|
||||
git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
|
||||
test_cmp expect.out out
|
||||
}
|
||||
|
||||
test_expect_success 'Bloom reader notices too-small data chunk' '
|
||||
check_corrupt_graph BDAT clear 00000000 &&
|
||||
echo "warning: ignoring too-small changed-path chunk" \
|
||||
"(4 < 12) in commit-graph file" >expect.err &&
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'Bloom reader notices out-of-bounds filter offsets' '
|
||||
check_corrupt_graph BIDX 12 FFFFFFFF &&
|
||||
# use grep to avoid depending on exact chunk size
|
||||
grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err
|
||||
'
|
||||
|
||||
test_expect_success 'Bloom reader notices too-small index chunk' '
|
||||
# replace the index with a single entry, making most
|
||||
# lookups out-of-bounds
|
||||
check_corrupt_graph BIDX clear 00000000 &&
|
||||
echo "warning: commit-graph changed-path index chunk" \
|
||||
"is too small" >expect.err &&
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'Bloom reader notices out-of-order index offsets' '
|
||||
# we do not know any real offsets, but we can pick
|
||||
# something plausible; we should not get to the point of
|
||||
# actually reading from the bogus offsets anyway.
|
||||
corrupt_graph BIDX 4 0000000c00000005 &&
|
||||
echo "warning: ignoring decreasing changed-path index offsets" \
|
||||
"(12 > 5) for positions 1 and 2 of .git/objects/info/commit-graph" >expect.err &&
|
||||
git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
|
||||
git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
|
||||
test_cmp expect.out out &&
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
test_description='commit graph'
|
||||
. ./test-lib.sh
|
||||
. "$TEST_DIRECTORY"/lib-chunk.sh
|
||||
|
||||
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
|
||||
|
||||
@@ -559,7 +560,7 @@ test_expect_success 'detect incorrect fanout' '
|
||||
|
||||
test_expect_success 'detect incorrect fanout final value' '
|
||||
corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \
|
||||
"fanout value"
|
||||
"oid table and fanout disagree on size"
|
||||
'
|
||||
|
||||
test_expect_success 'detect incorrect OID order' '
|
||||
@@ -821,4 +822,77 @@ test_expect_success 'overflow during generation version upgrade' '
|
||||
)
|
||||
'
|
||||
|
||||
corrupt_chunk () {
|
||||
graph=full/.git/objects/info/commit-graph &&
|
||||
test_when_finished "rm -rf $graph" &&
|
||||
git -C full commit-graph write --reachable &&
|
||||
corrupt_chunk_file $graph "$@"
|
||||
}
|
||||
|
||||
check_corrupt_chunk () {
|
||||
corrupt_chunk "$@" &&
|
||||
git -C full -c core.commitGraph=false log >expect.out &&
|
||||
git -C full -c core.commitGraph=true log >out 2>err &&
|
||||
test_cmp expect.out out
|
||||
}
|
||||
|
||||
test_expect_success 'reader notices too-small oid fanout chunk' '
|
||||
# make it big enough that the graph file is plausible,
|
||||
# otherwise we hit an earlier check
|
||||
check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph oid fanout chunk is wrong size
|
||||
error: commit-graph is missing the OID Fanout chunk
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices fanout/lookup table mismatch' '
|
||||
check_corrupt_chunk OIDF 1020 "FFFFFFFF" &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph oid table and fanout disagree on size
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices out-of-bounds fanout' '
|
||||
# Rather than try to corrupt a specific hash, we will just
|
||||
# wreck them all. But we cannot just set them all to 0xFFFFFFFF or
|
||||
# similar, as they are used for hi/lo starts in a binary search (so if
|
||||
# they are identical, that indicates that the search should abort
|
||||
# immediately). Instead, we will give them high values that differ by
|
||||
# 2^24, ensuring that any that are used would cause an out-of-bounds
|
||||
# read.
|
||||
check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph fanout values out of order
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small commit data chunk' '
|
||||
check_corrupt_chunk CDAT clear 00000000 &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph commit data chunk is wrong size
|
||||
error: commit-graph is missing the Commit Data chunk
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices out-of-bounds extra edge' '
|
||||
check_corrupt_chunk EDGE clear &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph extra-edges pointer out of bounds
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small generations chunk' '
|
||||
check_corrupt_chunk GDA2 clear 00000000 &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: commit-graph generations chunk is wrong size
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
test_description='multi-pack-indexes'
|
||||
. ./test-lib.sh
|
||||
. "$TEST_DIRECTORY"/lib-chunk.sh
|
||||
|
||||
GIT_TEST_MULTI_PACK_INDEX=0
|
||||
objdir=.git/objects
|
||||
@@ -438,7 +439,7 @@ test_expect_success 'verify extended chunk count' '
|
||||
|
||||
test_expect_success 'verify missing required chunk' '
|
||||
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \
|
||||
"missing required"
|
||||
"required pack-name chunk missing"
|
||||
'
|
||||
|
||||
test_expect_success 'verify invalid chunk offset' '
|
||||
@@ -1055,4 +1056,105 @@ test_expect_success 'repack with delta islands' '
|
||||
)
|
||||
'
|
||||
|
||||
corrupt_chunk () {
|
||||
midx=.git/objects/pack/multi-pack-index &&
|
||||
test_when_finished "rm -rf $midx" &&
|
||||
git repack -ad --write-midx &&
|
||||
corrupt_chunk_file $midx "$@"
|
||||
}
|
||||
|
||||
test_expect_success 'reader notices too-small oid fanout chunk' '
|
||||
corrupt_chunk OIDF clear 00000000 &&
|
||||
test_must_fail git log 2>err &&
|
||||
cat >expect <<-\EOF &&
|
||||
error: multi-pack-index OID fanout is of the wrong size
|
||||
fatal: multi-pack-index required OID fanout chunk missing or corrupted
|
||||
EOF
|
||||
test_cmp expect err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small oid lookup chunk' '
|
||||
corrupt_chunk OIDL clear 00000000 &&
|
||||
test_must_fail git log 2>err &&
|
||||
cat >expect <<-\EOF &&
|
||||
error: multi-pack-index OID lookup chunk is the wrong size
|
||||
fatal: multi-pack-index required OID lookup chunk missing or corrupted
|
||||
EOF
|
||||
test_cmp expect err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small pack names chunk' '
|
||||
# There is no NUL to terminate the name here, so the
|
||||
# chunk is too short.
|
||||
corrupt_chunk PNAM clear 70656666 &&
|
||||
test_must_fail git log 2>err &&
|
||||
cat >expect <<-\EOF &&
|
||||
fatal: multi-pack-index pack-name chunk is too short
|
||||
EOF
|
||||
test_cmp expect err
|
||||
'
|
||||
|
||||
test_expect_success 'reader handles unaligned chunks' '
|
||||
# A 9-byte PNAM means all of the subsequent chunks
|
||||
# will no longer be 4-byte aligned, but it is still
|
||||
# a valid one-pack chunk on its own (it is "foo.pack\0").
|
||||
corrupt_chunk PNAM clear 666f6f2e7061636b00 &&
|
||||
git -c core.multipackindex=false log >expect.out &&
|
||||
git -c core.multipackindex=true log >out 2>err &&
|
||||
test_cmp expect.out out &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: chunk id 4f494446 not 4-byte aligned
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small object offset chunk' '
|
||||
corrupt_chunk OOFF clear 00000000 &&
|
||||
test_must_fail git log 2>err &&
|
||||
cat >expect <<-\EOF &&
|
||||
error: multi-pack-index object offset chunk is the wrong size
|
||||
fatal: multi-pack-index required object offsets chunk missing or corrupted
|
||||
EOF
|
||||
test_cmp expect err
|
||||
'
|
||||
|
||||
test_expect_success 'reader bounds-checks large offset table' '
|
||||
# re-use the objects64 dir here to cheaply get access to a midx
|
||||
# with large offsets.
|
||||
git init repo &&
|
||||
test_when_finished "rm -rf repo" &&
|
||||
(
|
||||
cd repo &&
|
||||
(cd ../objects64 && pwd) >.git/objects/info/alternates &&
|
||||
git multi-pack-index --object-dir=../objects64 write &&
|
||||
midx=../objects64/pack/multi-pack-index &&
|
||||
corrupt_chunk_file $midx LOFF clear &&
|
||||
# using only %(objectsize) is important here; see the commit
|
||||
# message for more details
|
||||
test_must_fail git cat-file --batch-all-objects \
|
||||
--batch-check="%(objectsize)" 2>err &&
|
||||
cat >expect <<-\EOF &&
|
||||
fatal: multi-pack-index large offset out of bounds
|
||||
EOF
|
||||
test_cmp expect err
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices too-small revindex chunk' '
|
||||
# We only get a revindex with bitmaps (and likewise only
|
||||
# load it when they are asked for).
|
||||
test_config repack.writeBitmaps true &&
|
||||
corrupt_chunk RIDX clear 00000000 &&
|
||||
git -c core.multipackIndex=false rev-list \
|
||||
--all --use-bitmap-index >expect.out &&
|
||||
git -c core.multipackIndex=true rev-list \
|
||||
--all --use-bitmap-index >out 2>err &&
|
||||
test_cmp expect.out out &&
|
||||
cat >expect.err <<-\EOF &&
|
||||
error: multi-pack-index reverse-index chunk is the wrong size
|
||||
warning: multi-pack bitmap is missing required reverse index
|
||||
EOF
|
||||
test_cmp expect.err err
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
@@ -4,6 +4,7 @@ test_description='split commit graph'
|
||||
|
||||
TEST_PASSES_SANITIZE_LEAK=true
|
||||
. ./test-lib.sh
|
||||
. "$TEST_DIRECTORY"/lib-chunk.sh
|
||||
|
||||
GIT_TEST_COMMIT_GRAPH=0
|
||||
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
|
||||
@@ -319,7 +320,7 @@ test_expect_success 'verify --shallow does not check base contents' '
|
||||
cd verify-shallow &&
|
||||
git commit-graph verify &&
|
||||
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
|
||||
corrupt_file "$base_file" 1000 "\01" &&
|
||||
corrupt_file "$base_file" 1500 "\01" &&
|
||||
git commit-graph verify --shallow &&
|
||||
test_must_fail git commit-graph verify 2>test_err &&
|
||||
grep -v "^+" test_err >err &&
|
||||
@@ -393,10 +394,23 @@ test_expect_success 'verify across alternates' '
|
||||
test_commit extra &&
|
||||
git commit-graph write --reachable --split &&
|
||||
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
|
||||
corrupt_file "$tip_file" 100 "\01" &&
|
||||
corrupt_file "$tip_file" 1500 "\01" &&
|
||||
test_must_fail git commit-graph verify --shallow 2>test_err &&
|
||||
grep -v "^+" test_err >err &&
|
||||
test_i18ngrep "commit-graph has incorrect fanout value" err
|
||||
test_i18ngrep "incorrect checksum" err
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'reader bounds-checks base-graph chunk' '
|
||||
git clone --no-hardlinks . corrupt-base-chunk &&
|
||||
(
|
||||
cd corrupt-base-chunk &&
|
||||
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
|
||||
corrupt_chunk_file "$tip_file" BASE clear 01020304 &&
|
||||
git -c core.commitGraph=false log >expect.out &&
|
||||
git -c core.commitGraph=true log >out 2>err &&
|
||||
test_cmp expect.out out &&
|
||||
grep "commit-graph base graphs chunk is too small" err
|
||||
)
|
||||
'
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ then
|
||||
fi
|
||||
|
||||
. "$TEST_DIRECTORY"/lib-commit-graph.sh
|
||||
. "$TEST_DIRECTORY/lib-chunk.sh"
|
||||
|
||||
UNIX_EPOCH_ZERO="@0 +0000"
|
||||
FUTURE_DATE="@4147483646 +0000"
|
||||
@@ -74,4 +75,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' '
|
||||
git -C repo-uint32-max commit-graph verify
|
||||
'
|
||||
|
||||
test_expect_success 'reader notices out-of-bounds generation overflow' '
|
||||
graph=.git/objects/info/commit-graph &&
|
||||
test_when_finished "rm -rf $graph" &&
|
||||
git commit-graph write --reachable &&
|
||||
corrupt_chunk_file $graph GDO2 clear &&
|
||||
test_must_fail git log 2>err &&
|
||||
grep "commit-graph overflow generation data is too small" err
|
||||
'
|
||||
|
||||
test_done
|
||||
|
||||
Reference in New Issue
Block a user