Merge branch 'tb/gc-recent-object-hook'

"git pack-objects" learned to invoke a new hook program that
enumerates extra objects to be used as anchoring points to keep
otherwise unreachable objects in cruft packs.

* tb/gc-recent-object-hook:
  gc: introduce `gc.recentObjectsHook`
  reachable.c: extract `obj_is_recent()`
This commit is contained in:
Junio C Hamano
2023-06-23 11:21:17 -07:00
5 changed files with 313 additions and 3 deletions

View File

@@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
test_must_fail git cat-file -e $to_drop
'
# Verify that "git prune --expire=now" keeps a loose blob whose mtime was
# moved back with chmtime, as long as the script configured through
# gc.recentObjectsHook prints that blob's object ID.
test_expect_success 'gc.recentObjectsHook' '
	add_blob &&
	test-tool chmtime =-86500 "$BLOB_FILE" &&

	# The hook script simply prints the ID of the blob to protect.
	echo "echo $BLOB" | write_script precious-objects &&
	test_config gc.recentObjectsHook ./precious-objects &&

	git prune --expire=now &&

	# The blob must still be readable after pruning.
	git cat-file -p "$BLOB"
'
test_done

View File

@@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
)
'
# Exercise gc.recentObjectsHook together with "git repack --cruft".
#
# Three unreachable commits are created in a scratch repository. The
# loose object of "cruft.old" gets its mtime moved back and the one of
# "cruft.new" gets its mtime moved forward. While a hook script prints
# the ID of "cruft.old", the cruft pack is expected to list exactly the
# objects reachable from "cruft.old" and "cruft.new". After the hook is
# unset and the repack is repeated, only the objects of "cruft.new" are
# expected in the cruft pack, and every object that dropped out of the
# list must no longer be readable with "git cat-file".
#
# NOTE(review): $objdir is not set inside this block; presumably it is
# defined near the top of the script as a path relative to the
# repository -- confirm.
test_expect_success 'gc.recentObjectsHook' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
# Create a handful of objects.
#
# - one reachable commit, "base", designated for the reachable
# pack
# - one unreachable commit, "cruft.discard", which is marked
# for deletion
# - one unreachable commit, "cruft.old", which would be marked
# for deletion, but is rescued as an extra cruft tip
# - one unreachable commit, "cruft.new", which is not marked
# for deletion
test_commit base &&
git branch -M main &&
git checkout --orphan discard &&
git rm -fr . &&
test_commit --no-tag cruft.discard &&
git checkout --orphan old &&
git rm -fr . &&
test_commit --no-tag cruft.old &&
cruft_old="$(git rev-parse HEAD)" &&
git checkout --orphan new &&
git rm -fr . &&
test_commit --no-tag cruft.new &&
cruft_new="$(git rev-parse HEAD)" &&
git checkout main &&
git branch -D discard old new &&
git reflog expire --all --expire=all &&
# mark cruft.old with an mtime that is many minutes
# older than the expiration period, and mark cruft.new
# with an mtime that is in the future (and thus not
# eligible for pruning).
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
# Write the list of cruft objects we expect to
# accumulate, which is comprised of everything reachable
# from cruft.old and cruft.new, but not cruft.discard.
git rev-list --objects --no-object-names \
$cruft_old $cruft_new >cruft.raw &&
sort cruft.raw >cruft.expect &&
# Write the script to list extra tips, which are limited
# to cruft.old, in this case.
write_script extra-tips <<-EOF &&
echo $cruft_old
EOF
git config gc.recentObjectsHook ./extra-tips &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
test_cmp cruft.expect cruft.actual &&
# Ensure that the "old" objects are removed after
# dropping the gc.recentObjectsHook hook.
git config --unset gc.recentObjectsHook &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
cp cruft.expect cruft.old &&
sort cruft.raw >cruft.expect &&
test_cmp cruft.expect cruft.actual &&
# ensure objects which are no longer in the cruft pack were
# removed from the repository
for object in $(comm -13 cruft.expect cruft.old)
do
test_must_fail git cat-file -t $object || return 1
done
)
'
# Exercise gc.recentObjectsHook when it is configured more than once.
#
# Two unreachable commits, "cruft.a" and "cruft.b", are created in a
# scratch repository, and two hook scripts each print the ID of one of
# them. With both hooks configured, the cruft pack is expected to list
# exactly the objects reachable from the two commits.
#
# A third hook that only runs "false" is then added. The repack is
# expected to fail with a message containing "unable to enumerate
# additional recent objects", and the .mtimes file recorded after the
# first repack must still exist afterwards.
test_expect_success 'multi-valued gc.recentObjectsHook' '
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit base &&
git branch -M main &&
git checkout --orphan cruft.a &&
git rm -fr . &&
test_commit --no-tag cruft.a &&
cruft_a="$(git rev-parse HEAD)" &&
git checkout --orphan cruft.b &&
git rm -fr . &&
test_commit --no-tag cruft.b &&
cruft_b="$(git rev-parse HEAD)" &&
git checkout main &&
git branch -D cruft.a cruft.b &&
git reflog expire --all --expire=all &&
echo "echo $cruft_a" | write_script extra-tips.a &&
echo "echo $cruft_b" | write_script extra-tips.b &&
echo "false" | write_script extra-tips.c &&
git rev-list --objects --no-object-names $cruft_a $cruft_b \
>cruft.raw &&
sort cruft.raw >cruft.expect &&
# ensure that each extra cruft tip is saved by its
# respective hook
git config --add gc.recentObjectsHook ./extra-tips.a &&
git config --add gc.recentObjectsHook ./extra-tips.b &&
git repack --cruft --cruft-expiration=now -d &&
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
git show-index <${mtimes%.mtimes}.idx >cruft &&
cut -d" " -f2 cruft | sort >cruft.actual &&
test_cmp cruft.expect cruft.actual &&
# ensure that a dirty exit halts cruft pack generation
git config --add gc.recentObjectsHook ./extra-tips.c &&
test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
grep "unable to enumerate additional recent objects" err &&
# and that the existing cruft pack is left alone
test_path_is_file "$mtimes"
)
'
# Verify that an unreachable blob printed by the gc.recentObjectsHook
# script ends up as the only entry of the cruft pack, even though the
# mtime of its loose object was moved back and the repack runs with
# --cruft-expiration=now.
test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit base &&

		# Write a blob that nothing refers to, then age its loose
		# object so that it has fallen out of the retention period.
		stale=$(echo "unreachable" | git hash-object -w --stdin) &&
		test-tool chmtime -2000 "$objdir/$(test_oid_to_path "$stale")" &&

		# The hook reports that single blob as an extra tip.
		echo "echo $stale" | write_script extra-tips &&
		git config gc.recentObjectsHook ./extra-tips &&

		git repack --cruft --cruft-expiration=now -d &&

		# The index next to the .mtimes file must list the blob
		# and nothing else.
		echo "$stale" >expect &&
		cruft_mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
		git show-index <"${cruft_mtimes%.mtimes}.idx" >cruft &&
		cut -d" " -f2 <cruft >actual &&
		test_cmp expect actual
	)
'
test_done

View File

@@ -113,6 +113,37 @@ test_expect_success 'do not bother loosening old objects' '
test_must_fail git cat-file -p $obj2
'
# Exercise gc.recentObjectsHook with
# "git repack -A -d --unpack-unreachable=1.hour.ago".
#
# Three blobs are written and packed into one pack each. The packs that
# hold obj2 and obj3 get an old mtime via chmtime, while the pack with
# obj1 keeps its fresh mtime. An annotated tag pointing at obj2 is
# created and the hook script prints the ID of that tag object. After
# the repack, obj1, obj2 and the tag object must still be readable and
# obj3 must be gone.
#
# The hook is set with test_config rather than "git config" so that it
# is unset again when this test finishes; this test runs in the
# repository shared with the tests that follow, and a hook left in its
# configuration would silently influence them.
#
# NOTE(review): "obj2-tag" is a real ref, so obj2 and the tag object may
# be kept as reachable objects even without the hook. Confirm that this
# test actually fails when gc.recentObjectsHook is not configured.
test_expect_success 'gc.recentObjectsHook' '
	obj1=$(echo one | git hash-object -w --stdin) &&
	obj2=$(echo two | git hash-object -w --stdin) &&
	obj3=$(echo three | git hash-object -w --stdin) &&
	pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
	pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
	pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
	git prune-packed &&

	# Sanity check: all three blobs are readable from their packs.
	git cat-file -p $obj1 &&
	git cat-file -p $obj2 &&
	git cat-file -p $obj3 &&

	git tag -a -m tag obj2-tag $obj2 &&
	obj2_tag="$(git rev-parse obj2-tag)" &&

	# The hook reports the tag object; obj2 is what the tag points at.
	write_script precious-objects <<-EOF &&
	echo $obj2_tag
	EOF
	test_config gc.recentObjectsHook ./precious-objects &&

	# Age the packs holding obj2 and obj3 past the one-hour cutoff.
	test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
	test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
	git repack -A -d --unpack-unreachable=1.hour.ago &&

	git cat-file -p $obj1 &&
	git cat-file -p $obj2 &&
	git cat-file -p $obj2_tag &&
	test_must_fail git cat-file -p $obj3
'
test_expect_success 'keep packed objects found only in index' '
echo my-unique-content >file &&
git add file &&