Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pack-objects: don't reuse deltas with path walk #707

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions builtin/push.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,10 @@ int cmd_push(int argc,
else if (recurse_submodules == RECURSE_SUBMODULES_ONLY)
flags |= TRANSPORT_RECURSE_SUBMODULES_ONLY;

prepare_repo_settings(the_repository);
if (the_repository->settings.pack_use_path_walk)
flags |= TRANSPORT_PUSH_NO_REUSE_DELTA;

if (tags)
refspec_append(&rs, "refs/tags/*");

Expand Down
2 changes: 2 additions & 0 deletions send-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised,
strvec_push(&po.args, "--shallow");
if (args->disable_bitmaps)
strvec_push(&po.args, "--no-use-bitmap-index");
if (args->no_reuse_delta)
strvec_push(&po.args, "--no-reuse-delta");
po.in = -1;
po.out = args->stateless_rpc ? -1 : fd;
po.git_cmd = 1;
Expand Down
1 change: 1 addition & 0 deletions send-pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct send_pack_args {
force_update:1,
use_thin_pack:1,
use_ofs_delta:1,
no_reuse_delta:1,
dry_run:1,
/* One of the SEND_PACK_PUSH_CERT_* constants. */
push_cert:2,
Expand Down
109 changes: 109 additions & 0 deletions t/t5590-push-path-walk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/bin/sh

test_description='verify that push respects `pack.usePathWalk`'

TEST_PASSES_SANITIZE_LEAK=true
. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-pack.sh

test_expect_success 'setup bare repository and clone' '
git init --bare -b main bare.git &&
git --git-dir=bare.git config receive.unpackLimit 0 &&
git --git-dir bare.git commit-tree -m initial $EMPTY_TREE >head_oid &&
git --git-dir bare.git update-ref refs/heads/main $(cat head_oid) &&
git clone --bare bare.git clone.git
'
test_expect_success 'avoid reusing deltified objects' '
# construct two commits, one containing a file with the hex digits
# repeated 16 times, the next reducing that to 8 times. The crucial
# part is that the blob of the second commit is deltified _really_
# badly and it is therefore easy to detect if a `git push` reused that
# delta.
x="0123456789abcdef" &&
printf "$x$x$x$x$x$x$x$x" >x128 &&
printf "$x$x$x$x$x$x$x$x$x$x$x$x$x$x$x$x" >x256 &&

pack=clone.git/objects/pack/pack-tmp.pack &&
pack_header 2 >$pack &&

# add x256 as a non-deltified object, using an uncompressed zlib stream
# for simplicity
# 060 = OBJ_BLOB << 4, 0200 = size larger than 15,
# 0 = lower 4 bits of size, 020 = bits 5-9 of size (size = 256)
printf "\260\020" >>$pack &&
# Uncompressed zlib stream always starts with 0170 1 1, followed
# by two bytes encoding the size, little endian, then two bytes with
# the bitwise-complement of that size, then the payload, and then the
# Adler32 checksum. For some reason, the checksum is in big-endian
# format.
printf "\170\001\001\0\001\377\376" >>$pack &&
cat x256 >>$pack &&
# Manually-computed Adler32 checksum: 0xd7ae4621
printf "\327\256\106\041" >>$pack &&

# add x128 as a very badly deltified object
# 0120 = OBJ_OFS_DELTA << 4, 0200 = total size larger than 15,
# 4 = lower 4 bits of size, 030 = bits 5-9 of size
# (size = 128 * 3 + 2 + 2)
printf "\344\030" >>$pack &&
# 0415 = size (i.e. the relative negative offset) of the previous
# object (x256, used as base object)
# encoded as 0200 | ((0415 >> 7) - 1), 0415 & 0177
printf "\201\015" >>$pack &&
# Uncompressed zlib stream, as before, size = 2 + 2 + 128 * 3 (i.e.
# 0604)
printf "\170\001\001\204\001\173\376" >>$pack &&
# base object size = 0400 (encoded as 0200 | (0400 & 0177),
# 0400 >> 7)
printf "\200\002" >>$pack &&
# object size = 0200 (encoded as 0200 | (0200 & 0177), 0200 >> 7
printf "\200\001" >>$pack &&
# massively badly-deltified object: copy every single byte individually
# 0200 = copy, 1 = use 1 byte to encode the offset (counter),
# 020 = use 1 byte to encode the size (1)
printf "$(printf "\\\\221\\\\%03o\\\\001" $(test_seq 0 127))" >>$pack &&
# Manually-computed Adler32 checksum: 0x99c369c4
printf "\231\303\151\304" >>$pack &&

pack_trailer $pack &&
git index-pack -v $pack &&

oid256=$(git hash-object x256) &&
printf "100755 blob $oid256\thex\n" >tree &&
tree_oid="$(git --git-dir=clone.git mktree <tree)" &&
commit_oid=$(git --git-dir=clone.git commit-tree \
-p $(git --git-dir=clone.git rev-parse main) \
-m 256 $tree_oid) &&

oid128=$(git hash-object x128) &&
printf "100755 blob $oid128\thex\n" >tree &&
tree_oid="$(git --git-dir=clone.git mktree <tree)" &&
commit_oid=$(git --git-dir=clone.git commit-tree \
-p $commit_oid \
-m 128 $tree_oid) &&

# Verify that the on-disk size of the delta object is suboptimal in the
# clone (see below why 18 bytes or smaller is the optimal size):
git index-pack --verify-stat clone.git/objects/pack/pack-*.pack >verify &&
size="$(sed -n "s/^$oid128 blob *\([^ ]*\).*/\1/p" <verify)" &&
test $size -gt 18 &&

git --git-dir=clone.git update-ref refs/heads/main $commit_oid &&
git --git-dir=clone.git -c pack.usePathWalk=true push origin main &&
git index-pack --verify-stat bare.git/objects/pack/pack-*.pack >verify &&
size="$(sed -n "s/^$oid128 blob *\([^ ]*\).*/\1/p" <verify)" &&
# The on-disk size of the delta object should be smaller than, or equal
# to, 18 bytes, as that would be the size if storing the payload
# uncompressed:
# 3 bytes: 0170 01 01
# + 2 bytes: zlib stream size
# + 2 bytes: but-wise complement of the zlib stream size
# + 7 bytes: payload
# (= 2 bytes for the size of tbe base object
# + 2 bytes for the size of the delta command
# + 3 bytes for the copy command)
# + 2 + 2 bytes: Adler32 checksum
test $size -le 18
'

test_done
1 change: 1 addition & 0 deletions transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ static int git_transport_push(struct transport *transport, struct ref *remote_re
args.dry_run = !!(flags & TRANSPORT_PUSH_DRY_RUN);
args.porcelain = !!(flags & TRANSPORT_PUSH_PORCELAIN);
args.atomic = !!(flags & TRANSPORT_PUSH_ATOMIC);
args.no_reuse_delta = !!(flags & TRANSPORT_PUSH_NO_REUSE_DELTA);
args.push_options = transport->push_options;
args.url = transport->url;

Expand Down
1 change: 1 addition & 0 deletions transport.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ struct transport {
#define TRANSPORT_RECURSE_SUBMODULES_ONLY (1<<15)
#define TRANSPORT_PUSH_FORCE_IF_INCLUDES (1<<16)
#define TRANSPORT_PUSH_AUTO_UPSTREAM (1<<17)
#define TRANSPORT_PUSH_NO_REUSE_DELTA (1<<18)

int transport_summary_width(const struct ref *refs);

Expand Down
Loading