Skip to content

Commit

Permalink
maintenance: add new vfs-cache-move maintenance task
Browse files Browse the repository at this point in the history
Introduce a new maintenance task, `vfs-cache-move`, that operates on
Scalar or VFS for Git repositories with a per-volume, shared object
cache (specified by `gvfs.sharedCache`) to migrate packfiles from the
repository object directory to the shared cache.

Older versions of `microsoft/git` incorrectly placed packfiles in the
repository object directory instead of the shared cache; this task will
help clean up existing clones impacted by that issue.

Signed-off-by: Matthew John Cheetham <[email protected]>
  • Loading branch information
mjcheetham committed Jan 22, 2025
1 parent df789d1 commit e3d64ab
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 0 deletions.
8 changes: 8 additions & 0 deletions Documentation/git-maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ task:
* `prefetch`: hourly.
* `loose-objects`: daily.
* `incremental-repack`: daily.
* `vfs-cache-move`: weekly.
--
+
`git maintenance register` will also disable foreground maintenance by
Expand Down Expand Up @@ -158,6 +159,13 @@ pack-refs::
need to iterate across many references. See linkgit:git-pack-refs[1]
for more information.

vfs-cache-move::
The `vfs-cache-move` task only operates on Scalar or VFS for Git
repositories (cloned with either `scalar clone` or `gvfs clone`) that
have the `gvfs.sharedCache` configuration setting present. This task
migrates pack files from the repository's object directory into the
shared volume cache.

OPTIONS
-------
--auto::
Expand Down
141 changes: 141 additions & 0 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Copyright (c) 2006 Shawn O. Pearce
*/
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "builtin.h"
#include "abspath.h"
#include "date.h"
Expand Down Expand Up @@ -41,9 +42,19 @@
#include "hook.h"
#include "setup.h"
#include "trace2.h"
#include "copy.h"
#include "dir.h"

#define FAILED_RUN "failed to run %s"

/*
 * Debugging aid: expands to a block that loops for as long as stat() on
 * the hard-coded path "D:/tmp/debug" succeeds, printing the current
 * process id to stderr and sleeping five seconds per iteration, so that
 * a developer has time to attach a debugger. When the path does not
 * exist the block falls straight through.
 *
 * The expansion is a bare brace block, so it is used without a trailing
 * semicolon.
 *
 * NOTE(review): this looks like leftover developer scaffolding (fixed,
 * machine-specific path) -- confirm whether it should ship.
 */
#define DEBUG_WAIT { \
struct stat st; \
while (!stat("D:/tmp/debug", &st)) { \
fprintf(stderr, "[%d] Waiting to attach...\n", getpid()); \
sleep(5); \
} \
}

static const char * const builtin_gc_usage[] = {
N_("git gc [<options>]"),
NULL
Expand Down Expand Up @@ -1347,6 +1358,129 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
return 0;
}

/*
 * Make 'dst' available with the contents of 'src'.
 *
 * A hard link is attempted first. If link() fails, the failure is
 * reported as a warning and a regular copy (created with mode 0644) is
 * attempted instead. If the copy fails as well, the process dies.
 *
 * Note that copy_file() is called destination-first, unlike link().
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst) != 0) {
		/* Say why linking failed before falling back to a copy. */
		warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0644) != 0)
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}

/*
 * Migrate one pack and its companion files from 'srcdir' to 'dstdir'.
 *
 * 'pack_filename' is the file name (no directory) of the ".pack" file;
 * the caller is expected to have verified that it ends in ".pack".
 *
 * The ".pack", ".keep" and ".rev" files are hard-linked (or copied) to
 * the destination first; the ".idx" file is renamed last. Only after
 * that are the linked/copied originals removed from 'srcdir'.
 *
 * A pack whose ".idx" file does not exist in 'srcdir' is left alone.
 * Companion files that do not exist in 'srcdir' are skipped. A failure
 * to link/copy or to rename is fatal; a failure to delete an original
 * only produces a warning.
 */
static void migrate_pack(const char *srcdir, const char *dstdir,
			 const char *pack_filename)
{
	struct stat st;
	char *basename;
	struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
	struct {
		const char *ext;
		unsigned move:1;
	} files[] = {
		{".pack", 0},
		{".keep", 0},
		{".rev", 0},
		{".idx", 1}, /* The index file must be atomically moved last. */
	};

	trace2_region_enter("maintenance", "migrate_pack", the_repository);

	/* Strip the trailing ".pack" to get the name shared by all files. */
	basename = xstrndup(pack_filename,
			    strlen(pack_filename) - strlen(".pack"));

	/* A pack without an index file is not yet ready to be migrated. */
	strbuf_addf(&src, "%s/%s%s", srcdir, basename, ".idx");
	if (stat(src.buf, &st))
		goto cleanup;

	/* Move or copy files from the source directory to the destination. */
	for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
		strbuf_reset(&src);
		strbuf_addf(&src, "%s/%s%s", srcdir, basename, files[i].ext);

		/* Not every companion file has to be present. */
		if (stat(src.buf, &st))
			continue;

		strbuf_reset(&dst);
		strbuf_addf(&dst, "%s/%s%s", dstdir, basename, files[i].ext);

		if (files[i].move) {
			if (rename(src.buf, dst.buf))
				die_errno(_("failed to move '%s' to '%s'"),
					  src.buf, dst.buf);
		} else {
			link_or_copy_or_die(src.buf, dst.buf);
		}
	}

	/*
	 * Now that the pack and all associated files exist at the
	 * destination, clean up the files in the source directory.
	 */
	for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
		/* Files that were moved rather than copied have no clean up. */
		if (files[i].move)
			continue;

		strbuf_reset(&src);
		strbuf_addf(&src, "%s/%s%s", srcdir, basename, files[i].ext);

		/*
		 * Companion files that were skipped above because they
		 * did not exist are still visited here; a missing file
		 * is not worth a warning.
		 */
		if (unlink(src.buf) && errno != ENOENT)
			warning_errno(_("failed to delete '%s'"), src.buf);
	}

cleanup:
	free(basename);
	strbuf_release(&src);
	strbuf_release(&dst);

	trace2_region_leave("maintenance", "migrate_pack", the_repository);
}

/*
 * Per-file callback for for_each_file_in_pack_dir(): migrate the pack
 * named by 'file_name' into the "pack" subdirectory of vfs_object_dir.
 *
 * 'full_path' / 'full_path_len' give the path of the file including
 * 'file_name'; 'data' is unused.
 *
 * Nothing is done for files that are not ".pack" files, when
 * vfs_object_dir is not set, or when the directory containing the file
 * compares equal to vfs_object_dir.
 */
static void move_pack_to_vfs_cache(const char *full_path, size_t full_path_len,
				   const char *file_name, UNUSED void *data)
{
	char *srcdir;
	struct strbuf dstdir = STRBUF_INIT;

	/*
	 * We only care about the actual pack files here.
	 * The associated .idx, .keep, .rev files will be copied in tandem
	 * with the pack file, with the index file being moved last.
	 * The original locations of the non-index files will only be
	 * deleted once all other files have been copied/moved.
	 */
	if (!ends_with(file_name, ".pack"))
		return;

	/* No cache means there's no work to do (and nothing to allocate). */
	if (!vfs_object_dir)
		return;

	/*
	 * Directory part of full_path: drop the file name and one more
	 * character, presumably the path separator in front of it.
	 */
	srcdir = xstrndup(full_path, full_path_len - strlen(file_name) - 1);

	/*
	 * Same source + destination means there's no work to do.
	 * NOTE(review): this compares against vfs_object_dir itself rather
	 * than "<vfs_object_dir>/pack" -- confirm that is intended.
	 */
	if (!fspathcmp(srcdir, vfs_object_dir))
		goto cleanup;

	strbuf_addf(&dstdir, "%s/pack", vfs_object_dir);

	migrate_pack(srcdir, dstdir.buf, file_name);

cleanup:
	/* Reached on the "same directory" path too, so srcdir never leaks. */
	free(srcdir);
	strbuf_release(&dstdir);
}

/*
 * The `vfs-cache-move` maintenance task: hand every file found by
 * for_each_file_in_pack_dir() for the repository's object directory to
 * move_pack_to_vfs_cache(). Neither 'opts' nor 'cfg' is used.
 *
 * Always returns 0.
 */
static int maintenance_task_vfs_cache_move(UNUSED struct maintenance_run_opts *opts,
					   UNUSED struct gc_config *cfg)
{
	struct repository *r = the_repository;

	/*
	 * No attach-a-debugger wait here: a maintenance task must not
	 * stall on the presence of a developer-specific file.
	 */
	for_each_file_in_pack_dir(r->objects->odb->path, move_pack_to_vfs_cache,
				  NULL);

	return 0;
}

typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
struct gc_config *cfg);

Expand Down Expand Up @@ -1376,6 +1510,7 @@ enum maintenance_task_label {
TASK_GC,
TASK_COMMIT_GRAPH,
TASK_PACK_REFS,
TASK_VFS_CACHE_MOVE,

/* Leave as final value */
TASK__COUNT
Expand Down Expand Up @@ -1412,6 +1547,10 @@ static struct maintenance_task tasks[] = {
maintenance_task_pack_refs,
pack_refs_condition,
},
[TASK_VFS_CACHE_MOVE] = {
"vfs-cache-move",
maintenance_task_vfs_cache_move,
},
};

static int compare_tasks_by_selection(const void *a_, const void *b_)
Expand Down Expand Up @@ -1506,6 +1645,8 @@ static void initialize_maintenance_strategy(void)
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
tasks[TASK_PACK_REFS].enabled = 1;
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
tasks[TASK_VFS_CACHE_MOVE].enabled = 1;
tasks[TASK_VFS_CACHE_MOVE].schedule = SCHEDULE_WEEKLY;
}
}

Expand Down
125 changes: 125 additions & 0 deletions t/t7900-maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1012,4 +1012,129 @@ test_expect_success 'repacking loose objects is quiet' '
)
'

# With no gvfs.sharedcache configured, running maintenance with the
# vfs-cache-move task enabled (and the gc task disabled) must leave the
# pack files alone: every .pack/.idx/.keep/.rev file recorded in
# files.txt before the run must still exist afterwards.
# fast-import runs with fastimport.unpackLimit=0, presumably so that the
# imported blobs end up in a packfile rather than as loose objects.
test_expect_success 'vfs-cache-move task with no shared cache no op' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit something &&
git config set maintenance.gc.enabled false &&
git config set maintenance.vfs-cache-move.enabled true &&
git config set maintenance.vfs-cache-move.auto 1 &&
printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
git -c fastimport.unpackLimit=0 fast-import &&
find .git/objects/pack \
-type f \
\( -name "*.pack" \
-o -name "*.idx" \
-o -name "*.keep" \
-o -name "*.rev" \) >files.txt &&
git maintenance run &&
while IFS= read -r f; do
test_path_exists $f || exit 1
done <files.txt
)
'

# With gvfs.sharedcache pointing at the repository's own
# .git/objects/pack directory, the task must again be a no-op: every
# .pack/.idx/.keep/.rev file recorded in files.txt before the
# maintenance run must still exist afterwards.
test_expect_success 'vfs-cache-move task cache path same as pack dir no op' '
test_when_finished "rm -rf repo" &&
git init repo &&
(
cd repo &&
test_commit something &&
git config set gvfs.sharedcache .git/objects/pack &&
git config set maintenance.gc.enabled false &&
git config set maintenance.vfs-cache-move.enabled true &&
git config set maintenance.vfs-cache-move.auto 1 &&
printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
git -c fastimport.unpackLimit=0 fast-import &&
find .git/objects/pack \
-type f \
\( -name "*.pack" \
-o -name "*.idx" \
-o -name "*.keep" \
-o -name "*.rev" \) >files.txt &&
git maintenance run &&
while IFS= read -r f; do
test_path_exists $f || exit 1
done <files.txt
)
'

# With gvfs.sharedcache set to ../cache and any .rev/.keep files removed
# from the pack directory beforehand, the task must still migrate the
# pack: the .pack/.idx paths listed in src.txt must be gone after the
# run, and the same file names must exist under ../cache/pack (dst.txt
# is src.txt with the directory part rewritten by sed).
test_expect_success 'vfs-cache-move task no .rev or .keep' '
test_when_finished "rm -rf repo cache" &&
mkdir -p cache/pack &&
git init repo &&
(
cd repo &&
test_commit something &&
git config set gvfs.sharedcache ../cache &&
git config set maintenance.gc.enabled false &&
git config set maintenance.vfs-cache-move.enabled true &&
git config set maintenance.vfs-cache-move.auto 1 &&
printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
git -c fastimport.unpackLimit=0 fast-import &&
find .git/objects/pack \
-type f \
\( -name "*.pack" \
-o -name "*.idx" \) >src.txt &&
rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep &&
sed "s|.*/|../cache/pack/|" src.txt >dst.txt &&
git maintenance run &&
while IFS= read -r f; do
test_path_is_missing $f || exit 1
done <src.txt &&
while IFS= read -r f; do
test_path_exists $f || exit 1
done <dst.txt
)
'

# Full migration case: with gvfs.sharedcache set to ../cache, every
# .pack/.idx/.keep/.rev file found in the pack directory before the run
# (src.txt) must be gone afterwards, and a file of the same name must
# exist under ../cache/pack (dst.txt is src.txt with the directory part
# rewritten by sed).
test_expect_success 'vfs-cache-move task success' '
test_when_finished "rm -rf repo cache" &&
mkdir -p cache/pack &&
git init repo &&
(
cd repo &&
test_commit something &&
git config set gvfs.sharedcache ../cache &&
git config set maintenance.gc.enabled false &&
git config set maintenance.vfs-cache-move.enabled true &&
git config set maintenance.vfs-cache-move.auto 1 &&
printf "blob\ndata <<END\n%s\nEND\n\n" 1 2 3 4 5 | \
git -c fastimport.unpackLimit=0 fast-import &&
find .git/objects/pack \
-type f \
\( -name "*.pack" \
-o -name "*.idx" \
-o -name "*.keep" \
-o -name "*.rev" \) >src.txt &&
sed "s|.*/|../cache/pack/|" src.txt >dst.txt &&
git maintenance run &&
while IFS= read -r f; do
test_path_is_missing $f || exit 1
done <src.txt &&
while IFS= read -r f; do
test_path_exists $f || exit 1
done <dst.txt
)
'

test_done

0 comments on commit e3d64ab

Please sign in to comment.