Skip to content

Commit

Permalink
path-walk: mark trees and blobs as UNINTERESTING
Browse files Browse the repository at this point in the history
When the input rev_info has UNINTERESTING starting points, we want to be
sure that the UNINTERESTING flag is passed appropriately through the
objects. To match how this is done in places such as 'git pack-objects', we
use the mark_edges_uninteresting() method.

This method has an option for using the "sparse" walk, which is similar in
spirit to the path-walk API's walk. To be sure to keep it independent, add a
new 'prune_all_uninteresting' option to the path_walk_info struct.

To check how the UNINTERESTING flag is spread through our objects, extend the
'test-tool path-walk' command to output whether or not an object has that
flag. This changes our tests significantly, including the removal of some
objects that were previously visited due to the incomplete implementation.

Signed-off-by: Derrick Stolee <[email protected]>
  • Loading branch information
derrickstolee committed Dec 18, 2024
1 parent 6df56f4 commit f2ffc32
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 22 deletions.
8 changes: 8 additions & 0 deletions Documentation/technical/api-path-walk.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ commits.
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.

`prune_all_uninteresting`::
By default, all reachable paths are emitted by the path-walk API.
This option allows consumers to declare that they are not
interested in paths where all included objects are marked with the
`UNINTERESTING` flag. This requires using the `boundary` option in
the revision walk so that the walk emits commits marked with the
`UNINTERESTING` flag.

Examples
--------

Expand Down
74 changes: 74 additions & 0 deletions path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "dir.h"
#include "hashmap.h"
#include "hex.h"
#include "list-objects.h"
#include "object.h"
#include "oid-array.h"
#include "revision.h"
Expand All @@ -23,6 +24,7 @@ static const char *root_path = "";
/*
 * A list of object IDs that all share one object type. Lists are stored
 * in the context's paths_to_lists map, keyed by path.
 */
struct type_and_oid_list {
enum object_type type;
struct oid_array oids;
/*
 * Set to 1 when an object lacking the UNINTERESTING flag is appended
 * while adding tree entries, and set unconditionally for the root
 * tree, tag and tagged-blob lists. When 'prune_all_uninteresting' is
 * enabled, walk_path() returns early for a list where this is still 0.
 */
int maybe_interesting;
};

#define TYPE_AND_OID_LIST_INIT { \
Expand Down Expand Up @@ -142,6 +144,10 @@ static int add_tree_entries(struct path_walk_context *ctx,
strmap_put(&ctx->paths_to_lists, path.buf, list);
}
push_to_stack(ctx, path.buf);

if (!(o->flags & UNINTERESTING))
list->maybe_interesting = 1;

oid_array_append(&list->oids, &entry.oid);
}

Expand Down Expand Up @@ -169,6 +175,43 @@ static int walk_path(struct path_walk_context *ctx,
if (!list->oids.nr)
return 0;

if (ctx->info->prune_all_uninteresting) {
/*
* This is true if all objects were UNINTERESTING
* when added to the list.
*/
if (!list->maybe_interesting)
return 0;

/*
* But it's still possible that the objects were set
* as UNINTERESTING after being added. Do a quick check.
*/
list->maybe_interesting = 0;
for (size_t i = 0;
!list->maybe_interesting && i < list->oids.nr;
i++) {
if (list->type == OBJ_TREE) {
struct tree *t = lookup_tree(ctx->repo,
&list->oids.oid[i]);
if (t && !(t->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
} else if (list->type == OBJ_BLOB) {
struct blob *b = lookup_blob(ctx->repo,
&list->oids.oid[i]);
if (b && !(b->object.flags & UNINTERESTING))
list->maybe_interesting = 1;
} else {
/* Tags are always interesting if visited. */
list->maybe_interesting = 1;
}
}

/* We have confirmed that all objects are UNINTERESTING. */
if (!list->maybe_interesting)
return 0;
}

/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
Expand Down Expand Up @@ -203,6 +246,26 @@ static void clear_paths_to_lists(struct strmap *map)
strmap_init(map);
}

/*
 * File-scope state read by show_edge(), whose only parameter is the
 * commit. walk_objects_by_path() assigns both pointers immediately
 * before calling mark_edges_uninteresting() and resets them to NULL
 * right after that call returns.
 */
static struct repository *edge_repo;
static struct type_and_oid_list *edge_tree_list;

/*
 * Callback handed to mark_edges_uninteresting().
 *
 * Looks up the root tree of 'commit' in 'edge_repo'. If the commit is
 * flagged UNINTERESTING, the same flag is copied onto its root tree.
 * The first time a given root tree is encountered it is flagged SEEN
 * and its object ID is appended to 'edge_tree_list'; later calls that
 * reach the same tree add nothing.
 *
 * Does nothing when the commit's tree cannot be obtained.
 */
static void show_edge(struct commit *commit)
{
	struct tree *root = repo_get_commit_tree(edge_repo, commit);

	if (!root)
		return;

	/* Carry the commit's UNINTERESTING state over to its root tree. */
	if (commit->object.flags & UNINTERESTING)
		root->object.flags |= UNINTERESTING;

	/* Record each root tree at most once. */
	if (!(root->object.flags & SEEN)) {
		root->object.flags |= SEEN;
		oid_array_append(&edge_tree_list->oids, &root->object.oid);
	}
}

static int setup_pending_objects(struct path_walk_info *info,
struct path_walk_context *ctx)
{
Expand Down Expand Up @@ -314,6 +377,7 @@ static int setup_pending_objects(struct path_walk_info *info,
if (tagged_blobs->oids.nr) {
const char *tagged_blob_path = "/tagged-blobs";
tagged_blobs->type = OBJ_BLOB;
tagged_blobs->maybe_interesting = 1;
push_to_stack(ctx, tagged_blob_path);
strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
} else {
Expand All @@ -325,6 +389,7 @@ static int setup_pending_objects(struct path_walk_info *info,
if (tags->oids.nr) {
const char *tag_path = "/tags";
tags->type = OBJ_TAG;
tags->maybe_interesting = 1;
push_to_stack(ctx, tag_path);
strmap_put(&ctx->paths_to_lists, tag_path, tags);
} else {
Expand Down Expand Up @@ -369,6 +434,7 @@ int walk_objects_by_path(struct path_walk_info *info)
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
root_tree_list->maybe_interesting = 1;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
push_to_stack(&ctx, root_path);

Expand All @@ -382,6 +448,14 @@ int walk_objects_by_path(struct path_walk_info *info)
if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));

/* Walk trees to mark them as UNINTERESTING. */
edge_repo = info->revs->repo;
edge_tree_list = root_tree_list;
mark_edges_uninteresting(info->revs, show_edge,
info->prune_all_uninteresting);
edge_repo = NULL;
edge_tree_list = NULL;

info->revs->blob_objects = info->revs->tree_objects = 0;

trace2_region_enter("path-walk", "pending-walk", info->revs->repo);
Expand Down
8 changes: 8 additions & 0 deletions path-walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ struct path_walk_info {
int trees;
int blobs;
int tags;

/**
* When 'prune_all_uninteresting' is set and a path has all objects
* marked as UNINTERESTING, then the path-walk will not visit those
* objects. It will not call path_fn on those objects and will not
* walk the children of such trees.
*/
int prune_all_uninteresting;
};

#define PATH_WALK_INFO_INIT { \
Expand Down
12 changes: 9 additions & 3 deletions t/helper/test-path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ static int emit_block(const char *path, struct oid_array *oids,
printf("%"PRIuMAX":%s:%s:EMPTY\n",
tdata->batch_nr, typestr, path);

for (size_t i = 0; i < oids->nr; i++)
printf("%"PRIuMAX":%s:%s:%s\n",
for (size_t i = 0; i < oids->nr; i++) {
struct object *o = lookup_unknown_object(the_repository,
&oids->oid[i]);
printf("%"PRIuMAX":%s:%s:%s%s\n",
tdata->batch_nr, typestr, path,
oid_to_hex(&oids->oid[i]));
oid_to_hex(&oids->oid[i]),
o->flags & UNINTERESTING ? ":UNINTERESTING" : "");
}

tdata->batch_nr++;
return 0;
Expand All @@ -74,6 +78,8 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle inclusion of tag objects")),
OPT_BOOL(0, "trees", &info.trees,
N_("toggle inclusion of tree objects")),
OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
N_("toggle pruning of uninteresting paths")),
OPT_END(),
};

Expand Down
79 changes: 60 additions & 19 deletions t/t6601-path-walk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,11 @@ test_expect_success 'topic, not base' '
0:commit::$(git rev-parse topic)
1:tree::$(git rev-parse topic^{tree})
2:tree:right/:$(git rev-parse topic:right)
3:blob:right/d:$(git rev-parse topic:right/d)
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
4:blob:right/c:$(git rev-parse topic:right/c)
5:tree:left/:$(git rev-parse topic:left)
6:blob:left/b:$(git rev-parse topic:left/b)
7:blob:a:$(git rev-parse topic:a)
5:tree:left/:$(git rev-parse topic:left):UNINTERESTING
6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
7:blob:a:$(git rev-parse topic:a):UNINTERESTING
blobs:4
commits:1
tags:0
Expand All @@ -225,15 +225,38 @@ test_expect_success 'topic, not base' '
test_cmp_sorted expect out
'

# Starting points may include tags: the expectation lists the tag 'fourth'
# under /tags and the blob that blob-tag2 peels to under /tagged-blobs,
# neither carrying the :UNINTERESTING suffix, alongside the tree and blob
# entries for topic with 'base' excluded.
test_expect_success 'fourth, blob-tag2, not base' '
test-tool path-walk -- fourth blob-tag2 --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
1:tag:/tags:$(git rev-parse fourth)
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
3:tree::$(git rev-parse topic^{tree})
4:tree:right/:$(git rev-parse topic:right)
5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
6:blob:right/c:$(git rev-parse topic:right/c)
7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
9:blob:a:$(git rev-parse base~1:a):UNINTERESTING
blobs:5
commits:1
tags:1
trees:3
EOF
test_cmp_sorted expect out
'

test_expect_success 'topic, not base, only blobs' '
test-tool path-walk --no-trees --no-commits \
-- topic --not base >out &&
cat >expect <<-EOF &&
0:blob:right/d:$(git rev-parse topic:right/d)
0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
1:blob:right/c:$(git rev-parse topic:right/c)
2:blob:left/b:$(git rev-parse topic:left/b)
3:blob:a:$(git rev-parse topic:a)
2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
3:blob:a:$(git rev-parse topic:a):UNINTERESTING
blobs:4
commits:0
tags:0
Expand Down Expand Up @@ -267,7 +290,7 @@ test_expect_success 'topic, not base, only trees' '
cat >expect <<-EOF &&
0:tree::$(git rev-parse topic^{tree})
1:tree:right/:$(git rev-parse topic:right)
2:tree:left/:$(git rev-parse topic:left)
2:tree:left/:$(git rev-parse topic:left):UNINTERESTING
commits:0
blobs:0
tags:0
Expand All @@ -282,17 +305,17 @@ test_expect_success 'topic, not base, boundary' '
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1)
0:commit::$(git rev-parse base~1):UNINTERESTING
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree})
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
2:tree:right/:$(git rev-parse base~1:right)
3:blob:right/d:$(git rev-parse base~1:right/d)
4:blob:right/c:$(git rev-parse base~1:right/c)
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
4:blob:right/c:$(git rev-parse topic:right/c)
5:tree:left/:$(git rev-parse base~1:left)
6:blob:left/b:$(git rev-parse base~1:left/b)
7:blob:a:$(git rev-parse base~1:a)
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
7:blob:a:$(git rev-parse base~1:a):UNINTERESTING
blobs:5
commits:2
tags:0
Expand All @@ -302,22 +325,40 @@ test_expect_success 'topic, not base, boundary' '
test_cmp_sorted expect out
'

# Same revision arguments as 'topic, not base, boundary', but run with
# --prune: the paths right/d, left/, left/b and a, whose entries all carry
# the :UNINTERESTING suffix in the unpruned expectation, are absent here.
test_expect_success 'topic, not base, boundary with pruning' '
test-tool path-walk --prune -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
0:commit::$(git rev-parse topic)
0:commit::$(git rev-parse base~1):UNINTERESTING
1:tree::$(git rev-parse topic^{tree})
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
2:tree:right/:$(git rev-parse topic:right)
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
3:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
3:blob:right/c:$(git rev-parse topic:right/c)
blobs:2
commits:2
tags:0
trees:4
EOF
test_cmp_sorted expect out
'

test_expect_success 'trees are reported exactly once' '
test_when_finished "rm -rf unique-trees" &&
test_create_repo unique-trees &&
(
cd unique-trees &&
mkdir initial &&
test_commit initial/file &&
git switch -c move-to-top &&
git mv initial/file.t ./ &&
test_tick &&
git commit -m moved &&
git update-ref refs/heads/other HEAD
) &&
test-tool -C unique-trees path-walk -- --all >out &&
tree=$(git -C unique-trees rev-parse HEAD:) &&
grep "$tree" out >out-filtered &&
Expand Down

0 comments on commit f2ffc32

Please sign in to comment.