Skip to content

Commit

Permalink
Use git partial clone and worktree to reduce network/file io (#5412)
Browse files Browse the repository at this point in the history
* Use git worktree and partial clone to reduce network io

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Add defer statement to clean up cloned git repositories in detector implementations

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Change the repo.Copy to use worktree and implement CopyToModify

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Add tests for Copy and CopyToModify methods in repo

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* repo.Copy and related methods updated to use git.Worktree instead of git.Repo

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* MockRepo.Copy method updated to return git.Worktree instead of git.Repo

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Update CopyToModify method to clone repository using git clone command

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Test: Update TestCopy to use repo.Copy instead of CopyToModify

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Fix comment in CopyToModify to clarify remote URL setting after local cloning

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Fetch the latest changes from remote after local cloning

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Remove .git directory from copied deploy source to avoid the git ops

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Update TestCopyToModify to use a mock remote directory for testing

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

* Copy deploy source using tar to exclude .git directory and improve performance

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>

---------

Signed-off-by: Shinnosuke Sawada-Dazai <[email protected]>
  • Loading branch information
Warashi authored Dec 13, 2024
1 parent e7cbc64 commit fd58db0
Show file tree
Hide file tree
Showing 13 changed files with 203 additions and 26 deletions.
11 changes: 10 additions & 1 deletion pkg/app/piped/deploysource/deploysource.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,17 @@ func (p *provider) prepare(ctx context.Context, lw io.Writer) (*DeploySource, er
func (p *provider) copy(lw io.Writer) (*DeploySource, error) {
p.copyNum++

src := p.source.RepoDir
dest := fmt.Sprintf("%s-%d", p.source.RepoDir, p.copyNum)
cmd := exec.Command("cp", "-rf", p.source.RepoDir, dest)

// use tar to exclude the .git directory
// the tar command does not create the destination directory if it does not exist.
// so we need to create it before running the command.
if err := os.MkdirAll(dest, 0700); err != nil {
fmt.Fprintf(lw, "Unable to create the directory to store the copied deploy source (%v)\n", err)
return nil, err
}
cmd := exec.Command("sh", "-c", fmt.Sprintf("tar c -f - -C '%s' --exclude='.git' . | tar x -f - -C '%s'", src, dest))
out, err := cmd.CombinedOutput()
if err != nil {
fmt.Fprintf(lw, "Unable to copy deploy source data (%v, %s)\n", err, string(out))
Expand Down
4 changes: 3 additions & 1 deletion pkg/app/piped/driftdetector/cloudrun/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func (d *detector) checkApplication(ctx context.Context, app *model.Application,
return d.reporter.ReportApplicationSyncState(ctx, app.Id, state)
}

func (d *detector) loadHeadServiceManifest(app *model.Application, repo git.Repo, headCommit git.Commit) (provider.ServiceManifest, error) {
func (d *detector) loadHeadServiceManifest(app *model.Application, repo git.Worktree, headCommit git.Commit) (provider.ServiceManifest, error) {
var (
manifestCache = provider.ServiceManifestCache{
AppID: app.Id,
Expand Down Expand Up @@ -263,6 +263,8 @@ func (d *detector) loadHeadServiceManifest(app *model.Application, repo git.Repo
if err != nil {
return provider.ServiceManifest{}, fmt.Errorf("failed to copy the cloned git repository (%w)", err)
}
defer repo.Clean()

repoDir := repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/app/piped/driftdetector/ecs/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ func ignoreParameters(liveManifests provider.ECSManifests, headManifests provide
return live, head
}

func (d *detector) loadConfigs(app *model.Application, repo git.Repo, headCommit git.Commit) (provider.ECSManifests, error) {
func (d *detector) loadConfigs(app *model.Application, repo git.Worktree, headCommit git.Commit) (provider.ECSManifests, error) {
var (
manifestCache = provider.ECSManifestsCache{
AppID: app.Id,
Expand Down Expand Up @@ -387,6 +387,8 @@ func (d *detector) loadConfigs(app *model.Application, repo git.Repo, headCommit
if err != nil {
return provider.ECSManifests{}, fmt.Errorf("failed to copy the cloned git repository (%w)", err)
}
defer repo.Clean()

repoDir := repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/app/piped/driftdetector/kubernetes/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ func (d *detector) checkApplication(ctx context.Context, app *model.Application,
return d.reporter.ReportApplicationSyncState(ctx, app.Id, state)
}

func (d *detector) loadHeadManifests(ctx context.Context, app *model.Application, repo git.Repo, headCommit git.Commit, watchingResourceKinds []provider.APIVersionKind) ([]provider.Manifest, error) {
func (d *detector) loadHeadManifests(ctx context.Context, app *model.Application, repo git.Worktree, headCommit git.Commit, watchingResourceKinds []provider.APIVersionKind) ([]provider.Manifest, error) {
var (
manifestCache = provider.AppManifestsCache{
AppID: app.Id,
Expand Down Expand Up @@ -278,6 +278,8 @@ func (d *detector) loadHeadManifests(ctx context.Context, app *model.Application
if err != nil {
return nil, fmt.Errorf("failed to copy the cloned git repository (%w)", err)
}
defer repo.Clean()

repoDir = repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/app/piped/driftdetector/lambda/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ func ignoreAndSortParameters(headSpec provider.FunctionManifestSpec) provider.Fu
return cloneSpec
}

func (d *detector) loadHeadFunctionManifest(app *model.Application, repo git.Repo, headCommit git.Commit) (provider.FunctionManifest, error) {
func (d *detector) loadHeadFunctionManifest(app *model.Application, repo git.Worktree, headCommit git.Commit) (provider.FunctionManifest, error) {
var (
manifestCache = provider.FunctionManifestCache{
AppID: app.Id,
Expand Down Expand Up @@ -312,6 +312,8 @@ func (d *detector) loadHeadFunctionManifest(app *model.Application, repo git.Rep
if err != nil {
return provider.FunctionManifest{}, fmt.Errorf("failed to copy the cloned git repository (%w)", err)
}
defer repo.Clean()

repoDir := repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/app/piped/driftdetector/terraform/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (d *detector) check(ctx context.Context) {
}
}

func (d *detector) checkApplication(ctx context.Context, app *model.Application, repo git.Repo, headCommit git.Commit) error {
func (d *detector) checkApplication(ctx context.Context, app *model.Application, repo git.Worktree, headCommit git.Commit) error {
var (
repoDir = repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
Expand Down Expand Up @@ -206,6 +206,8 @@ func (d *detector) checkApplication(ctx context.Context, app *model.Application,
if err != nil {
return fmt.Errorf("failed to copy the cloned git repository (%w)", err)
}
defer repo.Clean()

repoDir = repo.GetPath()
appDir = filepath.Join(repoDir, app.GitPath.Path)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/app/piped/eventwatcher/eventwatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ func (w *watcher) execute(ctx context.Context, repo git.Repo, repoID string, eve
if err != nil {
return fmt.Errorf("failed to create a new temporary directory: %w", err)
}
tmpRepo, err := repo.Copy(filepath.Join(tmpDir, "tmp-repo"))
tmpRepo, err := repo.CopyToModify(filepath.Join(tmpDir, "tmp-repo"))
if err != nil {
return fmt.Errorf("failed to copy the repository to the temporary directory: %w", err)
}
Expand Down Expand Up @@ -495,7 +495,7 @@ func (w *watcher) updateValues(ctx context.Context, repo git.Repo, repoID string
if err != nil {
return fmt.Errorf("failed to create a new temporary directory: %w", err)
}
tmpRepo, err := repo.Copy(filepath.Join(tmpDir, "tmp-repo"))
tmpRepo, err := repo.CopyToModify(filepath.Join(tmpDir, "tmp-repo"))
if err != nil {
return fmt.Errorf("failed to copy the repository to the temporary directory: %w", err)
}
Expand Down
11 changes: 10 additions & 1 deletion pkg/app/pipedv1/deploysource/deploysource.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,17 @@ func (p *provider) prepare(ctx context.Context, lw io.Writer) (*DeploySource, er
func (p *provider) copy(lw io.Writer) (*DeploySource, error) {
p.copyNum++

src := p.source.RepoDir
dest := fmt.Sprintf("%s-%d", p.source.RepoDir, p.copyNum)
cmd := exec.Command("cp", "-rf", p.source.RepoDir, dest)

// use tar to exclude the .git directory
// the tar command does not create the destination directory if it does not exist.
// so we need to create it before running the command.
if err := os.MkdirAll(dest, 0700); err != nil {
fmt.Fprintf(lw, "Unable to create the directory to store the copied deploy source (%v)\n", err)
return nil, err
}
cmd := exec.Command("sh", "-c", fmt.Sprintf("tar c -f - -C '%s' --exclude='.git' . | tar x -f - -C '%s'", src, dest))
out, err := cmd.CombinedOutput()
if err != nil {
fmt.Fprintf(lw, "Unable to copy deploy source data (%v, %s)\n", err, string(out))
Expand Down
4 changes: 2 additions & 2 deletions pkg/app/pipedv1/eventwatcher/eventwatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ func (w *watcher) execute(ctx context.Context, repo git.Repo, repoID string, eve
if err != nil {
return fmt.Errorf("failed to create a new temporary directory: %w", err)
}
tmpRepo, err := repo.Copy(filepath.Join(tmpDir, "tmp-repo"))
tmpRepo, err := repo.CopyToModify(filepath.Join(tmpDir, "tmp-repo"))
if err != nil {
return fmt.Errorf("failed to copy the repository to the temporary directory: %w", err)
}
Expand Down Expand Up @@ -478,7 +478,7 @@ func (w *watcher) updateValues(ctx context.Context, repo git.Repo, repoID string
if err != nil {
return fmt.Errorf("failed to create a new temporary directory: %w", err)
}
tmpRepo, err := repo.Copy(filepath.Join(tmpDir, "tmp-repo"))
tmpRepo, err := repo.CopyToModify(filepath.Join(tmpDir, "tmp-repo"))
if err != nil {
return fmt.Errorf("failed to copy the repository to the temporary directory: %w", err)
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/git/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (c *client) Clone(ctx context.Context, repoID, remote, branch, destination
return nil, err
}
out, err := retryCommand(3, time.Second, logger, func() ([]byte, error) {
args := []string{"clone", "--mirror", remote, repoCachePath}
args := []string{"clone", "--mirror", "--filter=blob:none", remote, repoCachePath}
args = append(authArgs, args...)
return runGitCommand(ctx, c.gitPath, "", c.envsForRepo(remote), args...)
})
Expand Down Expand Up @@ -214,11 +214,12 @@ func (c *client) Clone(ctx context.Context, repoID, remote, branch, destination
}
}

args := []string{"clone"}
// git worktree add [-f] [--detach] [--checkout] [--lock [--reason <string>]]
// [--orphan] [(-b | -B) <new-branch>] <path> [<commit-ish>]
args := []string{"-C", repoCachePath, "worktree", "add", "--detach", destination}
if branch != "" {
args = append(args, "-b", branch)
args = append(args, branch)
}
args = append(args, repoCachePath, destination)

logger.Info("cloning a repo from cached one in local",
zap.String("src", repoCachePath),
Expand Down
19 changes: 17 additions & 2 deletions pkg/git/gittest/git.mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

112 changes: 103 additions & 9 deletions pkg/git/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ var (
type Repo interface {
GetPath() string
GetClonedBranch() string
Copy(dest string) (Repo, error)
Copy(dest string) (Worktree, error)
CopyToModify(dest string) (Repo, error)

ListCommits(ctx context.Context, visionRange string) ([]Commit, error)
GetLatestCommit(ctx context.Context) (Commit, error)
Expand All @@ -52,6 +53,17 @@ type Repo interface {
CommitChanges(ctx context.Context, branch, message string, newBranch bool, changes map[string][]byte, trailers map[string]string) error
}

// Worktree provides functions to get and handle git worktree.
// It is a separate checkout of the repository.
// It is used to make changes to the repository without affecting the main repository.
// Worktree always does checkout with the detached HEAD, so it doesn't affect the main repository.
type Worktree interface {
GetPath() string
Clean() error
Copy(dest string) (Worktree, error)
Checkout(ctx context.Context, commitish string) error
}

type repo struct {
dir string
gitPath string
Expand All @@ -60,6 +72,55 @@ type repo struct {
gitEnvs []string
}

// worktree is a git worktree.
// It is a separate checkout of the repository.
type worktree struct {
base *repo
worktreePath string
}

func (r *worktree) runGitCommand(ctx context.Context, args ...string) ([]byte, error) {
cmd := exec.CommandContext(ctx, r.base.gitPath, args...)
cmd.Dir = r.worktreePath
cmd.Env = append(os.Environ(), r.base.gitEnvs...)
return cmd.CombinedOutput()
}

func (r *worktree) Copy(dest string) (Worktree, error) {
// garbage collecting worktrees
if _, err := r.runGitCommand(context.Background(), "worktree", "prune"); err != nil {
// ignore the error
}

if out, err := r.runGitCommand(context.Background(), "worktree", "add", "--detach", dest); err != nil {
return nil, formatCommandError(err, out)
}

return &worktree{
base: r.base,
worktreePath: dest,
}, nil
}

func (r *worktree) GetPath() string {
return r.worktreePath
}

func (r *worktree) Clean() error {
if out, err := r.base.runGitCommand(context.Background(), "worktree", "remove", r.worktreePath); err != nil {
return formatCommandError(err, out)
}
return nil
}

func (r *worktree) Checkout(ctx context.Context, commitish string) error {
out, err := r.runGitCommand(ctx, "checkout", "--detach", commitish)
if err != nil {
return formatCommandError(err, out)
}
return nil
}

// NewRepo creates a new Repo instance.
func NewRepo(dir, gitPath, remote, clonedBranch string, gitEnvs []string) *repo {
return &repo{
Expand All @@ -81,22 +142,55 @@ func (r *repo) GetClonedBranch() string {
return r.clonedBranch
}

// Copy does copying the repository to the given destination.
// Copy does copying the repository to the given destination using git worktree.
// The repository is cloned to the given destination with the detached HEAD.
// NOTE: the given “dest” must be a path that doesn’t exist yet.
// If you don't, it copies the repo root itself to the given dest as a subdirectory.
func (r *repo) Copy(dest string) (Repo, error) {
cmd := exec.Command("cp", "-rf", r.dir, dest)
out, err := cmd.CombinedOutput()
if err != nil {
// If you don't, you will get an error.
func (r *repo) Copy(dest string) (Worktree, error) {
// garbage collecting worktrees
if _, err := r.runGitCommand(context.Background(), "worktree", "prune"); err != nil {
// ignore the error
}

if out, err := r.runGitCommand(context.Background(), "worktree", "add", "--detach", dest); err != nil {
return nil, formatCommandError(err, out)
}

return &repo{
return &worktree{
base: r,
worktreePath: dest,
}, nil
}

// CopyToModify does cloning the repository to the given destination.
// The repository is cloned to the given destination with the .
// NOTE: the given “dest” must be a path that doesn’t exist yet.
// If you don't, you will get an error.
func (r *repo) CopyToModify(dest string) (Repo, error) {
cmd := exec.Command(r.gitPath, "clone", r.dir, dest)
if out, err := cmd.CombinedOutput(); err != nil {
return nil, formatCommandError(err, out)
}

cloned := &repo{
dir: dest,
gitPath: r.gitPath,
remote: r.remote,
clonedBranch: r.clonedBranch,
}, nil
gitEnvs: r.gitEnvs,
}

// because we did a local cloning so set the remote url of origin
if err := cloned.setRemote(context.Background(), r.remote); err != nil {
return nil, err
}

// fetch the latest changes which doesn't exist in the local repository
if out, err := cloned.runGitCommand(context.Background(), "fetch"); err != nil {
return nil, formatCommandError(err, out)
}

return cloned, nil
}

// ListCommits returns a list of commits in a given revision range.
Expand Down
Loading

0 comments on commit fd58db0

Please sign in to comment.