Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: limit the size of the git clone #1111

Merged
merged 3 commits into from
Dec 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
qadisablecliFlag = "qa-disable-cli"
qaportFlag = "qa-port"
planProgressPortFlag = "plan-progress-port"
maxCloneSizeBytesFlag = "max-clone-size"
transformerSelectorFlag = "transformer-selector"
qaEnabledCategoriesFlag = "qa-enable"
qaDisabledCategoriesFlag = "qa-disable"
Expand Down
4 changes: 4 additions & 0 deletions cmd/plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
)

type planFlags struct {
maxVCSRepoCloneSize int64
progressServerPort int
planfile string
srcpath string
Expand Down Expand Up @@ -65,6 +66,8 @@ func planHandler(cmd *cobra.Command, flags planFlags) {
}()
defer lib.Destroy()

vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)

var err error
planfile := flags.planfile
srcpath := flags.srcpath
Expand Down Expand Up @@ -182,6 +185,7 @@ func GetPlanCommand() *cobra.Command {
planCmd.Flags().StringSliceVar(&flags.preSets, preSetFlag, []string{}, "Specify preset config to use.")
planCmd.Flags().StringArrayVar(&flags.setconfigs, setConfigFlag, []string{}, "Specify config key-value pairs.")
planCmd.Flags().IntVar(&flags.progressServerPort, planProgressPortFlag, 0, "Port for the plan progress server. If not provided, the server won't be started.")
planCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")
planCmd.Flags().BoolVar(&flags.disableLocalExecution, common.DisableLocalExecutionFlag, false, "Allow files to be executed locally.")
planCmd.Flags().BoolVar(&flags.failOnEmptyPlan, common.FailOnEmptyPlan, false, "If true, planning will exit with a failure exit code if no services are detected (and no default transformers are found).")

Expand Down
13 changes: 12 additions & 1 deletion cmd/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (

type transformFlags struct {
qaflags
// maxVCSRepoCloneSize is the maximum size in bytes for cloning repos
maxVCSRepoCloneSize int64
// ignoreEnv tells us whether to use data collected from the local machine
ignoreEnv bool
// disableLocalExecution disables execution of executables locally
Expand Down Expand Up @@ -72,6 +74,7 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
}
defer pprof.StopCPUProfile()
}
vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)

ctx, cancel := context.WithCancel(cmd.Context())
logrus.AddHook(common.NewCleanupHook(cancel))
Expand Down Expand Up @@ -250,7 +253,14 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
}
startQA(flags.qaflags)
}
if err := lib.Transform(ctx, transformationPlan, preExistingPlan, flags.outpath, flags.transformerSelector, flags.maxIterations); err != nil {
if err := lib.Transform(
ctx,
transformationPlan,
preExistingPlan,
flags.outpath,
flags.transformerSelector,
flags.maxIterations,
); err != nil {
logrus.Fatalf("failed to transform. Error: %q", err)
}
logrus.Infof("Transformed target artifacts can be found at [%s].", flags.outpath)
Expand Down Expand Up @@ -290,6 +300,7 @@ func GetTransformCommand() *cobra.Command {
transformCmd.Flags().StringVarP(&flags.customizationsPath, customizationsFlag, "c", "", "Specify directory or a git url (see https://move2kube.konveyor.io/concepts/git-support) where customizations are stored. By default we look for "+common.DefaultCustomizationDir)
transformCmd.Flags().StringVarP(&flags.transformerSelector, transformerSelectorFlag, "t", "", "Specify the transformer selector.")
transformCmd.Flags().BoolVar(&flags.qaskip, qaSkipFlag, false, "Enable/disable the default answers to questions posed in QA Cli sub-system. If disabled, you will have to answer the questions posed by QA during interaction.")
transformCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")

// QA options
transformCmd.Flags().StringSliceVar(&flags.qaEnabledCategories, qaEnabledCategoriesFlag, []string{}, "Specify the QA categories to enable (cannot be used in conjunction with qa-disable)")
Expand Down
127 changes: 71 additions & 56 deletions common/vcs/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ import (
"strings"
"time"

"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/go-git/go-git/v5/plumbing/transport/http"
"github.com/go-git/go-git/v5/plumbing/transport/ssh"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/konveyor/move2kube/common"
"github.com/konveyor/move2kube/qaengine"
"github.com/sirupsen/logrus"
Expand All @@ -47,6 +51,11 @@ type GitVCSRepo struct {
GitRepoPath string
}

// gitVCSRegex recognises git repo urls written as git+https://host[:port][/path]
// or git+ssh://host[:port][/path]. The host must be made of alphanumeric labels
// joined by single '-' or '.' characters and end in a 2 to 5 letter suffix.
// It is compiled once at package scope and shared by every caller.
var gitVCSRegex = regexp.MustCompile(`^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`)

func isGitCommitHash(commithash string) bool {
gitCommitHashRegex := regexp.MustCompile(`^[a-fA-F0-9]{40}$`)
return gitCommitHashRegex.MatchString(commithash)
Expand Down Expand Up @@ -112,26 +121,23 @@ func getGitRepoStruct(vcsurl string) (*GitVCSRepo, error) {

}

// isGitVCS checks if the given vcs url is git
// isGitVCS checks if the given vcs url is a git repo url
func isGitVCS(vcsurl string) bool {
// for https or ssh
gitVCSRegex := `^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`
matched, err := regexp.MatchString(gitVCSRegex, vcsurl)
if err != nil {
logrus.Fatalf("failed to match the given vcsurl %v with the git vcs regex expression %v. Error : %v", vcsurl, gitVCSRegex, err)
}
return matched
return gitVCSRegex.MatchString(vcsurl)
}

func pushGitVCS(remotePath, folderName string) error {
func pushGitVCS(remotePath, folderName string, maxSize int64) error {
if !common.IgnoreEnvironment {
logrus.Warnf("push to remote git repositories using credentials from the environment is not yet supported.")
}
remotePathSplitByAt := strings.Split(remotePath, "@")
remotePathSplitByColon := strings.Split(remotePathSplitByAt[0], ":")
isSSH := strings.HasPrefix(remotePath, "git+ssh")
isHTTPS := strings.HasPrefix(remotePath, "git+https")
gitFSPath := GetClonedPath(remotePath, folderName, false)
gitFSPath, err := GetClonedPath(remotePath, folderName, false)
if err != nil {
return fmt.Errorf("failed to clone the repo. Error: %w", err)
}
if (isHTTPS && len(remotePathSplitByColon) > 2) || (isSSH && len(remotePathSplitByColon) > 2) {
gitFSPath = strings.TrimSuffix(gitFSPath, remotePathSplitByColon[len(remotePathSplitByColon)-1])
}
Expand Down Expand Up @@ -202,60 +208,73 @@ func pushGitVCS(remotePath, folderName string) error {
return nil
}

// Clone Clones a git repository with the given commit depth and path where to be cloned and returns final path
func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, error) {

if gitCloneOptions.CloneDestinationPath == "" {
return "", fmt.Errorf("the path where the repository has to be clone is empty - %s", gitCloneOptions.CloneDestinationPath)
// Clone clones a git repository with the given commit depth
// and path where it is to be cloned and returns the final path inside the repo
func (gvcsrepo *GitVCSRepo) Clone(cloneOptions VCSCloneOptions) (string, error) {
if cloneOptions.CloneDestinationPath == "" {
return "", fmt.Errorf("the path where the repository has to be cloned cannot be empty")
}
repoPath := filepath.Join(gitCloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
_, err := os.Stat(repoPath)
if os.IsNotExist(err) {
logrus.Debugf("cloned output would be available at '%s'", repoPath)
} else if gitCloneOptions.Overwrite {
logrus.Infof("git repository might get overwritten at %s", repoPath)
err = os.RemoveAll(repoPath)
if err != nil {
return "", fmt.Errorf("failed to remove the directory at the given path - %s", repoPath)
repoPath := filepath.Join(cloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
repoDirInfo, err := os.Stat(repoPath)
if err != nil {
if !os.IsNotExist(err) {
return "", fmt.Errorf("failed to stat the git repo clone destination path '%s'. error: %w", repoPath, err)
}
logrus.Debugf("the cloned git repo will be available at '%s'", repoPath)
} else {
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
if !cloneOptions.Overwrite {
if !repoDirInfo.IsDir() {
return "", fmt.Errorf("a file already exists at the git repo clone destination path '%s'", repoPath)
}
logrus.Infof("Assuming that the directory at '%s' is the cloned repo", repoPath)
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
}
logrus.Infof("git repository clone will overwrite the files/directories at '%s'", repoPath)
if err := os.RemoveAll(repoPath); err != nil {
return "", fmt.Errorf("failed to remove the files/directories at '%s' . error: %w", repoPath, err)
}
}
logrus.Infof("Cloning the repository using git into '%s' . This might take some time.", cloneOptions.CloneDestinationPath)

// ------------
var repoDirWt, dotGitDir billy.Filesystem
repoDirWt = osfs.New(repoPath)
dotGitDir, _ = repoDirWt.Chroot(git.GitDirName)
fStorer := filesystem.NewStorage(dotGitDir, cache.NewObjectLRUDefault())
limitStorer := Limit(fStorer, cloneOptions.MaxSize)
// ------------

commitDepth := 1
if cloneOptions.CommitDepth != 0 {
commitDepth = cloneOptions.CommitDepth
}
logrus.Infof("Cloning the repository using git into %s. This might take some time.", gitCloneOptions.CloneDestinationPath)
if gvcsrepo.Branch != "" {
commitDepth := 1
if gitCloneOptions.CommitDepth != 0 {
commitDepth = gitCloneOptions.CommitDepth
}
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
SingleBranch: true,
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)),
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
logrus.Debugf("provided branch %+v does not exist in the remote, therefore creating one.", gvcsrepo.Branch)
logrus.Debugf("failed to clone the given branch '%s' . Will clone the entire repo and try again.", gvcsrepo.Branch)
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options. Error : %+v", err)
return "", fmt.Errorf("failed to perform clone operation using git. Error: %w", err)
}
branch := fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)
b := plumbing.ReferenceName(branch)
w, err := gvcsrepo.GitRepository.Worktree()
if err != nil {
return "", fmt.Errorf("failed return a worktree for the repostiory. Error : %+v", err)
return "", fmt.Errorf("failed return a worktree for the repostiory. Error: %w", err)
}

err = w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b})

if err != nil {
err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b})
if err != nil {
if err := w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b}); err != nil {
logrus.Debugf("failed to checkout the branch '%s', creating it...", b)
if err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b}); err != nil {
return "", fmt.Errorf("failed checkout a new branch. Error : %+v", err)
}
}
Expand All @@ -265,45 +284,41 @@ func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, erro
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
}
r, err := git.PlainOpen(repoPath)
if err != nil {
return "", fmt.Errorf("failed to open the git repository at the given path %+v. Error : %+v", repoPath, err)
return "", fmt.Errorf("failed to open the git repository at the given path '%s' . Error: %w", repoPath, err)
}
w, err := r.Worktree()
if err != nil {
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error : %+v", r, err)
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error: %w", r, err)
}
checkoutOpts := git.CheckoutOptions{
Hash: commitHash,
}
err = w.Checkout(&checkoutOpts)
if err != nil {
return "", fmt.Errorf("failed to checkout commit hash : %s on work tree. Error : %+v", commitHash, w)
checkoutOpts := git.CheckoutOptions{Hash: commitHash}
if err := w.Checkout(&checkoutOpts); err != nil {
return "", fmt.Errorf("failed to checkout commit hash '%s' on work tree. Error: %w", commitHash, err)
}
} else if gvcsrepo.Tag != "" {
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/tags/%s", gvcsrepo.Tag)),
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
}
} else {
commitDepth := 1
cloneOpts := git.CloneOptions{
URL: gvcsrepo.URL,
Depth: commitDepth,
SingleBranch: true,
ReferenceName: "refs/heads/main",
}
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
if err != nil {
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
return "", fmt.Errorf("failed to perform clone operation using git with options %+v and %+v. Error: %w", cloneOpts, cloneOptions, err)
}
}
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
Expand Down
39 changes: 20 additions & 19 deletions common/vcs/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/konveyor/move2kube/common"
)

func TestIsGitCommitHash(t *testing.T) {
Expand Down Expand Up @@ -125,45 +124,47 @@ func TestIsGitVCS(t *testing.T) {
}

func TestClone(t *testing.T) {
// Test case - clone a valid vcs url with overwrite true
t.Log("Test case - clone a valid vcs url with overwrite true")
gitURL := "git+https://github.com/konveyor/move2kube.git"
repo, err := getGitRepoStruct(gitURL)
if err != nil {
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
t.Fatalf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
}
overwrite := true
tempPath, err := filepath.Abs(common.RemoteTempPath)
if err != nil {
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
tempPath := t.TempDir()
cloneDestPath := filepath.Join(tempPath, "test-clone")
var infiniteSize int64 = -1
cloneOpts := VCSCloneOptions{
CommitDepth: 1,
Overwrite: overwrite,
CloneDestinationPath: cloneDestPath,
MaxSize: infiniteSize,
}
folderName := "test-clone"
cloneOpts := VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
clonedPath, err := repo.Clone(cloneOpts)
if err != nil {
t.Errorf("failed to clone the git repo. Error : %+v", err)
t.Fatalf("failed to clone the git repo. Error : %+v", err)
}

// Test case 2 - Repository already exists with overwrite true
t.Log("Test case 2 - Repository already exists with overwrite false")
gitURL = "git+https://github.com/konveyor/move2kube.git"
repo, err = getGitRepoStruct(gitURL)
if err != nil {
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
t.Fatalf("failed to get git repo struct for the given git URL '%s' . Error : %+v", gitURL, err)
}
overwrite = false
tempPath, err = filepath.Abs(common.RemoteTempPath)
if err != nil {
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
cloneOpts = VCSCloneOptions{
CommitDepth: 1,
Overwrite: overwrite,
CloneDestinationPath: cloneDestPath,
MaxSize: infiniteSize,
}
folderName = "test-clone"
cloneOpts = VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
clonedPathWithoutOverwrite, err := repo.Clone(cloneOpts)
if err != nil {
t.Errorf("failed to clone the git repo. Error : %+v", err)
t.Fatalf("failed to clone the git repo. Error : %+v", err)
}
if clonedPath != clonedPathWithoutOverwrite {
t.Errorf("cloned paths did not match with overwrite false. cloned path %s, cloned path without overwrite: %s", clonedPath, clonedPathWithoutOverwrite)
t.Fatalf("cloned paths did not match with overwrite false. cloned path '%s', cloned path without overwrite: '%s'", clonedPath, clonedPathWithoutOverwrite)
}

}

func TestIsGitBranch(t *testing.T) {
Expand Down
Loading
Loading