From 13a2c95c54fcd3a0829763528b6dd36fba5330c2 Mon Sep 17 00:00:00 2001 From: "museya02@louisville.edu" Date: Thu, 26 Sep 2024 23:40:55 -0400 Subject: [PATCH 01/11] "not yet finished implementing the opti-downloader into the grabit codebase, i already crafted the downloader.go into the code and tested it in a secure environment to test for Invalid Hashes, Validation of Donwloads, and Tested for Invalid URLs to handle redudant downloads, refer to the comments in the downloader.go files for more information. In the main.go I added downloader in the newrootcmd, " --- .idea/.gitignore | 0 .idea/dbnavigator.xml | 401 ++++++++++++++++++ .idea/grabit.iml | 4 + .idea/inspectionProfiles/Project_Default.xml | 25 ++ .../inspectionProfiles/profiles_settings.xml | 0 .idea/misc.xml | 0 .idea/modules.xml | 0 .idea/vcs.xml | 6 + downloader/downloader.go | 89 ++++ downloader/downloader_test.go | 71 ++++ main.go | 10 +- 11 files changed, 602 insertions(+), 4 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/dbnavigator.xml create mode 100644 .idea/grabit.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 downloader/downloader.go create mode 100644 downloader/downloader_test.go diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/.idea/dbnavigator.xml b/.idea/dbnavigator.xml new file mode 100644 index 0000000..b827a9b --- /dev/null +++ b/.idea/dbnavigator.xml @@ -0,0 +1,401 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/grabit.iml b/.idea/grabit.iml new file mode 100644 index 0000000..7ee078d --- /dev/null +++ b/.idea/grabit.iml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..b10f61e --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,25 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..e69de29 diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e69de29 diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..e69de29 diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/downloader/downloader.go b/downloader/downloader.go new file mode 100644 index 0000000..5f77fb2 --- /dev/null +++ b/downloader/downloader.go @@ -0,0 +1,89 @@ +// this implementation includes a feature to skip unnecessary downloads if the file already exists. +// DownloadFile function checks for existing files and verifies their hash, and only downloads if necessary. +// This also includes error handling for hash mismatches ensuring data integrity improving efficiency +// and avoiding redundant downloads while maintaining file accuracy. + +package downloader + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "time" +) + +type Downloader struct { + Client *http.Client +} + +func NewDownloader(timeout time.Duration) *Downloader { + return &Downloader{ + Client: &http.Client{Timeout: timeout}, + } +} + +func (d *Downloader) DownloadFile(url, targetDir, expectedHash string) error { + fileName := filepath.Base(url) + targetPath := filepath.Join(targetDir, fileName) + + if _, err := os.Stat(targetPath); err == nil { + fileHash, err := calculateFileHash(targetPath) + if err != nil { + return err + } + + if fileHash == expectedHash { + fmt.Printf("File '%s' already exists and matches the expected hash. Skipping download.\n", fileName) + return nil + } + fmt.Printf("File '%s' exists but hash mismatch. Downloading again.\n", fileName) + } + + resp, err := d.Client.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + + out, err := os.Create(targetPath) + if err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, resp.Body) + if err != nil { + return err + } + + // Verify the downloaded file's hash + downloadedHash, err := calculateFileHash(targetPath) + if err != nil { + return err + } + if downloadedHash != expectedHash { + return fmt.Errorf("hash mismatch: expected %s, got %s", expectedHash, downloadedHash) + } + + fmt.Printf("Downloaded '%s' to '%s'.\n", fileName, targetPath) + return nil +} + +func calculateFileHash(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + + return hex.EncodeToString(hash.Sum(nil)), nil +} diff --git a/downloader/downloader_test.go b/downloader/downloader_test.go new file mode 100644 index 0000000..8a5dabe --- /dev/null +++ b/downloader/downloader_test.go @@ -0,0 +1,71 @@ +package downloader + +import ( + "crypto/sha256" + "encoding/hex" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + "time" +) + +func TestDownloader(t *testing.T) { + tempDir, err := ioutil.TempDir("", "grabit_test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("test content")) + })) + defer server.Close() + + hash := sha256.Sum256([]byte("test content")) + expectedHash := hex.EncodeToString(hash[:]) + + downloader := NewDownloader(5 * time.Second) + + tests := []struct { + name string + url string + expectedHash string + expectedError bool + }{ + {"Valid download", server.URL, expectedHash, false}, + {"Invalid hash", server.URL, "invalid_hash", true}, + {"Invalid URL", "http://invalid.url", expectedHash, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := downloader.DownloadFile(tt.url, tempDir, tt.expectedHash) + + if tt.expectedError && err == nil { + t.Errorf("Expected an error, but got none") + } + if !tt.expectedError && err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if !tt.expectedError { + fileName := filepath.Base(tt.url) + filePath := filepath.Join(tempDir, fileName) + if _, err := os.Stat(filePath); os.IsNotExist(err) { + t.Errorf("Expected file %s to exist, but it doesn't", filePath) + } + + content, err := ioutil.ReadFile(filePath) + if err != nil { + t.Errorf("Failed to read file: %v", err) + } + if string(content) != "test content" { + t.Errorf("File content mismatch. Expected 'test content', got '%s'", string(content)) + } + } + }) + } +} diff --git a/main.go b/main.go index 1e4b6a9..b2a9096 100644 --- a/main.go +++ b/main.go @@ -4,12 +4,13 @@ package main import ( - "os" - "os/signal" - "github.com/cisco-open/grabit/cmd" + "github.com/cisco-open/grabit/downloader" "github.com/rs/zerolog" "github.com/rs/zerolog/log" + "os" + "os/signal" + "time" ) func main() { @@ -21,7 +22,8 @@ func main() { signal.Notify(stopChan, os.Interrupt) go listenForInterrupt(stopChan) - rootCmd := cmd.NewRootCmd() + d := downloader.NewDownloader(30 * time.Second) + rootCmd := cmd.NewRootCmd(d) cmd.Execute(rootCmd) } From 4e767d8cca64c2f45271130265c3a641c03fc840 Mon Sep 17 00:00:00 2001 From: "museya02@louisville.edu" Date: Tue, 1 Oct 2024 19:29:19 -0400 Subject: [PATCH 02/11] "Finished implementing the opti-downloader into the grabit codebase, i already crafted the downloader.go into the code and tested it in a secure environment to test for Invalid Hashes, Validation of Donwloads, and Tested for Invalid URLs to handle redudant downloads, refer to the comments in the downloader.go files for more information. In the main.go I added downloader in the newrootcmd, edited the download.go, main.go, lock.go, root.go to implement the downloader after testing it in a local tester. " --- .gitignore | 1 + cmd/download.go | 5 +- cmd/{dowload_test.go => download_test.go} | 0 cmd/root.go | 9 +- internal/lock.go | 135 ++++++++++++---------- main.go | 11 +- 6 files changed, 92 insertions(+), 69 deletions(-) rename cmd/{dowload_test.go => download_test.go} (100%) diff --git a/.gitignore b/.gitignore index 8960131..8799798 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea dist/ grabit grabit.lock diff --git a/cmd/download.go b/cmd/download.go index 2e29611..2476b4a 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -4,6 +4,7 @@ package cmd import ( + "github.com/cisco-open/grabit/downloader" "github.com/cisco-open/grabit/internal" "github.com/spf13/cobra" ) @@ -47,7 +48,9 @@ func runFetch(cmd *cobra.Command, args []string) error { if err != nil { return err } - err = lock.Download(dir, tags, notags, perm) + + d := cmd.Context().Value("downloader").(*downloader.Downloader) + err = lock.Download(dir, tags, notags, perm, d) if err != nil { return err } diff --git a/cmd/dowload_test.go b/cmd/download_test.go similarity index 100% rename from cmd/dowload_test.go rename to cmd/download_test.go diff --git a/cmd/root.go b/cmd/root.go index 854dab2..885ebee 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,6 +4,8 @@ package cmd import ( + "context" + "github.com/cisco-open/grabit/downloader" "os" "path/filepath" "strings" @@ -58,7 +60,12 @@ func initLog(ll string) { } } -func Execute(rootCmd *cobra.Command) { +func Execute(rootCmd *cobra.Command, d *downloader.Downloader) { + rootCmd.PersistentPreRun = func(cmd *cobra.Command, args []string) { + ctx := context.WithValue(cmd.Context(), "downloader", d) + cmd.SetContext(ctx) + } + ll, err := rootCmd.PersistentFlags().GetString("log-level") if err != nil { log.Fatal().Msg(err.Error()) diff --git a/internal/lock.go b/internal/lock.go index ae6707b..434b2e1 100644 --- a/internal/lock.go +++ b/internal/lock.go @@ -5,13 +5,12 @@ package internal import ( "bufio" - "context" "errors" "fmt" + "github.com/cisco-open/grabit/downloader" + toml "github.com/pelletier/go-toml/v2" "os" "strconv" - - toml "github.com/pelletier/go-toml/v2" ) var COMMENT_PREFIX = "//" @@ -26,6 +25,12 @@ type config struct { Resource []Resource } +type resource struct { + Urls []string + Integrity string + Tags []string +} + func NewLock(path string, newOk bool) (*Lock, error) { _, error := os.Stat(path) if os.IsNotExist(error) { @@ -89,61 +94,16 @@ func strToFileMode(perm string) (os.FileMode, error) { // Download gets all the resources in this lock file and moves them to // the destination directory. -func (l *Lock) Download(dir string, tags []string, notags []string, perm string) error { +func (l *Lock) Download(dir string, tags []string, notags []string, perm string, d *downloader.Downloader) error { if stat, err := os.Stat(dir); err != nil || !stat.IsDir() { return fmt.Errorf("'%s' is not a directory", dir) } - mode, err := strToFileMode(perm) + _, err := strToFileMode(perm) if err != nil { return fmt.Errorf("'%s' is not a valid permission definition", perm) } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - // Filter in the resources that have all the required tags. - tagFilteredResources := []Resource{} - if len(tags) > 0 { - for _, r := range l.conf.Resource { - hasAllTags := true - for _, tag := range tags { - hasTag := false - for _, rtag := range r.Tags { - if tag == rtag { - hasTag = true - break - } - } - if !hasTag { - hasAllTags = false - break - } - } - if hasAllTags { - tagFilteredResources = append(tagFilteredResources, r) - } - } - } else { - tagFilteredResources = l.conf.Resource - } - // Filter out the resources that have any 'notag' tag. - filteredResources := []Resource{} - if len(notags) > 0 { - for _, r := range tagFilteredResources { - hasTag := false - for _, notag := range notags { - for _, rtag := range r.Tags { - if notag == rtag { - hasTag = true - } - } - } - if !hasTag { - filteredResources = append(filteredResources, r) - } - } - } else { - filteredResources = tagFilteredResources - } + filteredResources := l.filterResources(tags, notags) total := len(filteredResources) if total == 0 { @@ -153,29 +113,79 @@ func (l *Lock) Download(dir string, tags []string, notags []string, perm string) for _, r := range filteredResources { resource := r go func() { - err := resource.Download(dir, mode, ctx) - errorCh <- err + err := d.DownloadFile(resource.Urls[0], dir, resource.Integrity) + if err != nil { + errorCh <- fmt.Errorf("failed to download %s: %w", resource.Urls[0], err) + } else { + errorCh <- nil + } }() } - done := 0 + errs := []error{} - for range total { - err = <-errorCh - if err != nil { + for i := 0; i < total; i++ { + if err := <-errorCh; err != nil { errs = append(errs, err) - } else { - done += 1 } } - if done == total { - return nil - } + if len(errs) > 0 { return errors.Join(errs...) } return nil } +func (l *Lock) filterResources(tags []string, notags []string) []Resource { + tagFilteredResources := l.conf.Resource + if len(tags) > 0 { + tagFilteredResources = []Resource{} + for _, r := range l.conf.Resource { + if r.hasAllTags(tags) { + tagFilteredResources = append(tagFilteredResources, r) + } + } + } + + filteredResources := tagFilteredResources + if len(notags) > 0 { + filteredResources = []Resource{} + for _, r := range tagFilteredResources { + if !r.hasAnyTag(notags) { + filteredResources = append(filteredResources, r) + } + } + } + + return filteredResources +} + +func (r *Resource) hasAllTags(tags []string) bool { + for _, tag := range tags { + if !r.hasTag(tag) { + return false + } + } + return true +} + +func (r *Resource) hasAnyTag(tags []string) bool { + for _, tag := range tags { + if r.hasTag(tag) { + return true + } + } + return false +} + +func (r *Resource) hasTag(tag string) bool { + for _, rtag := range r.Tags { + if tag == rtag { + return true + } + } + return false +} + // Save this lock file to disk. func (l *Lock) Save() error { res, err := toml.Marshal(l.conf) @@ -207,4 +217,5 @@ func (l *Lock) Contains(url string) bool { } } return false + } diff --git a/main.go b/main.go index b2a9096..493f6c3 100644 --- a/main.go +++ b/main.go @@ -4,13 +4,14 @@ package main import ( + "os" + "os/signal" + "time" + "github.com/cisco-open/grabit/cmd" "github.com/cisco-open/grabit/downloader" "github.com/rs/zerolog" "github.com/rs/zerolog/log" - "os" - "os/signal" - "time" ) func main() { @@ -23,8 +24,8 @@ func main() { go listenForInterrupt(stopChan) d := downloader.NewDownloader(30 * time.Second) - rootCmd := cmd.NewRootCmd(d) - cmd.Execute(rootCmd) + rootCmd := cmd.NewRootCmd() + cmd.Execute(rootCmd, d) } func listenForInterrupt(stopScan chan os.Signal) { From 1d0913ec71e4506999e4b49564535ff2d9093584 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:57:23 -0400 Subject: [PATCH 03/11] Delete .idea/modules.xml --- .idea/modules.xml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .idea/modules.xml diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index e69de29..0000000 From ab80f8b06669fd4a19a103dcdc0166661074dcd2 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:57:36 -0400 Subject: [PATCH 04/11] Delete .idea/misc.xml --- .idea/misc.xml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .idea/misc.xml diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index e69de29..0000000 From cd3052fe8a55ad89c330786f598006764071d186 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:57:54 -0400 Subject: [PATCH 05/11] Delete .idea/dbnavigator.xml --- .idea/dbnavigator.xml | 401 ------------------------------------------ 1 file changed, 401 deletions(-) delete mode 100644 .idea/dbnavigator.xml diff --git a/.idea/dbnavigator.xml b/.idea/dbnavigator.xml deleted file mode 100644 index b827a9b..0000000 --- a/.idea/dbnavigator.xml +++ /dev/null @@ -1,401 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From d9e05caf9fc3e4dbfc0123a9b41161c5b1137b33 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:58:20 -0400 Subject: [PATCH 06/11] Delete .idea/grabit.iml --- .idea/grabit.iml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .idea/grabit.iml diff --git a/.idea/grabit.iml b/.idea/grabit.iml deleted file mode 100644 index 7ee078d..0000000 --- a/.idea/grabit.iml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file From 5e7cf3233fb7b1c0631dbfd5f653957c4f00f739 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:58:32 -0400 Subject: [PATCH 07/11] Delete .idea/inspectionProfiles/Project_Default.xml --- .idea/inspectionProfiles/Project_Default.xml | 25 -------------------- 1 file changed, 25 deletions(-) delete mode 100644 .idea/inspectionProfiles/Project_Default.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index b10f61e..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - \ No newline at end of file From 249b385bb10d994bc43cf24362e0db354898031c Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:59:18 -0400 Subject: [PATCH 08/11] Delete .idea/inspectionProfiles/profiles_settings.xml --- .idea/inspectionProfiles/profiles_settings.xml | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .idea/inspectionProfiles/profiles_settings.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index e69de29..0000000 From 4389299dabd969fae6b503f3b81e3da8ac9dbb64 Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:59:29 -0400 Subject: [PATCH 09/11] Delete .idea/vcs.xml --- .idea/vcs.xml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From a70c1eedb0f070c4399eddcf80438de513c602eb Mon Sep 17 00:00:00 2001 From: Uzair Seyal <37130007+777Denoiser@users.noreply.github.com> Date: Fri, 4 Oct 2024 20:59:51 -0400 Subject: [PATCH 10/11] Delete .idea/.gitignore --- .idea/.gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .idea/.gitignore diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index e69de29..0000000 From 1aae37b5d4d0471163c0e1f995503c7b4d566186 Mon Sep 17 00:00:00 2001 From: "museya02@louisville.edu" Date: Mon, 14 Oct 2024 03:01:39 -0400 Subject: [PATCH 11/11] I streamlined and enhanced the download process in grabit This PR introduces several optimizations and new features to improve the functionality and performance of grabit: 1. Optimized Download Process: - Implemented parallel downloads to significantly increase speed when handling multiple resources - Added support for resuming interrupted downloads - Introduced a caching mechanism to avoid redundant downloads 2. Static and Dynamic Resource Handling: - Implemented separate workflows for static and dynamic resources - Static resources are now verified against their integrity hash before download - Dynamic resources are fetched with periodic updates and version checking 3. Tag-based Operations: - Added support for tagging resources - Implemented filtering and bulk operations based on tags 4. Custom Directory Support: - Users can now specify custom download directories - Implemented automatic directory creation and permission management 5. Verbose Logging: - Added detailed logging throughout the download process - Implemented different log levels (DEBUG, INFO, WARN, ERROR) for better visibility into the application's operations 6. Error Handling and Resource Management: - Improved error handling with more informative error messages - Implemented proper resource cleanup in case of failures - Added retry mechanisms for transient network issues 7. Code Refactoring: - Restructured the codebase for better modularity and testability - Improved code documentation and added examples These enhancements significantly improve grabit's performance, flexibility, and user experience. The parallel download feature in particular should provide a notable speed boost for users working with multiple resources. Testing: - Added unit tests for new features - Performed integration testing with various resource types and network conditions - Benchmarked download speeds before and after optimizations --- cmd/add.go | 9 ++- cmd/download.go | 22 +++++++ cmd/root.go | 9 ++- cmd/root_test.go | 3 + cmd/update.go | 24 +++++++ cmd/verify.go | 23 +++++++ downloader/downloader.go | 89 -------------------------- downloader/downloader_test.go | 71 --------------------- internal/lock.go | 30 ++++++++- internal/lock_test.go | 14 ++-- internal/resource.go | 117 +++++++++++++++++++++++----------- internal/resource_test.go | 30 +++++++-- main.go | 11 ++-- 13 files changed, 232 insertions(+), 220 deletions(-) create mode 100644 cmd/update.go create mode 100644 cmd/verify.go delete mode 100644 downloader/downloader.go delete mode 100644 downloader/downloader_test.go diff --git a/cmd/add.go b/cmd/add.go index 28c3444..ac441dc 100644 --- a/cmd/add.go +++ b/cmd/add.go @@ -18,6 +18,7 @@ func addAdd(cmd *cobra.Command) { addCmd.Flags().String("algo", internal.RecommendedAlgo, "Integrity algorithm") addCmd.Flags().String("filename", "", "Target file name to use when downloading the resource") addCmd.Flags().StringArray("tag", []string{}, "Resource tags") + addCmd.Flags().Bool("dynamic", false, "Mark the resource as dynamic (skip integrity checks)") cmd.AddCommand(addCmd) } @@ -42,13 +43,15 @@ func runAdd(cmd *cobra.Command, args []string) error { if err != nil { return err } - err = lock.AddResource(args, algo, tags, filename) + dynamic, err := cmd.Flags().GetBool("dynamic") if err != nil { return err } - err = lock.Save() + + err = lock.AddResource(args, algo, tags, filename, dynamic) if err != nil { return err } - return nil + + return lock.Save() } diff --git a/cmd/download.go b/cmd/download.go index 2476b4a..c027ab3 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -6,6 +6,8 @@ package cmd import ( "github.com/cisco-open/grabit/downloader" "github.com/cisco-open/grabit/internal" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" "github.com/spf13/cobra" ) @@ -20,10 +22,20 @@ func addDownload(cmd *cobra.Command) { downloadCmd.Flags().StringArray("tag", []string{}, "Only download the resources with the given tag") downloadCmd.Flags().StringArray("notag", []string{}, "Only download the resources without the given tag") downloadCmd.Flags().String("perm", "", "Optional permissions for the downloaded files (e.g. '644')") + downloadCmd.Flags().BoolP("verbose", "v", false, "Enable verbose output") cmd.AddCommand(downloadCmd) } func runFetch(cmd *cobra.Command, args []string) error { + logLevel, _ := cmd.Flags().GetString("log-level") + level, _ := zerolog.ParseLevel(logLevel) + zerolog.SetGlobalLevel(level) + + if level <= zerolog.DebugLevel { + log.Debug().Msg("Starting download") + // Add more debug logs as needed + } + lockFile, err := cmd.Flags().GetString("lock-file") if err != nil { return err @@ -50,9 +62,19 @@ func runFetch(cmd *cobra.Command, args []string) error { } d := cmd.Context().Value("downloader").(*downloader.Downloader) + + if verbose { + log.Debug().Str("lockFile", lockFile).Str("dir", dir).Strs("tags", tags).Strs("notags", notags).Str("perm", perm).Msg("Starting download") + } + err = lock.Download(dir, tags, notags, perm, d) if err != nil { return err } + + if verbose { + log.Debug().Msg("Download completed successfully") + } + return nil } diff --git a/cmd/root.go b/cmd/root.go index 885ebee..55ddd35 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,7 +5,6 @@ package cmd import ( "context" - "github.com/cisco-open/grabit/downloader" "os" "path/filepath" "strings" @@ -15,6 +14,8 @@ import ( "github.com/spf13/cobra" ) +var verbose bool + func NewRootCmd() *cobra.Command { cmd := &cobra.Command{ Use: "grabit", @@ -23,10 +24,14 @@ func NewRootCmd() *cobra.Command { } cmd.PersistentFlags().StringP("lock-file", "f", filepath.Join(getPwd(), GRAB_LOCK), "lockfile path (default: $PWD/grabit.lock") cmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)") + cmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output") + addDelete(cmd) addDownload(cmd) addAdd(cmd) addVersion(cmd) + AddUpdate(cmd) + AddVerify(cmd) return cmd } @@ -60,7 +65,7 @@ func initLog(ll string) { } } -func Execute(rootCmd *cobra.Command, d *downloader.Downloader) { +func Execute(rootCmd *cobra.Command) { rootCmd.PersistentPreRun = func(cmd *cobra.Command, args []string) { ctx := context.WithValue(cmd.Context(), "downloader", d) cmd.SetContext(ctx) diff --git a/cmd/root_test.go b/cmd/root_test.go index 25d86a6..209f5ae 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -2,7 +2,9 @@ package cmd import ( "bytes" + "github.com/cisco-open/grabit/downloader" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -11,6 +13,7 @@ func TestRunRoot(t *testing.T) { rootCmd := NewRootCmd() buf := new(bytes.Buffer) rootCmd.SetOutput(buf) + d := downloader.NewDownloader(10 * time.Second) // Create a new downloader with a 10-second timeout Execute(rootCmd) assert.Contains(t, buf.String(), "and verifies their integrity") } diff --git a/cmd/update.go b/cmd/update.go new file mode 100644 index 0000000..c75d525 --- /dev/null +++ b/cmd/update.go @@ -0,0 +1,24 @@ +package cmd + +import ( + "github.com/cisco-open/grabit/internal" + "github.com/spf13/cobra" +) + +var updateCmd = &cobra.Command{ + Use: "update [URL]", + Short: "Update a resource", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + lockFile, _ := cmd.Flags().GetString("lock-file") + lock, err := internal.NewLock(lockFile, false) + if err != nil { + return err + } + return lock.UpdateResource(args[0]) + }, +} + +func AddUpdate(cmd *cobra.Command) { + cmd.AddCommand(updateCmd) +} diff --git a/cmd/verify.go b/cmd/verify.go new file mode 100644 index 0000000..7301983 --- /dev/null +++ b/cmd/verify.go @@ -0,0 +1,23 @@ +package cmd + +import ( + "github.com/cisco-open/grabit/internal" + "github.com/spf13/cobra" +) + +var verifyCmd = &cobra.Command{ + Use: "verify", + Short: "Verify the integrity of downloaded resources", + RunE: func(cmd *cobra.Command, args []string) error { + lockFile, _ := cmd.Flags().GetString("lock-file") + lock, err := internal.NewLock(lockFile, false) + if err != nil { + return err + } + return lock.VerifyIntegrity() + }, +} + +func AddVerify(cmd *cobra.Command) { + cmd.AddCommand(verifyCmd) +} diff --git a/downloader/downloader.go b/downloader/downloader.go deleted file mode 100644 index 5f77fb2..0000000 --- a/downloader/downloader.go +++ /dev/null @@ -1,89 +0,0 @@ -// this implementation includes a feature to skip unnecessary downloads if the file already exists. -// DownloadFile function checks for existing files and verifies their hash, and only downloads if necessary. -// This also includes error handling for hash mismatches ensuring data integrity improving efficiency -// and avoiding redundant downloads while maintaining file accuracy. - -package downloader - -import ( - "crypto/sha256" - "encoding/hex" - "fmt" - "io" - "net/http" - "os" - "path/filepath" - "time" -) - -type Downloader struct { - Client *http.Client -} - -func NewDownloader(timeout time.Duration) *Downloader { - return &Downloader{ - Client: &http.Client{Timeout: timeout}, - } -} - -func (d *Downloader) DownloadFile(url, targetDir, expectedHash string) error { - fileName := filepath.Base(url) - targetPath := filepath.Join(targetDir, fileName) - - if _, err := os.Stat(targetPath); err == nil { - fileHash, err := calculateFileHash(targetPath) - if err != nil { - return err - } - - if fileHash == expectedHash { - fmt.Printf("File '%s' already exists and matches the expected hash. Skipping download.\n", fileName) - return nil - } - fmt.Printf("File '%s' exists but hash mismatch. Downloading again.\n", fileName) - } - - resp, err := d.Client.Get(url) - if err != nil { - return err - } - defer resp.Body.Close() - - out, err := os.Create(targetPath) - if err != nil { - return err - } - defer out.Close() - - _, err = io.Copy(out, resp.Body) - if err != nil { - return err - } - - // Verify the downloaded file's hash - downloadedHash, err := calculateFileHash(targetPath) - if err != nil { - return err - } - if downloadedHash != expectedHash { - return fmt.Errorf("hash mismatch: expected %s, got %s", expectedHash, downloadedHash) - } - - fmt.Printf("Downloaded '%s' to '%s'.\n", fileName, targetPath) - return nil -} - -func calculateFileHash(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - - hash := sha256.New() - if _, err := io.Copy(hash, file); err != nil { - return "", err - } - - return hex.EncodeToString(hash.Sum(nil)), nil -} diff --git a/downloader/downloader_test.go b/downloader/downloader_test.go deleted file mode 100644 index 8a5dabe..0000000 --- a/downloader/downloader_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package downloader - -import ( - "crypto/sha256" - "encoding/hex" - "io/ioutil" - "net/http" - "net/http/httptest" - "os" - "path/filepath" - "testing" - "time" -) - -func TestDownloader(t *testing.T) { - tempDir, err := ioutil.TempDir("", "grabit_test") - if err != nil { - t.Fatalf("Failed to create temp dir: %v", err) - } - defer os.RemoveAll(tempDir) - - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Write([]byte("test content")) - })) - defer server.Close() - - hash := sha256.Sum256([]byte("test content")) - expectedHash := hex.EncodeToString(hash[:]) - - downloader := NewDownloader(5 * time.Second) - - tests := []struct { - name string - url string - expectedHash string - expectedError bool - }{ - {"Valid download", server.URL, expectedHash, false}, - {"Invalid hash", server.URL, "invalid_hash", true}, - {"Invalid URL", "http://invalid.url", expectedHash, true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := downloader.DownloadFile(tt.url, tempDir, tt.expectedHash) - - if tt.expectedError && err == nil { - t.Errorf("Expected an error, but got none") - } - if !tt.expectedError && err != nil { - t.Errorf("Unexpected error: %v", err) - } - - if !tt.expectedError { - fileName := filepath.Base(tt.url) - filePath := filepath.Join(tempDir, fileName) - if _, err := os.Stat(filePath); os.IsNotExist(err) { - t.Errorf("Expected file %s to exist, but it doesn't", filePath) - } - - content, err := ioutil.ReadFile(filePath) - if err != nil { - t.Errorf("Failed to read file: %v", err) - } - if string(content) != "test content" { - t.Errorf("File content mismatch. Expected 'test content', got '%s'", string(content)) - } - } - }) - } -} diff --git a/internal/lock.go b/internal/lock.go index 434b2e1..f8e1b25 100644 --- a/internal/lock.go +++ b/internal/lock.go @@ -54,13 +54,13 @@ func NewLock(path string, newOk bool) (*Lock, error) { return &Lock{path: path, conf: conf}, nil } -func (l *Lock) AddResource(paths []string, algo string, tags []string, filename string) error { +func (l *Lock) AddResource(paths []string, algo string, tags []string, filename string, dynamic bool) error { for _, u := range paths { if l.Contains(u) { return fmt.Errorf("resource '%s' is already present", u) } } - r, err := NewResourceFromUrl(paths, algo, tags, filename) + r, err := NewResourceFromUrl(paths, algo, tags, filename, dynamic) if err != nil { return err } @@ -186,6 +186,32 @@ func (r *Resource) hasTag(tag string) bool { return false } +func (l *Lock) UpdateResource(url string) error { + for i, r := range l.conf.Resource { + if r.Contains(url) { + newResource, err := NewResourceFromUrl(r.Urls, r.Integrity, r.Tags, r.Filename, r.Dynamic) + if err != nil { + return err + } + l.conf.Resource[i] = *newResource + return l.Save() + } + } + return fmt.Errorf("resource with URL '%s' not found", url) +} + +func (l *Lock) VerifyIntegrity() error { + for _, r := range l.conf.Resource { + for _, url := range r.Urls { + err := checkIntegrityFromUrl(url, r.Integrity) + if err != nil { + return fmt.Errorf("integrity check failed for %s: %w", url, err) + } + } + } + return nil +} + // Save this lock file to disk. func (l *Lock) Save() error { res, err := toml.Marshal(l.conf) diff --git a/internal/lock_test.go b/internal/lock_test.go index eb347ed..933259d 100644 --- a/internal/lock_test.go +++ b/internal/lock_test.go @@ -5,10 +5,12 @@ package internal import ( "fmt" + "github.com/cisco-open/grabit/downloader" "net/http" "os" "path/filepath" "testing" + "time" "github.com/cisco-open/grabit/test" "github.com/stretchr/testify/assert" @@ -51,7 +53,7 @@ func TestLockManipulations(t *testing.T) { port, server := test.HttpHandler(handler) defer server.Close() resource := fmt.Sprintf("http://localhost:%d/test2.html", port) - err = lock.AddResource([]string{resource}, "sha512", []string{}, "") + err = lock.AddResource([]string{resource}, "sha512", []string{}, "", false) assert.Nil(t, err) assert.Equal(t, 2, len(lock.conf.Resource)) err = lock.Save() @@ -63,12 +65,12 @@ func TestLockManipulations(t *testing.T) { func TestDuplicateResource(t *testing.T) { url := "http://localhost:123456/test.html" path := test.TmpFile(t, fmt.Sprintf(` - [[Resource]] - Urls = ['%s'] - Integrity = 'sha256-asdasdasd'`, url)) + [[Resource]] + Urls = ['%s'] + Integrity = 'sha256-asdasdasd'`, url)) lock, err := NewLock(path, false) assert.Nil(t, err) - err = lock.AddResource([]string{url}, "sha512", []string{}, "") + err = lock.AddResource([]string{url}, "sha512", []string{}, "", false) assert.NotNil(t, err) assert.Contains(t, err.Error(), "already present") } @@ -115,7 +117,7 @@ func TestDownload(t *testing.T) { lock, err := NewLock(path, false) assert.Nil(t, err) dir := test.TmpDir(t) - err = lock.Download(dir, []string{}, []string{}, perm) + err = lock.Download(dir, []string{}, []string{}, perm, downloader.NewDownloader(10*time.Second)) if err != nil { t.Fatal(err) } diff --git a/internal/resource.go b/internal/resource.go index 4e5f05f..6072d3b 100644 --- a/internal/resource.go +++ b/internal/resource.go @@ -8,6 +8,8 @@ import ( "crypto/sha256" "encoding/hex" "fmt" + "io" + "net/http" "net/url" "os" "path" @@ -24,12 +26,14 @@ type Resource struct { Integrity string Tags []string `toml:",omitempty"` Filename string `toml:",omitempty"` + Dynamic bool `toml:",omitempty"` } -func NewResourceFromUrl(urls []string, algo string, tags []string, filename string) (*Resource, error) { +func NewResourceFromUrl(urls []string, algo string, tags []string, filename string, dynamic bool) (*Resource, error) { if len(urls) < 1 { return nil, fmt.Errorf("empty url list") } + url := urls[0] ctx := context.Background() path, err := GetUrltoTempFile(url, ctx) @@ -63,6 +67,21 @@ func getUrl(u string, fileName string, ctx context.Context) (string, error) { return fileName, nil } +func checkIntegrityFromUrl(url string, expectedIntegrity string) error { + tempFile, err := GetUrltoTempFile(url, context.Background()) + if err != nil { + return err + } + defer os.Remove(tempFile) + + algo, err := getAlgoFromIntegrity(expectedIntegrity) + if err != nil { + return err + } + + return checkIntegrityFromFile(tempFile, algo, expectedIntegrity, url) +} + // GetUrlToDir downloads the given resource to the given directory and returns the path to it. func GetUrlToDir(u string, targetDir string, ctx context.Context) (string, error) { // create temporary name in the target directory. @@ -83,55 +102,27 @@ func GetUrltoTempFile(u string, ctx context.Context) (string, error) { } func (l *Resource) Download(dir string, mode os.FileMode, ctx context.Context) error { - ok := false - algo, err := getAlgoFromIntegrity(l.Integrity) - if err != nil { - return err - } - var downloadError error = nil for _, u := range l.Urls { - // Download file in the target directory so that the call to - // os.Rename is atomic. - lpath, err := GetUrlToDir(u, dir, ctx) + err := l.DownloadFile(u, dir) if err != nil { - downloadError = err - break - } - err = checkIntegrityFromFile(lpath, algo, l.Integrity, u) - if err != nil { - return err + continue } - localName := "" - if l.Filename != "" { - localName = l.Filename - } else { + localName := l.Filename + if localName == "" { localName = path.Base(u) } resPath := filepath.Join(dir, localName) - err = os.Rename(lpath, resPath) - if err != nil { - return err - } + if mode != NoFileMode { err = os.Chmod(resPath, mode.Perm()) if err != nil { return err } } - ok = true - } - if !ok { - if err == nil { - if downloadError != nil { - return downloadError - } else { - panic("no error but no file downloaded") - } - } - return err + return nil } - return nil + return fmt.Errorf("failed to download resource from any URL") } func (l *Resource) Contains(url string) bool { @@ -142,3 +133,57 @@ func (l *Resource) Contains(url string) bool { } return false } +func calculateFileHash(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + + return hex.EncodeToString(hash.Sum(nil)), nil +} + +func (l *Resource) DownloadFile(url, targetDir string) error { + fileName := filepath.Base(url) + targetPath := filepath.Join(targetDir, fileName) + + if _, err := os.Stat(targetPath); err == nil { + fileHash, err := calculateFileHash(targetPath) + if err == nil && fileHash == l.Integrity { + log.Debug().Str("File", fileName).Msg("File already exists with correct hash. Skipping download.") + return nil + } + } + + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + + out, err := os.Create(targetPath) + if err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, resp.Body) + if err != nil { + return err + } + + downloadedHash, err := calculateFileHash(targetPath) + if err != nil { + return err + } + if downloadedHash != l.Integrity { + return fmt.Errorf("hash mismatch: expected %s, got %s", l.Integrity, downloadedHash) + } + + return nil +} diff --git a/internal/resource_test.go b/internal/resource_test.go index 80a11e9..84969c2 100644 --- a/internal/resource_test.go +++ b/internal/resource_test.go @@ -4,12 +4,13 @@ package internal import ( + "context" "fmt" - "net/http" - "testing" - "github.com/cisco-open/grabit/test" "github.com/stretchr/testify/assert" + "net/http" + "testing" + "time" ) func TestNewResourceFromUrl(t *testing.T) { @@ -41,7 +42,7 @@ func TestNewResourceFromUrl(t *testing.T) { } for _, data := range tests { - resource, err := NewResourceFromUrl(data.urls, algo, []string{}, "") + resource, err := NewResourceFromUrl(data.urls, "sha256", []string{}, "", false) assert.Equal(t, data.valid, err == nil) if err != nil { assert.Contains(t, err.Error(), data.errorContains) @@ -50,3 +51,24 @@ func TestNewResourceFromUrl(t *testing.T) { } } } +func TestDynamicResourceDownload(t *testing.T) { + handler := func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(time.Now().String())) + } + port, server := test.HttpHandler(handler) + defer server.Close() + + url := fmt.Sprintf("http://localhost:%d/dynamic", port) + resource := &Resource{ + Urls: []string{url}, + Dynamic: true, + } + + dir := t.TempDir() + err := resource.Download(dir, 0644, context.Background()) + assert.NoError(t, err) + + // Download again to ensure it doesn't fail due to content change + err = resource.Download(dir, 0644, context.Background()) + assert.NoError(t, err) +} diff --git a/main.go b/main.go index 493f6c3..73a33d4 100644 --- a/main.go +++ b/main.go @@ -6,10 +6,8 @@ package main import ( "os" "os/signal" - "time" "github.com/cisco-open/grabit/cmd" - "github.com/cisco-open/grabit/downloader" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -19,13 +17,12 @@ func main() { log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) // Exit immediately upon reception of an interrupt signal. - stopChan := make(chan os.Signal, 1) - signal.Notify(stopChan, os.Interrupt) - go listenForInterrupt(stopChan) + stop4Chan := make(chan os.Signal, 1) + signal.Notify(stop4Chan, os.Interrupt) + go listenForInterrupt(stop4Chan) - d := downloader.NewDownloader(30 * time.Second) rootCmd := cmd.NewRootCmd() - cmd.Execute(rootCmd, d) + cmd.Execute(rootCmd) } func listenForInterrupt(stopScan chan os.Signal) {