diff --git a/README.md b/README.md index 0d2ea94..a0fe8e7 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,9 @@ Flags: --configName string Base name for config file (default "config") --configPath string Path to any config files -c, --copyrights Flag copyrights - --custom string Custom templates to use (default "..") + --custom string Custom templates to use (default "default") -d, --debug Enable debug logging + --dir string A directory in which to identify licenses -f, --file string A file in which to identify licenses -x, --hash Output file hash -h, --help help for license-scanner @@ -100,7 +101,7 @@ Flags: --list List the license templates to be used -n, --normalized Flag normalized -q, --quiet Set logging to quiet - --spdx string SPDX templates to use (default "3.18") + --spdx string SPDX templates to use (default "default") ``` ### Example CLI usage @@ -364,11 +365,13 @@ In help mode, all other flags are ignored. ### Scan mode -When running `license_scanner -f ` the input file is scanned for license matches. +When running `license_scanner --file <file>` the input file is scanned for license matches. +When running `license_scanner --dir <dir>` the input directory is recursively scanned for license matches.
-| Name | Shorthand | Type | Usage |
-|--------|-----------|--------|-------------------------------------|
-| -file | --f | string | A file in which to identify licenses |
+| Name | Shorthand | Type | Usage |
+|--------|-----------|--------|-------------------------------------------|
+| --file | -f | string | A file in which to identify licenses |
+| --dir | | string | A directory in which to identify licenses |
 
 The following **optional** runtime flags may be used to modify and enhance the behavior:
 
diff --git a/cmd/license-scanner.md b/cmd/license-scanner.md
index 8a8dc98..f485778 100644
--- a/cmd/license-scanner.md
+++ b/cmd/license-scanner.md
@@ -35,6 +35,7 @@ license-scanner [flags]
  -c, --copyrights Flag copyrights
  --custom string Custom templates to use (default "default")
  -d, --debug Enable debug logging
+ --dir string A directory in which to identify licenses
  -f, --file string A file in which to identify licenses
  -x, --hash Output file hash
  -h, --help help for license-scanner
@@ -46,4 +47,4 @@ license-scanner [flags]
  --spdx string SPDX templates to use (default "default")
 ```
 
-###### Auto generated by spf13/cobra on 30-Sep-2022
+###### Auto generated by spf13/cobra on 6-Oct-2022
diff --git a/cmd/root.go b/cmd/root.go
index 6346a8f..9ef1de1 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -81,6 +81,8 @@ Please give us feedback at: https://github.com/IBM/license-scanner/issues
 		f := cfg.GetString(configurer.FileFlag)
 		if f != "" {
 			return findLicensesInFile(cfg, f)
+		} else if cfg.GetString(configurer.DirFlag) != "" {
+			return findLicensesInDirectory(cfg)
 		} else if cfg.GetBool(configurer.ListFlag) {
 			return listLicenses(cfg)
 		} else if cfg.GetString(configurer.AddAllFlag) != "" {
@@ -153,6 +155,76 @@ func listLicenses(cfg *viper.Viper) error {
 	return nil
 }
 
+// findLicensesInDirectory runs identifier.IdentifyLicensesInDirectory on the
+// directory named by the dir flag and prints the outcome to stdout: for each
+// file with matches, the matched license IDs in alphabetical order with the
+// begin/end of each match; for each other file, a "No licenses were found" line.
+func findLicensesInDirectory(cfg *viper.Viper) error {
+	ProjectLogger.Enter()
+	defer ProjectLogger.Exit()
+
+	d := cfg.GetString(configurer.DirFlag)
+
+	licenseLibrary, err := licenses.NewLicenseLibrary(cfg)
+	if err != nil {
+		return err
+	}
+	if err := licenseLibrary.AddAll(); err != nil {
+		return err
+	}
+
+	options := identifier.Options{
+		ForceResult: true,
+		Enhancements: identifier.Enhancements{
+			AddNotes:       "",
+			AddTextBlocks:  true,
+			FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag),
+			FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag),
+			FlagKeywords:   cfg.GetBool(configurer.KeywordsFlag),
+		},
+	}
+
+	results, err := identifier.IdentifyLicensesInDirectory(d, options, licenseLibrary)
+	if err != nil {
+		return err
+	}
+
+	for _, result := range results {
+		if len(result.Matches) == 0 {
+			fmt.Printf("\nNo licenses were found: %v\n", result.File)
+			continue
+		}
+
+		// Print the matches by license ID in alphabetical order
+		fmt.Printf("\nFOUND LICENSE MATCHES: %v\n", result.File)
+		found := make([]string, 0, len(result.Matches))
+		for id := range result.Matches {
+			found = append(found, id)
+		}
+		sort.Strings(found)
+		for _, id := range found {
+			fmt.Printf("\tLicense ID:\t%v\n", id)
+			var prev identifier.Match
+			for _, m := range result.Matches[id] {
+				// Print if not same as prev
+				if m != prev {
+					fmt.Printf("\t\tbegins: %5v\tends: %5v\n", m.Begins, m.Ends)
+					prev = m
+				}
+			}
+		}
+		fmt.Println()
+
+		// Log the text blocks only when the logger level is at least log.INFO.
+		if ProjectLogger.GetLevel() >= log.INFO {
+			for _, block := range result.Blocks {
+				ProjectLogger.Infof("%v :: %v", block.Matches, block.Text)
+			}
+		}
+	}
+	return nil
+}
+
 func findLicensesInFile(cfg *viper.Viper, f string) error {
 	ProjectLogger.Enter()
 	defer ProjectLogger.Exit()
diff --git a/configurer/configurer.go b/configurer/configurer.go
index 7bd578d..c200102 100644
--- a/configurer/configurer.go
+++ b/configurer/configurer.go
@@ -26,6 +26,7 @@ const (
 	DebugFlag = "debug"
 	QuietFlag = "quiet"
 	LicenseFlag = "license"
+	DirFlag = "dir"
 	FileFlag = "file"
 	ConfigPathFlag = "configPath"
 	ConfigNameFlag = "configName"
@@ -122,6 +123,7 @@ func NewDefaultFlags() *pflag.FlagSet {
 func AddDefaultFlags(flagSet *pflag.FlagSet) {
 	flagSet.BoolP(DebugFlag, "d", false, "Enable debug logging")
 	flagSet.BoolP(QuietFlag, "q",
false, "Set logging to quiet")
+	flagSet.String(DirFlag, "", "A directory in which to identify licenses")
 	flagSet.StringP(FileFlag, "f", "", "A file in which to identify licenses")
 	flagSet.BoolP(AcceptableFlag, "g", false, "Flag acceptable")
 	flagSet.BoolP(KeywordsFlag, "k", false, "Flag keywords")
diff --git a/identifier/identifier.go b/identifier/identifier.go
index bed7b4e..4750fa7 100644
--- a/identifier/identifier.go
+++ b/identifier/identifier.go
@@ -4,8 +4,10 @@ package identifier
 
 import (
 	"fmt"
+	"io/fs"
 	"io/ioutil"
 	"os"
+	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
@@ -117,43 +119,64 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li
 	input := string(b)
 
 	result, err := IdentifyLicensesInString(input, options, licenseLibrary)
-	if err != nil {
-		return result, err
-	}
-
-	if options.ForceResult || len(result.Matches) > 0 || len(result.KeywordMatches) > 0 {
-		result.File = filePath
-		return result, nil
-	} else {
-		return result, nil // result is not interesting
-	}
+	result.File = filePath
+	return result, err
 }
 
-func identifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) ([]IdentifierResults, error) { // nolint:unused
-
-	var results []IdentifierResults
+// IdentifyLicensesInDirectory recursively walks dirPath and identifies the
+// licenses in every non-empty, non-directory entry found beneath it.
+//
+// Files are scanned concurrently, at most 10 at a time. On success the result
+// holds one entry per scanned file, in the order in which filepath.WalkDir
+// visited the files. If the walk or any scan fails, the results are nil and
+// an error is returned.
+func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) {
+	var lfs []string
 
-	files, err := ioutil.ReadDir(dirPath)
-	if err != nil {
-		return nil, err
-	}
-
-	for _, file := range files {
-		if file.IsDir() {
-			result, err := identifyLicensesInDirectory(file.Name(), options, licenseLibrary)
-			if err != nil {
-				return nil, err
-			}
-			results = append(results, result...)
-		} else {
-			result, err := IdentifyLicensesInFile(file.Name(), options, licenseLibrary)
-			if err != nil {
-				return nil, err
-			}
-			results = append(results, result)
-		}
-	}
-	return results, nil
+	err = filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return fmt.Errorf("accessing path %q: %w", path, err)
+		}
+		if d.IsDir() {
+			return nil
+		}
+		// d.Info can fail (for example when the entry was removed after the
+		// directory was read); its FileInfo must not be used in that case.
+		info, err := d.Info()
+		if err != nil {
+			return fmt.Errorf("reading file info for %q: %w", path, err)
+		}
+		if info.Size() > 0 {
+			lfs = append(lfs, path)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("walking the path %v: %w", dirPath, err)
+	}
+
+	// One slot per file: each worker writes only its own index, so results keep
+	// the walk order and need no synchronization beyond workers.Wait.
+	ret = make([]IdentifierResults, len(lfs))
+
+	// errgroup runs the scans in parallel, limited to 10 at a time.
+	workers := errgroup.Group{}
+	workers.SetLimit(10)
+	for i, lf := range lfs {
+		i, lf := i, lf
+		workers.Go(func() error {
+			ir, err := IdentifyLicensesInFile(lf, options, licenseLibrary)
+			if err != nil {
+				return err
+			}
+			ret[i] = ir
+			return nil
+		})
+	}
+	if err := workers.Wait(); err != nil {
+		return nil, err
+	}
+	return ret, nil
 }
 
 func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) {
diff --git a/identifier/identifier_spdx_test.go b/identifier/identifier_spdx_test.go
index 19df64c..57f995c 100644
--- a/identifier/identifier_spdx_test.go
+++ b/identifier/identifier_spdx_test.go
@@ -7,14 +7,11 @@ package identifier
 import (
 	"fmt"
 	"io/fs"
-	"os"
 	"path"
 	"path/filepath"
 	"strings"
 	"testing"
 
-	"github.com/IBM/license-scanner/configurer"
-
 	"github.com/IBM/license-scanner/licenses"
 )
 
@@ -24,37 +21,59 @@ const (
 )
 
 var testDataDir = path.Join(resources, "spdx", spdx, "testdata")
+var options = Options{
+	ForceResult: false,
+	Enhancements: Enhancements{
+		AddNotes:       "",
+		AddTextBlocks:  true,
+		FlagAcceptable: false,
+		FlagCopyrights: true,
+		FlagKeywords:   false,
+	},
+}
 
-func Test_identifyLicensesInSPDXTestData(t *testing.T) {
-	if _, err := os.Stat(testDataDir); os.IsNotExist(err) {
-		// Skip test if this data isn't in place (in repo) yet. Else continue for identify errors.
-		t.Skipf("Skipping test with optional resources: %v", resources)
+func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) {
+	t.Parallel()
+	licenseLibrary, err := licenses.NewLicenseLibrary(nil)
+	if err != nil {
+		t.Fatalf("NewLicenseLibrary() error = %v", err)
+	}
+	if err := licenseLibrary.AddAllSPDX(); err != nil {
+		t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err)
 	}
 
-	config, err := configurer.InitConfig(nil)
+	results, err := IdentifyLicensesInDirectory(testDataDir, options, licenseLibrary)
 	if err != nil {
-		t.Fatal(err)
+		t.Errorf("IdentifyLicensesInDirectory(%v) err = %v", testDataDir, err)
+	}
+
+	const expected = 499
+	if actual := len(results); actual != expected {
+		t.Errorf("IdentifyLicensesInDirectory(%v) len(results) expected %v actual: %v", testDataDir, expected, actual)
+	}
+
+	for _, result := range results {
+		result := result
+		t.Run(result.File, func(t *testing.T) {
+			t.Parallel()
+			wantLicenseID := strings.TrimSuffix(filepath.Base(result.File), ".txt") // result.File is an OS-native path (built by filepath.WalkDir), so use filepath, not path
+			wantLicenseID = strings.TrimPrefix(wantLicenseID, "deprecated_")
+			if _, ok := result.Matches[wantLicenseID]; !ok {
+				t.Error("Did not get: ", wantLicenseID)
+			}
+		})
 	}
-	config.Set("resources", resources) // override
-	config.Set("spdx", spdx)           // override
+}
+
+func Test_identifyLicensesInSPDXTestDataFiles(t *testing.T) {
+	t.Parallel()
 
-	licenseLibrary, err := licenses.NewLicenseLibrary(config)
+	licenseLibrary, err := licenses.NewLicenseLibrary(nil)
 	if err != nil {
 		t.Fatalf("NewLicenseLibrary() error = %v", err)
 	}
 	if err := licenseLibrary.AddAllSPDX(); err != nil {
-		t.Fatalf("licenseLibrary.AddAll() error = %v", err)
-	}
-
-	options := Options{
-		ForceResult: false,
-		Enhancements: Enhancements{
-			AddNotes:       "",
-			AddTextBlocks:  true,
-			FlagAcceptable: false,
-			FlagCopyrights: true,
-			FlagKeywords:   false,
-		},
+		t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err)
 	}
 
 	type tf struct {