Skip to content

Commit

Permalink
Add --dir to the CLI to scan licenses in a dir instead of a single fi…
Browse files Browse the repository at this point in the history
…le (IBM#18)

* Add --dir to the CLI to scan licenses in a dir instead of a single file

The directory is walked recursively, skipping empty files.
IdentifyLicensesInDir just calls IdentifyLicensesInFile after doing a WalkDir.
The files are processed in parallel.
This doesn't help the API (yet), but it's very useful for testing.

Closes IBM#17

Signed-off-by: Mark Sturdevant <[email protected]>

* Add file to CLI results when using --dir to make the output more usable

The initial implementation of --dir loops and prints for each file like the singular --file.
But since --dir can have a bunch of files, we need to print the file name on each "FOUND..." and "No licenses..." line,
so that we can understand the results better (visually and/or by grepping/sorting/counting).

Signed-off-by: Mark Sturdevant <[email protected]>

Signed-off-by: Mark Sturdevant <[email protected]>
  • Loading branch information
markstur authored Oct 13, 2022
1 parent e63c947 commit 6469124
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 60 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ Flags:
--configName string Base name for config file (default "config")
--configPath string Path to any config files
-c, --copyrights Flag copyrights
--custom string Custom templates to use (default "..")
--custom string Custom templates to use (default "default")
-d, --debug Enable debug logging
--dir string A directory in which to identify licenses
-f, --file string A file in which to identify licenses
-x, --hash Output file hash
-h, --help help for license-scanner
Expand All @@ -100,7 +101,7 @@ Flags:
--list List the license templates to be used
-n, --normalized Flag normalized
-q, --quiet Set logging to quiet
--spdx string SPDX templates to use (default "3.18")
--spdx string SPDX templates to use (default "default")
```

### Example CLI usage
Expand Down Expand Up @@ -364,11 +365,13 @@ In help mode, all other flags are ignored.

### Scan mode

When running `license_scanner -f <input_file>` the input file is scanned for license matches.
When running `license_scanner --file <input_file>` the input file is scanned for license matches.
When running `license_scanner --dir <input_dir>` the input directory is recursively scanned for license matches.

| Name | Shorthand | Type | Usage |
|--------|-----------|--------|-------------------------------------|
| -file | --f | string | A file in which to identify licenses |
| Name | Shorthand | Type | Usage |
|--------|-----------|--------|-------------------------------------------|
| --file | -f | string | A file in which to identify licenses |
| --dir | | string | A directory in which to identify licenses |

The following **optional** runtime flags may be used to modify and enhance the behavior:

Expand Down
3 changes: 2 additions & 1 deletion cmd/license-scanner.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ license-scanner [flags]
-c, --copyrights Flag copyrights
--custom string Custom templates to use (default "default")
-d, --debug Enable debug logging
--dir string A directory in which to identify licenses
-f, --file string A file in which to identify licenses
-x, --hash Output file hash
-h, --help help for license-scanner
Expand All @@ -46,4 +47,4 @@ license-scanner [flags]
--spdx string SPDX templates to use (default "default")
```

###### Auto generated by spf13/cobra on 30-Sep-2022
###### Auto generated by spf13/cobra on 6-Oct-2022
65 changes: 65 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ Please give us feedback at: https://github.com/IBM/license-scanner/issues
f := cfg.GetString(configurer.FileFlag)
if f != "" {
return findLicensesInFile(cfg, f)
} else if cfg.GetString(configurer.DirFlag) != "" {
return findLicensesInDirectory(cfg)
} else if cfg.GetBool(configurer.ListFlag) {
return listLicenses(cfg)
} else if cfg.GetString(configurer.AddAllFlag) != "" {
Expand Down Expand Up @@ -153,6 +155,69 @@ func listLicenses(cfg *viper.Viper) error {
return nil
}

// findLicensesInDirectory runs license identification over the directory named
// by the --dir flag and prints a report to stdout, one section per result.
//
// For a result with matches, the license IDs are printed in alphabetical order
// together with the begin/end positions of each match. For a result without
// matches, a "No licenses were found" line is printed. Every section header
// includes the result's file path so the output can be grepped or sorted.
//
// Errors from creating the license library, loading its templates, or scanning
// the directory are returned to the caller unchanged.
func findLicensesInDirectory(cfg *viper.Viper) error {
	dirPath := cfg.GetString(configurer.DirFlag)

	library, err := licenses.NewLicenseLibrary(cfg)
	if err != nil {
		return err
	}
	if err = library.AddAll(); err != nil {
		return err
	}

	// The flag-driven enhancements are read from the configuration; the rest
	// are fixed for directory scans.
	enhancements := identifier.Enhancements{
		AddNotes:       "",
		AddTextBlocks:  true,
		FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag),
		FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag),
		FlagKeywords:   cfg.GetBool(configurer.KeywordsFlag),
	}
	scanOptions := identifier.Options{
		ForceResult:  true,
		Enhancements: enhancements,
	}

	scanned, err := identifier.IdentifyLicensesInDirectory(dirPath, scanOptions, library)
	if err != nil {
		return err
	}

	for _, res := range scanned {
		// Nothing matched: report the file and move on to the next result.
		if len(res.Matches) == 0 {
			fmt.Printf("\nNo licenses were found: %v\n", res.File)
			continue
		}

		fmt.Printf("\nFOUND LICENSE MATCHES: %v\n", res.File)

		// Map iteration order is random, so collect and sort the IDs to get
		// a stable, alphabetical report.
		ids := make([]string, 0, len(res.Matches))
		for licenseID := range res.Matches {
			ids = append(ids, licenseID)
		}
		sort.Strings(ids)

		for _, licenseID := range ids {
			fmt.Printf("\tLicense ID:\t%v", licenseID)
			fmt.Println()

			// Suppress a match that is identical to the one printed just
			// before it (only consecutive duplicates are collapsed).
			var last identifier.Match
			for _, match := range res.Matches[licenseID] {
				if match == last {
					continue
				}
				fmt.Printf("\t\tbegins: %5v\tends: %5v\n", match.Begins, match.Ends)
				last = match
			}
		}
		fmt.Println()

		// Text blocks are only emitted through the logger when the logger's
		// level is at least INFO.
		if ProjectLogger.GetLevel() >= log.INFO {
			for _, blk := range res.Blocks {
				ProjectLogger.Infof("%v :: %v", blk.Matches, blk.Text)
			}
		}
	}

	return nil
}

func findLicensesInFile(cfg *viper.Viper, f string) error {
ProjectLogger.Enter()
defer ProjectLogger.Exit()
Expand Down
2 changes: 2 additions & 0 deletions configurer/configurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
DebugFlag = "debug"
QuietFlag = "quiet"
LicenseFlag = "license"
DirFlag = "dir"
FileFlag = "file"
ConfigPathFlag = "configPath"
ConfigNameFlag = "configName"
Expand Down Expand Up @@ -122,6 +123,7 @@ func NewDefaultFlags() *pflag.FlagSet {
func AddDefaultFlags(flagSet *pflag.FlagSet) {
flagSet.BoolP(DebugFlag, "d", false, "Enable debug logging")
flagSet.BoolP(QuietFlag, "q", false, "Set logging to quiet")
flagSet.String(DirFlag, "", "A directory in which to identify licenses")
flagSet.StringP(FileFlag, "f", "", "A file in which to identify licenses")
flagSet.BoolP(AcceptableFlag, "g", false, "Flag acceptable")
flagSet.BoolP(KeywordsFlag, "k", false, "Flag keywords")
Expand Down
85 changes: 56 additions & 29 deletions identifier/identifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ package identifier

import (
"fmt"
"io/fs"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
Expand Down Expand Up @@ -117,43 +119,68 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li
input := string(b)

result, err := IdentifyLicensesInString(input, options, licenseLibrary)
if err != nil {
return result, err
}

if options.ForceResult || len(result.Matches) > 0 || len(result.KeywordMatches) > 0 {
result.File = filePath
return result, nil
} else {
return result, nil // result is not interesting
}
result.File = filePath
return result, err
}

func identifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) ([]IdentifierResults, error) { // nolint:unused

var results []IdentifierResults
func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) {
var lfs []string

files, err := ioutil.ReadDir(dirPath)
if err != nil {
if err := filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err)
return err
}
if !d.IsDir() {
info, _ := d.Info()
if info.Size() > 0 {
lfs = append(lfs, path)
}
}
return nil
}); err != nil {
fmt.Printf("error walking the path %v: %v\n", dirPath, err)
return nil, err
}

for _, file := range files {
if file.IsDir() {
result, err := identifyLicensesInDirectory(file.Name(), options, licenseLibrary)
if err != nil {
return nil, err
}
results = append(results, result...)
} else {
result, err := IdentifyLicensesInFile(file.Name(), options, licenseLibrary)
if err != nil {
return nil, err
}
results = append(results, result)
// errGroup to do the work in parallel until error
workers := errgroup.Group{}
workers.SetLimit(10)
ch := make(chan IdentifierResults, 10)

// WaitGroup to know when we have all the results
waitForResults := sync.WaitGroup{}
waitForResults.Add(1)

// Start receiving the results until channel closes
go func() {
for ir := range ch {
ret = append(ret, ir)
}
waitForResults.Done()
}()

// Loop using a worker to send results to a channel
for _, lf := range lfs {
lf := lf
workers.Go(func() error {
ir, err := IdentifyLicensesInFile(lf, options, licenseLibrary)
if err == nil {
ch <- ir
}
return err
})
}
return results, nil

// Close the channel when done or error
go func() {
err = workers.Wait()
close(ch)
}()

// Make sure we got all the results
waitForResults.Wait()
return ret, err
}

func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) {
Expand Down
67 changes: 43 additions & 24 deletions identifier/identifier_spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@ package identifier
import (
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"strings"
"testing"

"github.com/IBM/license-scanner/configurer"

"github.com/IBM/license-scanner/licenses"
)

Expand All @@ -24,37 +21,59 @@ const (
)

var testDataDir = path.Join(resources, "spdx", spdx, "testdata")
var options = Options{
ForceResult: false,
Enhancements: Enhancements{
AddNotes: "",
AddTextBlocks: true,
FlagAcceptable: false,
FlagCopyrights: true,
FlagKeywords: false,
},
}

func Test_identifyLicensesInSPDXTestData(t *testing.T) {
if _, err := os.Stat(testDataDir); os.IsNotExist(err) {
// Skip test if this data isn't in place (in repo) yet. Else continue for identify errors.
t.Skipf("Skipping test with optional resources: %v", resources)
func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) {
t.Parallel()
licenseLibrary, err := licenses.NewLicenseLibrary(nil)
if err != nil {
t.Fatalf("NewLicenseLibrary() error = %v", err)
}
if err := licenseLibrary.AddAllSPDX(); err != nil {
t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err)
}

config, err := configurer.InitConfig(nil)
results, err := IdentifyLicensesInDirectory(testDataDir, options, licenseLibrary)
if err != nil {
t.Fatal(err)
t.Errorf("IdentifyLicensesInDirectory(%v) err = %v", testDataDir, err)
}

const expected = 499
if actual := len(results); actual != expected {
t.Errorf("IdentifyLicensesInDirectory(%v) len(results) expected %v actual: %v", testDataDir, expected, actual)
}

for _, result := range results {
result := result
t.Run(result.File, func(t *testing.T) {
t.Parallel()
wantLicenseID := strings.TrimSuffix(path.Base(result.File), ".txt")
wantLicenseID = strings.TrimPrefix(wantLicenseID, "deprecated_")
if _, ok := result.Matches[wantLicenseID]; !ok {
t.Error("Did not get: ", wantLicenseID)
}
})
}
config.Set("resources", resources) // override
config.Set("spdx", spdx) // override
}

func Test_identifyLicensesInSPDXTestDataFiles(t *testing.T) {
t.Parallel()

licenseLibrary, err := licenses.NewLicenseLibrary(config)
licenseLibrary, err := licenses.NewLicenseLibrary(nil)
if err != nil {
t.Fatalf("NewLicenseLibrary() error = %v", err)
}
if err := licenseLibrary.AddAllSPDX(); err != nil {
t.Fatalf("licenseLibrary.AddAll() error = %v", err)
}

options := Options{
ForceResult: false,
Enhancements: Enhancements{
AddNotes: "",
AddTextBlocks: true,
FlagAcceptable: false,
FlagCopyrights: true,
FlagKeywords: false,
},
t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err)
}

type tf struct {
Expand Down

0 comments on commit 6469124

Please sign in to comment.