From a34574775134f723c4df147947a496f4c6cf2ea5 Mon Sep 17 00:00:00 2001 From: Kiko Fernandez-Reyes Date: Tue, 19 Nov 2024 13:20:54 +0100 Subject: [PATCH 1/2] github: scancode scans multiple files at once --- .github/workflows/main.yaml | 2 +- scripts/scan-code.escript | 186 +++++++++++++++++++++--------------- 2 files changed, 112 insertions(+), 76 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 904d6bbca186..6c399392a466 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -489,7 +489,7 @@ jobs: run: docker run -v $PWD/:/github otp '/github/scripts/run-dialyzer' - name: License Compliance Check - run: docker run -v $PWD/:/github otp 'sudo apt install -y pip && pip install scancode-toolkit==32.3.0 && export PATH="$HOME/.local/bin:$PATH" && /github/scripts/scan-code.escript --file-or-dir "${{ needs.pack.outputs.added_files }}" --template-path /github/scripts/scan-code/template.txt --prefix /github/' + run: docker run -v $PWD/:/github otp 'sudo apt install -y pip && pip install scancode-toolkit==32.3.0 && export PATH="$HOME/.local/bin:$PATH" && /github/scripts/scan-code.escript --file-or-dir "${{ needs.pack.outputs.added_files }}" --prefix /github/' test: name: Test Erlang/OTP diff --git a/scripts/scan-code.escript b/scripts/scan-code.escript index 82bb3d9daac4..9a00a916cc2b 100755 --- a/scripts/scan-code.escript +++ b/scripts/scan-code.escript @@ -20,6 +20,8 @@ %% %% %CopyrightEnd% +-define(tmp_folder, "tmp"). + main(Args) -> argparse:run(Args, cli(), #{progname => scancode}). @@ -29,110 +31,144 @@ cli() -> Run 'scancode' with multiple options """, arguments => [ scan_option(), - template_option(), prefix_option(), + scan_results(), file_or_dir() ], handler => fun scancode/1}. 
approved() -> - [ "mit", "agpl-3.0", "apache-2.0", "boost-1.0", "llvm-exception", - "lgpl-2.1-plus", "cc0-1.0", "bsd-simplified", "bsd-new", "pcre", - "fsf-free", "autoconf-exception-3.0", "mpl-1.1", "public-domain", - "autoconf-simple-exception", "unicode", "tcl", "gpl-2.0 WITH classpath-exception-2.0", - "zlib", "lgpl-2.0-plus WITH wxwindows-exception-3.1", "lgpl-2.0-plus", - "openssl-ssleay", "cc-by-sa-3.0", "cc-by-4.0", "dco-1.1", "fsf-ap", - "agpl-1.0-plus", "agpl-1.0", "agpl-3.0-plus", "classpath-exception-2.0", - "ietf-trust"]. + [ ~"mit", ~"agpl-3.0", ~"apache-2.0", ~"boost-1.0", ~"llvm-exception", + ~"lgpl-2.1-plus", ~"cc0-1.0", ~"bsd-simplified", ~"bsd-new", ~"pcre", + ~"fsf-free", ~"autoconf-exception-3.0", ~"mpl-1.1", ~"public-domain", + ~"autoconf-simple-exception", ~"unicode", ~"tcl", ~"gpl-2.0 WITH classpath-exception-2.0", + ~"zlib", ~"lgpl-2.0-plus WITH wxwindows-exception-3.1", ~"lgpl-2.0-plus", + ~"openssl-ssleay", ~"cc-by-sa-3.0", ~"cc-by-4.0", ~"dco-1.1", ~"fsf-ap", + ~"agpl-1.0-plus", ~"agpl-1.0", ~"agpl-3.0-plus", ~"classpath-exception-2.0", + ~"ietf-trust"]. not_approved() -> - ["gpl", "gpl-3.0-plus", "gpl-2.0", "gpl-1.0-plus", "unlicense", - "erlangpl-1.1", "gpl-2.0-plus", "null"]. - + [~"gpl", ~"gpl-3.0-plus", ~"gpl-2.0", ~"gpl-1.0-plus", ~"unlicense", + ~"erlangpl-1.1", ~"gpl-2.0-plus", ~"null", 'null']. scan_option() -> #{name => scan_option, type => string, default => "cli", - long => "-scan-option"}. + long => "-scan-option", + help => "scancode options to pass to the escript."}. prefix_option() -> #{name => prefix, type => string, default => "", - long => "-prefix"}. + long => "-prefix", + help => "Prefix used for all paths (main use case is Github CI)."}. +scan_results() -> + #{name => scan_results, + type => string, + default => "scan-results.json", + long => "-scan_results", + help => "Output file where to scan the results."}. file_or_dir() -> #{name => file_or_dir, type => string, required => true, - long => "-file-or-dir"}. 
- -template_option() -> - #{name => template_path, - type => string, - default => "scripts/scan-code/template.txt", - long => "-template-path"}. - - -scancode(#{ file_or_dir := FilesOrDirs}=Config) -> - Files = string:split(FilesOrDirs, " ", all), - scancode0(Files, Config). - -scancode0([[]], _) -> - ok; -scancode0(Files, Config) -> - Results = lists:foldl(fun (File, Errors) -> - Command = scancode(Config, File), - case execute(Command, File) of - {error, Err} -> - [Err | Errors]; - ok -> - Errors - end - end, [], Files), - case Results of + long => "-file-or-dir", + help => "Files and/or directories to analyse."}. + +scancode(Config) -> + ok = cp_files(Config), + scan_folder(Config). + +cp_files(#{file_or_dir := FilesOrDirs, + prefix := Prefix}) -> + ok = create_folder(Prefix, ?tmp_folder), + Files = cleanup_files(FilesOrDirs), + lists:foreach(fun (File) -> + Command = cp_with_path(Prefix, File, ?tmp_folder), + os:cmd(Command) + end, Files), + ok. + +create_folder(Prefix, Folder) -> + [] = os:cmd("mkdir " ++ Prefix ++ Folder), + ok. + +cleanup_files(FilesOrDirs) -> + lists:filter(fun ([]) -> false; (_) -> true end, + string:split(FilesOrDirs, " ", all)). + +cp_with_path(Prefix, File, Folder) -> + "cp -f --parents " ++ Prefix ++ File ++ " " ++ Folder. + +scan_folder(Config) -> + Command = scancode_command(Config), + execute(Command, Config). + +scan_result_path(#{scan_results := ScanResult, + prefix := Prefix}) -> + Prefix ++ ScanResult. + +folder_path(#{prefix := Prefix}) -> + Prefix ++ ?tmp_folder. + +scancode_command(#{scan_option := Options}=Config) -> + ScanResultPath = scan_result_path(Config), + FolderPath = folder_path(Config), + "scancode -" ++ Options ++ " --json-pp " ++ ScanResultPath ++ " " ++ FolderPath. 
+ +execute(Command, Config) -> + _ = os:cmd(Command), + ScanResult = scan_result_path(Config), + Json = decode(ScanResult), + Licenses = fetch_licenses(Json), + + Errors = compliance_check(Licenses), + io:format("~n~nResuling Errors: ~p~n~n", [Errors]), + case Errors of [] -> ok; - Errors -> + _ -> error(Errors) end. -scancode(#{scan_option := Options, - prefix := Prefix, - template_path := TemplatePath}, File) -> - "scancode -" ++ Options ++ " --custom-output - --custom-template " ++ TemplatePath ++ " " ++ Prefix ++ File. - -execute(Command, File) -> - Port = open_port({spawn, Command}, [stream, in, eof, hide, exit_status]), - Result = loop(Port, []), - Ls = string:split(string:trim(Result, both), ",", all), - - case lists:filter(fun ([]) -> false; (_) -> true end, Ls) of - [] -> - {error, {File, no_license_found}}; - Ls1 -> - NotApproved = lists:any(fun (License) -> lists:member(License, not_approved()) end, Ls1), - case NotApproved of - true -> - {error, {File, license_not_approved}}; +compliance_check(Licenses) when is_list(Licenses) -> + lists:filtermap(fun (License) -> + case compliance_check(License) of + ok -> + false; + {error, Err} -> + {true, Err} + end + end, Licenses); +compliance_check({Path, 'null'=License}) -> + {error, {License, Path, no_license}}; +compliance_check({Path, License}) -> + case lists:member(License, not_approved()) of + true -> + {error, {License, Path, license_not_approved}}; + false -> + case lists:member(License, approved()) of false -> - InPolicy = lists:all(fun (License) -> lists:member(License, approved()) end, Ls1), - case InPolicy of - false -> - %% this can happen if a license is - %% not in the approve/not_approved list - {error, {File, license_not_approved}}; - true -> - ok - end + %% this can happen if a license is + %% not in the approve/not_approved list + {error, {License, Path, license_not_recognised}}; + true -> + ok end end. 
-loop(Port, Acc) -> - receive - {Port, {data, Data}} -> - loop(Port, [Data|Acc]); - {Port,{exit_status, _ExitStatus}} -> - lists:flatten(lists:reverse(Acc)) - end. +decode(Filename) -> + {ok, Bin} = file:read_file(Filename), + json:decode(Bin). + +fetch_licenses(#{<<"files">> := Files}) -> + lists:filtermap(fun(#{<<"type">> := <<"file">>, + <<"detected_license_expression">> := License, + <<"path">> := Path}) -> + {true, {Path, License}}; + (_) -> + false + end, Files). From ad78d8db838247eba658ed52bbf600d19660828d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Backstr=C3=B6m?= Date: Tue, 19 Nov 2024 14:02:43 +0100 Subject: [PATCH 2/2] gh: Implement SARIF license uploader --- .github/scripts/get-supported-branches.sh | 26 ++++ .github/scripts/get-supported-versions.sh | 21 +++ .github/scripts/update-gh-actions-versions.es | 2 +- .github/workflows/license-scanner.yaml | 89 ++++++++++++ .github/workflows/main.yaml | 3 - .github/workflows/osv-scanner-scheduled.yml | 8 +- scripts/scan-code.escript | 134 ++++++++++++++++-- 7 files changed, 259 insertions(+), 24 deletions(-) create mode 100755 .github/scripts/get-supported-branches.sh create mode 100755 .github/scripts/get-supported-versions.sh create mode 100644 .github/workflows/license-scanner.yaml diff --git a/.github/scripts/get-supported-branches.sh b/.github/scripts/get-supported-branches.sh new file mode 100755 index 000000000000..5dbee859c6ad --- /dev/null +++ b/.github/scripts/get-supported-branches.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +## %CopyrightBegin% +## +## Copyright Ericsson AB 2024. All Rights Reserved. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. 
+## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +## %CopyrightEnd% + +printf '["master","maint"' +for vsn in $(.github/scripts/get-supported-versions.sh); do + printf ',"maint-%s"' "${vsn}" +done + +printf ']' \ No newline at end of file diff --git a/.github/scripts/get-supported-versions.sh b/.github/scripts/get-supported-versions.sh new file mode 100755 index 000000000000..5b8ccc88fad4 --- /dev/null +++ b/.github/scripts/get-supported-versions.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +## %CopyrightBegin% +## +## Copyright Ericsson AB 2024. All Rights Reserved. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## +## %CopyrightEnd% + +.github/scripts/get-major-versions.sh | head -3 \ No newline at end of file diff --git a/.github/scripts/update-gh-actions-versions.es b/.github/scripts/update-gh-actions-versions.es index a5727d23e376..890f75c96ff5 100755 --- a/.github/scripts/update-gh-actions-versions.es +++ b/.github/scripts/update-gh-actions-versions.es @@ -82,7 +82,7 @@ run(Opts) -> "Do you want to want to proceed?", [Cwd, Upstream]), %% Get this for dependabot update before we start switching branches and other chenanigans - SupportedMajorVersions = string:split(cmd(Opts, ".github/scripts/get-major-versions.sh | head -3"),"\n", all), + SupportedMajorVersions = string:split(cmd(Opts, ".github/scripts/get-supported-versions.sh"),"\n", all), OriginalBranch = cmd(Opts, "git branch --show-current"), %% Fetch all PRs done by dependabot diff --git a/.github/workflows/license-scanner.yaml b/.github/workflows/license-scanner.yaml new file mode 100644 index 000000000000..0c49c683e7ba --- /dev/null +++ b/.github/workflows/license-scanner.yaml @@ -0,0 +1,89 @@ +## %CopyrightBegin% +## +## Copyright Ericsson AB 2024. All Rights Reserved. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +## %CopyrightEnd% + +## This workflow continually scans the master branch to make sure that no files +## are added without a copyright notice and license.
+ +name: Check license header + +on: + pull_request: + push: + +permissions: + contents: read + +jobs: + run-scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4.2.2 + - name: Check which files have been added + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3.0.2 + id: new-files + with: + predicate-quantifier: 'every' + filters: | + new: + - added: '**' + - '!**/*.beam' + - '!**/*.exe' + - '!**/*.config.cache.static' + ## OTP-27.0 tag as base + base: 601a012837ea0a5c8095bf24223132824177124d + list-files: shell + + - uses: erlef/setup-beam@5304e04ea2b355f03681464e683d92e3b2f18451 # ratchet:erlef/setup-beam@v1 + with: + otp-version: '27.1' + + - name: License Compliance Check + run: | + pip install scancode-toolkit==32.3.0 + scripts/scan-code.escript --file-or-dir "${{ steps.new-files.outputs.new_files }}" \ + --sarif results.sarif + + - name: "Upload artifact" + if: ${{ !cancelled() }} + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 ratchet:actions/upload-artifact@v4 + with: + name: SARIF file + path: results.sarif + + upload-scan: + needs: run-scan + runs-on: ubuntu-latest + if: ${{ !cancelled() }} + permissions: + # Required to upload SARIF file to CodeQL. + # See: https://github.com/github/codeql-action/issues/2117 + actions: read + # Require writing security events to upload SARIF file to security tab + security-events: write + # Only need to read contents + contents: read + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # ratchet:actions/checkout@v4.2.2 + - name: Download sarif file + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # ratchet:actions/download-artifact@v4.1.8 + # Upload the results to GitHub's code scanning dashboard. 
+ - name: "Upload to code-scanning" + if: ${{ !cancelled() }} + uses: github/codeql-action/upload-sarif@ea9e4e37992a54ee68a9622e985e60c8e8f12d9f # ratchet:github/codeql-action/upload-sarif@v3 + with: + sarif_file: "SARIF file/results.sarif" \ No newline at end of file diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 6c399392a466..1f4c2eff6028 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -488,9 +488,6 @@ jobs: - name: Run dialyzer run: docker run -v $PWD/:/github otp '/github/scripts/run-dialyzer' - - name: License Compliance Check - run: docker run -v $PWD/:/github otp 'sudo apt install -y pip && pip install scancode-toolkit==32.3.0 && export PATH="$HOME/.local/bin:$PATH" && /github/scripts/scan-code.escript --file-or-dir "${{ needs.pack.outputs.added_files }}" --prefix /github/' - test: name: Test Erlang/OTP runs-on: ubuntu-latest diff --git a/.github/workflows/osv-scanner-scheduled.yml b/.github/workflows/osv-scanner-scheduled.yml index a95201bb2375..0f02a17c3707 100644 --- a/.github/workflows/osv-scanner-scheduled.yml +++ b/.github/workflows/osv-scanner-scheduled.yml @@ -42,13 +42,7 @@ jobs: - id: get-versions name: Fetch latest 3 OTP versions run: | - VSNs=$(grep -E 'OTP-[^.]+[.]0 :' otp_versions.table | awk '{ print $1 '} | head -3 | sed 's/[-.]/ /g' | awk '{print $2}') - versions='["maint", "master"' - for vsn in $VSNs; do - versions="${versions}, \"maint-$vsn\"" - done - versions="${versions}]" - echo "versions=${versions}" >> "$GITHUB_OUTPUT" + echo "versions=$(.github/scripts/get-supported-branches.sh)" >> "$GITHUB_OUTPUT" run-scheduled-scan: # Fan out and create requests to run OSV on multiple branches. diff --git a/scripts/scan-code.escript b/scripts/scan-code.escript index 9a00a916cc2b..48c68b4c872d 100755 --- a/scripts/scan-code.escript +++ b/scripts/scan-code.escript @@ -4,7 +4,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2024. All Rights Reserved. 
+%% Copyright Ericsson AB 2024. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ %% %% %CopyrightEnd% --define(tmp_folder, "tmp"). +-define(tmp_folder, "tmp/"). main(Args) -> argparse:run(Args, cli(), #{progname => scancode}). @@ -33,7 +33,8 @@ cli() -> arguments => [ scan_option(), prefix_option(), scan_results(), - file_or_dir() ], + file_or_dir(), + sarif_option() ], handler => fun scancode/1}. approved() -> @@ -78,6 +79,12 @@ file_or_dir() -> long => "-file-or-dir", help => "Files and/or directories to analyse."}. +sarif_option() -> + #{name => sarif, + type => string, + default => undefined, + long => "-sarif"}. + scancode(Config) -> ok = cp_files(Config), scan_folder(Config). @@ -120,19 +127,22 @@ scancode_command(#{scan_option := Options}=Config) -> "scancode -" ++ Options ++ " --json-pp " ++ ScanResultPath ++ " " ++ FolderPath. execute(Command, Config) -> - _ = os:cmd(Command), + io:format("Running: ~ts~n", [Command]), + R = os:cmd(Command), + io:format("Result: ~ts~n",[R]), ScanResult = scan_result_path(Config), Json = decode(ScanResult), - Licenses = fetch_licenses(Json), + Licenses = fetch_licenses(folder_path(Config), Json), Errors = compliance_check(Licenses), io:format("~n~nResuling Errors: ~p~n~n", [Errors]), - case Errors of - [] -> - ok; - _ -> - error(Errors) - end. + + maps:get(sarif, Config) =/= undefined andalso + sarif(maps:get(sarif, Config), Errors), + + Errors =/= [] andalso erlang:raise(exit, Errors, []), + + ok. compliance_check(Licenses) when is_list(Licenses) -> lists:filtermap(fun (License) -> @@ -164,11 +174,109 @@ decode(Filename) -> {ok, Bin} = file:read_file(Filename), json:decode(Bin). 
-fetch_licenses(#{<<"files">> := Files}) -> +fetch_licenses(FolderPath, #{<<"files">> := Files}) -> lists:filtermap(fun(#{<<"type">> := <<"file">>, <<"detected_license_expression">> := License, <<"path">> := Path}) -> - {true, {Path, License}}; + {true, {string:trim(Path, leading, FolderPath), License}}; (_) -> false end, Files). + +sarif(SarifFile, Errors) -> + file:write_file(SarifFile, sarif(Errors)). +sarif(Errors) -> + ErrorTypes = lists:usort([{Type, License} || {License, _File, Type} <- Errors]), + ErrorTypesIndex = lists:zip(ErrorTypes, lists:seq(0,length(ErrorTypes) - 1)), + json:format( + #{ ~"version" => ~"2.1.0", + ~"$schema" => ~"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json", + ~"runs" => + [ #{ + ~"tool" => + #{ ~"driver" => + #{ ~"informationUri" => ~"https://github.com/erlang/otp/scripts/scan-code.escript", + ~"name" => ~"scan-code", + ~"rules" => + [ #{ ~"id" => error_type_to_id(ErrorType), + ~"name" => error_type_to_name(ErrorType), + ~"shortDescription" => + #{ ~"text" => error_type_to_text(ErrorType) }, + % ~"helpUri" => ~"????", + ~"fullDescription" => + #{ + ~"text" => error_type_to_description(ErrorType) + } + } + || ErrorType <- ErrorTypes], + ~"version" => ~"1.0" + } + }, + ~"artifacts" => + [ #{ + ~"location" => #{ + ~"uri" => File + }, + ~"length" => -1 + } || {_, File, _} <- Errors + ], + ~"results" => + [ #{ + ~"ruleId" => error_type_to_id({ErrorType, License}), + ~"ruleIndex" => proplists:get_value({ErrorType, License}, ErrorTypesIndex), + ~"level" => error_type_to_level({ErrorType, License}), + ~"message" => #{ ~"text" => error_type_to_text({ErrorType, License}) }, + ~"locations" => + [ #{ ~"physicalLocation" => + #{ ~"artifactLocation" => + #{ ~"uri" => File } + } + } ] + } || {License, File, ErrorType} <- Errors] + } ] + }). 
+ +error_type_to_id({no_license, _}) -> + atom_to_binary(no_license); +error_type_to_id(ErrorType) -> + base64:encode(integer_to_binary(erlang:phash2(ErrorType))). +error_type_to_text({license_not_recognised, L}) -> + <<"License not recognized: ", L/binary>>; +error_type_to_text({no_license, _}) -> + <<"License not found">>; +error_type_to_text({license_not_approved, L}) -> + <<"License not approved: ",L/binary>>. + +error_type_to_name({no_license, _}) -> + ~"NoLicense"; +error_type_to_name({license_not_recognised, _}) -> + ~"NoLicense"; +error_type_to_name({license_not_approved, _}) -> + ~"UnapprovedLicense". +error_type_to_level({no_license, _}) -> + ~"warning"; +error_type_to_level({license_not_recognised, _}) -> + ~"error"; +error_type_to_level({license_not_approved, _}) -> + ~"error". +error_type_to_description({no_license, _}) -> + ~""" + scancode has not found any license in this file. To fix this, + add a license declaration to the top of the file. + """; +error_type_to_description({license_not_recognised, L}) -> + unicode:characters_to_binary( + io_lib:format( + """ + The license ~ts is not recognized by scancode. + You need to update scripts/scan-code.escript to include the + license, or change to another license. + """, [L])); +error_type_to_description({license_not_approved, L}) -> + unicode:characters_to_binary( + io_lib:format( + """ + scancode has detected ~ts, which is a license that is + not on the list of approved licenses. This file is not + allowed to be part of Erlang/OTP under this license. + """, [L])).