-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add ValidateReadme method Signed-off-by: Josh Dolitsky <[email protected]> * add ValidateReadme method Signed-off-by: Josh Dolitsky <[email protected]> * typo in unescape Signed-off-by: Josh Dolitsky <[email protected]> --------- Signed-off-by: Josh Dolitsky <[email protected]>
- Loading branch information
Showing
4 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
Copyright 2024 Chainguard, Inc. | ||
SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package validation | ||
|
||
import ( | ||
"fmt" | ||
"html" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
"github.com/microcosm-cc/bluemonday" | ||
"github.com/russross/blackfriday/v2" | ||
) | ||
|
||
// ErrUnsafeReadme is the error ValidateReadme returns when sanitizing the | ||
// HTML rendered from a README changes it. | ||
var ErrUnsafeReadme = fmt.Errorf("readme contained unsafe html content") | ||
|
||
// ValidateReadme validates the contents of a Markdown README.md file. | ||
// If the contents are invalid, a string will be returned containing the | ||
// diff of what the Markdown would look like as HTML if properly sanitized. | ||
func ValidateReadme(readme string) (string, error) { | ||
// Treat empty readme as valid, prevent further processing | ||
if readme == "" { | ||
return "", nil | ||
} | ||
unsafe := readmeToHTML(readme) | ||
safe := sanitizeHTML(unsafe) | ||
// After converting the Markdown to HTML, | ||
// make sure there is no diff after sanitizing it. | ||
// Unescape any encoded HTML tags for proper comparison. | ||
if diff := cmp.Diff(unescapeHTML(unsafe), unescapeHTML(safe)); diff != "" { | ||
return diff, ErrUnsafeReadme | ||
} | ||
return "", nil | ||
} | ||
|
||
func readmeToHTML(rawMarkdown string) string { | ||
s := string(blackfriday.Run([]byte(rawMarkdown))) | ||
// Fix issue where single tags get extra space on conversion (e.g. "<hr />") | ||
s = strings.ReplaceAll(s, " />", "/>") | ||
return s | ||
} | ||
|
||
var bluemondayPolicy = func() *bluemonday.Policy { | ||
p := bluemonday.UGCPolicy() | ||
// Allow fenced code block classes | ||
p = p.AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code") | ||
// Allow links without ref="nofollow" which are not set automatically on links on markdown conversion | ||
p = p.RequireNoFollowOnLinks(false) | ||
// Allow custom height and width on images | ||
p = p.AllowAttrs("width", "height").OnElements("img") | ||
// Allow HTML comments | ||
p.AllowComments() | ||
return p | ||
}() | ||
|
||
func sanitizeHTML(unsafeHTML string) string { | ||
return bluemondayPolicy.Sanitize(unsafeHTML) | ||
} | ||
|
||
// unescapeHTML returns safeHTML with HTML entities (such as "&lt;") replaced
// by the characters they encode.
func unescapeHTML(safeHTML string) string {
	decoded := html.UnescapeString(safeHTML)
	return decoded
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
Copyright 2024 Chainguard, Inc. | ||
SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package validation | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
// README fixtures used by TestValidateReadme. | ||
const ( | ||
// safeReadme has HTML comments, a sized image, a relative link and an | ||
// XSS payload that sits inside a fenced go code block; the test expects | ||
// it to validate. | ||
safeReadme = ` | ||
<!--testing:start--> | ||
# safe | ||
<img src="logo.png" width="36px" height="36px"/> | ||
[click here](./other.md) | ||
<!--testing:end--> | ||
--- | ||
Here is some dangerous code inside a code block: | ||
` + "```go" + ` | ||
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World | ||
` + "```" | ||
|
||
// unsafeReadme1 carries the same payload outside of any code block; the | ||
// test expects it to be rejected. | ||
unsafeReadme1 = ` | ||
<!--testing:start--> | ||
# unsafe | ||
<img src="logo.png" width="36px" height="36px"/> | ||
[click here](./other.md) | ||
<!--testing:end--> | ||
--- | ||
Here is some dangerous code outside a code block: | ||
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World | ||
` | ||
|
||
// unsafeReadme2 embeds a raw <script> tag; the test expects it to be | ||
// rejected. | ||
unsafeReadme2 = ` | ||
# unsafe | ||
TAKE THAT!!!!!!! | ||
<script>alert("XSS")</script> | ||
` | ||
) | ||
|
||
func TestValidateReadme(t *testing.T) { | ||
tests := map[string]struct { | ||
Input string | ||
Expect bool | ||
}{ | ||
"empty": {"", true}, | ||
"safe": {safeReadme, true}, | ||
"unsafe 1": {unsafeReadme1, false}, | ||
"unsafe 2": {unsafeReadme2, false}, | ||
} | ||
|
||
for name, tt := range tests { | ||
t.Run(name, func(t *testing.T) { | ||
diff, got := ValidateReadme(tt.Input) | ||
if (got == nil) != tt.Expect { | ||
t.Errorf("Expected ValidateReadme(`%s`) to return (err == nil) == %v, but got %v. diff: %s", tt.Input, tt.Expect, got, diff) | ||
} | ||
}) | ||
} | ||
} |