Skip to content

Commit

Permalink
add ValidateReadme method (#94)
Browse files Browse the repository at this point in the history
* add ValidateReadme method

Signed-off-by: Josh Dolitsky <[email protected]>

* add ValidateReadme method

Signed-off-by: Josh Dolitsky <[email protected]>

* typo in unescape

Signed-off-by: Josh Dolitsky <[email protected]>

---------

Signed-off-by: Josh Dolitsky <[email protected]>
  • Loading branch information
jdolitsky authored Jan 16, 2024
1 parent 41d664b commit aa61931
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 0 deletions.
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ require (
github.com/coreos/go-oidc/v3 v3.9.0
github.com/google/go-cmp v0.6.0
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0
github.com/microcosm-cc/bluemonday v1.0.26
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8
github.com/russross/blackfriday/v2 v2.1.0
github.com/sigstore/policy-controller v0.8.2
go.uber.org/zap v1.26.0
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
Expand Down Expand Up @@ -51,6 +53,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.17.1 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.23.0 // indirect
github.com/aws/smithy-go v1.18.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blendle/zapdriver v1.3.1 // indirect
github.com/cenkalti/backoff/v3 v3.2.2 // indirect
Expand All @@ -72,6 +75,7 @@ require (
github.com/google/uuid v1.4.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.1-0.20210315223345-82c243799c99 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.23.0/go.mod h1:VC7JDqsqiwXukYEDjoHh9
github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.18.1 h1:pOdBTUfXNazOlxLrgeYalVnuTpKreACHtc62xLwIB3c=
github.com/aws/smithy-go v1.18.1/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down Expand Up @@ -176,6 +178,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF
github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
github.com/googleapis/gax-go/v2 v2.12.0 h1:A+gCJKdRfqXkr+BIRGtZLibNXf0m1f9E4HG56etFpas=
github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qKpsEkdD5+I6QGU=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI=
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.1-0.20210315223345-82c243799c99 h1:JYghRBlGCZyCF2wNUJ8W0cwaQdtpcssJ4CgC406g+WU=
Expand Down Expand Up @@ -253,6 +257,8 @@ github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg=
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k=
github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58=
github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
Expand Down Expand Up @@ -290,6 +296,7 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk=
github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc=
Expand Down
66 changes: 66 additions & 0 deletions validation/readme.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
Copyright 2024 Chainguard, Inc.
SPDX-License-Identifier: Apache-2.0
*/

package validation

import (
"fmt"
"html"
"regexp"
"strings"

"github.com/google/go-cmp/cmp"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday/v2"
)

// ErrUnsafeReadme is the sentinel error returned by ValidateReadme when the
// HTML rendered from a README changes after sanitization.
var (
	ErrUnsafeReadme = fmt.Errorf("readme contained unsafe html content")
)

// ValidateReadme checks that a Markdown README renders to HTML that the
// sanitizer leaves untouched.
//
// An empty readme is accepted without any processing. Otherwise the Markdown
// is converted to HTML, the HTML is sanitized, and the two versions are
// compared after HTML entities have been unescaped on both sides. When they
// differ, the diff between the rendered and the sanitized HTML is returned
// together with ErrUnsafeReadme; when they match, the result is ("", nil).
func ValidateReadme(readme string) (string, error) {
	// Nothing to render, nothing to reject.
	if len(readme) == 0 {
		return "", nil
	}

	rendered := readmeToHTML(readme)
	sanitized := sanitizeHTML(rendered)

	// Compare on unescaped text so that entity encoding alone does not
	// register as a difference.
	diff := cmp.Diff(unescapeHTML(rendered), unescapeHTML(sanitized))
	if diff == "" {
		return "", nil
	}
	return diff, ErrUnsafeReadme
}

// readmeToHTML renders rawMarkdown to HTML with blackfriday and then rewrites
// every " />" to "/>", so that self-closing tags (e.g. "<hr />") lose the
// extra space introduced by the conversion.
func readmeToHTML(rawMarkdown string) string {
	rendered := blackfriday.Run([]byte(rawMarkdown))
	return strings.ReplaceAll(string(rendered), " />", "/>")
}

// bluemondayPolicy is the sanitization policy applied to rendered READMEs.
// It is built once at package initialization from bluemonday's UGC policy,
// with the relaxations listed below.
var bluemondayPolicy = func() *bluemonday.Policy {
	fencedCodeClass := regexp.MustCompile(`^language-[a-zA-Z0-9]+$`)

	policy := bluemonday.UGCPolicy()

	// Keep the "language-xxx" class on <code> elements (fenced code blocks).
	policy.AllowAttrs("class").Matching(fencedCodeClass).OnElements("code")

	// Do not require rel="nofollow" on links; the Markdown conversion does
	// not add it automatically.
	policy.RequireNoFollowOnLinks(false)

	// Permit explicit image dimensions.
	policy.AllowAttrs("width", "height").OnElements("img")

	// Permit HTML comments.
	policy.AllowComments()

	return policy
}()

// sanitizeHTML runs unsafeHTML through bluemondayPolicy and returns the
// sanitized markup.
func sanitizeHTML(unsafeHTML string) string {
	cleaned := bluemondayPolicy.Sanitize(unsafeHTML)
	return cleaned
}

// unescapeHTML returns safeHTML with HTML entities (such as "&lt;" and
// "&amp;") replaced by the characters they represent.
func unescapeHTML(safeHTML string) string {
	decoded := html.UnescapeString(safeHTML)
	return decoded
}
75 changes: 75 additions & 0 deletions validation/readme_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
Copyright 2024 Chainguard, Inc.
SPDX-License-Identifier: Apache-2.0
*/

package validation

import (
"testing"
)

// README fixtures used as inputs by TestValidateReadme.
const (
// safeReadme holds HTML comments, a heading, an <img> with width and height,
// a relative Markdown link, a horizontal rule, and an XSS payload that
// appears only inside a fenced "go" code block.
safeReadme = `
<!--testing:start-->
# safe
<img src="logo.png" width="36px" height="36px"/>
[click here](./other.md)
<!--testing:end-->
---
Here is some dangerous code inside a code block:
` + "```go" + `
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
` + "```"

// unsafeReadme1 has the same structure as safeReadme, but the XSS payload
// (<STYLE> and <A CLASS=XSS>) is placed outside of any code block.
unsafeReadme1 = `
<!--testing:start-->
# unsafe
<img src="logo.png" width="36px" height="36px"/>
[click here](./other.md)
<!--testing:end-->
---
Here is some dangerous code outside a code block:
Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World
`

// unsafeReadme2 embeds a raw <script> element directly in the Markdown.
unsafeReadme2 = `
# unsafe
TAKE THAT!!!!!!!
<script>alert("XSS")</script>
`
)

// TestValidateReadme runs ValidateReadme over the README fixtures and checks
// whether each one is accepted (nil error) or rejected (non-nil error).
func TestValidateReadme(t *testing.T) {
	type testCase struct {
		Input  string
		Expect bool // true when ValidateReadme should return a nil error
	}

	cases := map[string]testCase{
		"empty":    {Input: "", Expect: true},
		"safe":     {Input: safeReadme, Expect: true},
		"unsafe 1": {Input: unsafeReadme1, Expect: false},
		"unsafe 2": {Input: unsafeReadme2, Expect: false},
	}

	for label, tc := range cases {
		t.Run(label, func(t *testing.T) {
			diff, err := ValidateReadme(tc.Input)
			accepted := err == nil
			if accepted == tc.Expect {
				return
			}
			t.Errorf("Expected ValidateReadme(`%s`) to return (err == nil) == %v, but got %v. diff: %s", tc.Input, tc.Expect, err, diff)
		})
	}
}

0 comments on commit aa61931

Please sign in to comment.