Skip to content

Commit

Permalink
JS: decode unicode escape sequences in strings to UTF-8 literals
Browse files: browse the repository at this point in the history
  • Loading branch information
tdewolff committed May 15, 2023
1 parent 7d47c2c commit 9fbf910
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 6 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ require (
github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2
github.com/spf13/pflag v1.0.5
github.com/tdewolff/parse/v2 v2.6.6
github.com/tdewolff/test v1.0.7
github.com/tdewolff/test v1.0.9
golang.org/x/sys v0.6.0 // indirect
)
3 changes: 2 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/tdewolff/parse/v2 v2.6.6 h1:Yld+0CrKUJaCV78DL1G2nk3C9lKrxyRTux5aaK/AkDo=
github.com/tdewolff/parse/v2 v2.6.6/go.mod h1:woz0cgbLwFdtbjJu8PIKxhW05KplTFQkOdX78o+Jgrs=
github.com/tdewolff/test v1.0.7 h1:8Vs0142DmPFW/bQeHRP3MV19m1gvndjUb1sn8yy74LM=
github.com/tdewolff/test v1.0.7/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
github.com/tdewolff/test v1.0.9 h1:SswqJCmeN4B+9gEAi/5uqT0qpi1y2/2O47V/1hhGZT0=
github.com/tdewolff/test v1.0.9/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
8 changes: 5 additions & 3 deletions js/js_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,13 @@ func TestJS(t *testing.T) {
{`"string\t\f\v\bstring"`, "\"string\t\f\v\bstring\""},
{`"string\a\c\'string"`, `"stringac'string"`},
{`"string\∀string"`, `"string∀string"`},
{`"string\0\uFFFFstring"`, "\"string\x00\\uFFFFstring\""},
{`"string\0\uFFFFstring"`, "\"string\x00string\""},
{`"string\x00\x55\x0A\x0D\x22\x27string"`, "\"string\x00U\\n\\r\\\"'string\""},
{`"string\000\12\015\042\47\411string"`, "\"string\x00\\n\\r\\\"'!1string\""},
{"'string\\n\\rstring'", "`string\n\rstring`"},
{"'string\\\r\nstring\\\nstring\\\rstring\\\u2028string\\\u2029string'", `"stringstringstringstringstringstring"`},
{`"\x7H\u877H"`, `"\x7H\u877H"`},
{`"\u01ac\u01de\u0187\u{0001a0}"`, `"ƬǞƇƠ"`},
{`"str1ng" + "str2ng"`, `"str1ngstr2ng"`},
{`"str1ng" + "str2ng" + "str3ng"`, `"str1ngstr2ngstr3ng"`},
{`"padding" + this`, `"padding"+this`},
Expand Down Expand Up @@ -793,8 +795,8 @@ func TestJS(t *testing.T) {
{`var a=5;({});var b=class{c(){3}}`, `var b,a=5;({},b=class{c(){3}})`}, // #494
{`({});a={b(){3}}`, `({},a={b(){3}})`}, // #494
{`export default function Foo(){a}Foo.prototype.bar=b`, `export default function Foo(){a}Foo.prototype.bar=b`}, // #525
{`(e=1,e=2)`, `e=1,e=2`}, // #528
{`"\x00\x31 \0"`, "\"\x001 \x00\""}, // #577
{`(e=1,e=2)`, `e=1,e=2`}, // #528
{`"\x00\x31 \0\u0000"`, "\"\x001 \x00\x00\""}, // #577
}

m := minify.New()
Expand Down
34 changes: 33 additions & 1 deletion js/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package js
import (
"bytes"
"encoding/hex"
stdStrconv "strconv"
"unicode/utf8"

"github.com/tdewolff/minify/v2"
"github.com/tdewolff/parse/v2/js"
Expand Down Expand Up @@ -981,7 +983,7 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte {
for i := prefix; i < len(b)-suffix; i++ {
if c := b[i]; c == '\\' {
c = b[i+1]
if c == quote || c == '\\' || c == 'u' || quote != '`' && (c == 'n' || c == 'r') {
if c == quote || c == '\\' || quote != '`' && (c == 'n' || c == 'r') {
// keep escape sequence
i++
continue
Expand Down Expand Up @@ -1014,6 +1016,36 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte {
i++
continue
}
} else if c == 'u' && i+2 < len(b) {
l := i + 2
if b[i+2] == '{' {
l++
}
r := l
for ; r < len(b) && (b[i+2] == '{' || r < l+4); r++ {
if b[r] < '0' || '9' < b[r] && b[r] < 'A' || 'F' < b[r] && b[r] < 'a' || 'f' < b[r] {
break
}
}
if b[i+2] == '{' && 6 < r-l || b[i+2] != '{' && r-l != 4 {
i++
continue
}
num, err := stdStrconv.ParseInt(string(b[l:r]), 16, 32)
if err != nil || 0x10FFFF <= num {
i++
continue
}

// decode the Unicode code point to UTF-8 and write it at the end of the escape sequence,
// then skip the leading part of the escape sequence up to the decoded character
n = 2 + r - l
if b[i+2] == '{' {
n += 2
}
m := utf8.RuneLen(rune(num))
utf8.EncodeRune(b[i+n-m:], rune(num))
n -= m
} else if c == '0' && (i+2 == len(b)-1 || b[i+2] < '0' || '7' < b[i+2]) {
// \0 (NULL)
b[i+1] = '\x00'
Expand Down

0 comments on commit 9fbf910

Please sign in to comment.