From 9fbf9105be774ca8d4174c54f08525c799269319 Mon Sep 17 00:00:00 2001 From: Taco de Wolff Date: Sun, 14 May 2023 21:21:45 -0400 Subject: [PATCH] JS: decode unicode escape sequences in strings to UTF-8 literals --- go.mod | 2 +- go.sum | 3 ++- js/js_test.go | 8 +++++--- js/util.go | 34 +++++++++++++++++++++++++++++++++- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index e690e105c8..cebe363a77 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,6 @@ require ( github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2 github.com/spf13/pflag v1.0.5 github.com/tdewolff/parse/v2 v2.6.6 - github.com/tdewolff/test v1.0.7 + github.com/tdewolff/test v1.0.9 golang.org/x/sys v0.6.0 // indirect ) diff --git a/go.sum b/go.sum index 6773c6fd73..dd790e5ef3 100644 --- a/go.sum +++ b/go.sum @@ -12,8 +12,9 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/tdewolff/parse/v2 v2.6.6 h1:Yld+0CrKUJaCV78DL1G2nk3C9lKrxyRTux5aaK/AkDo= github.com/tdewolff/parse/v2 v2.6.6/go.mod h1:woz0cgbLwFdtbjJu8PIKxhW05KplTFQkOdX78o+Jgrs= -github.com/tdewolff/test v1.0.7 h1:8Vs0142DmPFW/bQeHRP3MV19m1gvndjUb1sn8yy74LM= github.com/tdewolff/test v1.0.7/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= +github.com/tdewolff/test v1.0.9 h1:SswqJCmeN4B+9gEAi/5uqT0qpi1y2/2O47V/1hhGZT0= +github.com/tdewolff/test v1.0.9/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/js/js_test.go b/js/js_test.go index 21306911c1..0f308ef6e3 100644 --- a/js/js_test.go +++ b/js/js_test.go @@ -149,11 +149,13 @@ func TestJS(t *testing.T) { {`"string\t\f\v\bstring"`, "\"string\t\f\v\bstring\""}, {`"string\a\c\'string"`, `"stringac'string"`}, {`"string\∀string"`, `"string∀string"`}, - {`"string\0\uFFFFstring"`, "\"string\x00\\uFFFFstring\""}, + {`"string\0\uFFFFstring"`, "\"string\x00￿string\""}, {`"string\x00\x55\x0A\x0D\x22\x27string"`, "\"string\x00U\\n\\r\\\"'string\""}, {`"string\000\12\015\042\47\411string"`, "\"string\x00\\n\\r\\\"'!1string\""}, {"'string\\n\\rstring'", "`string\n\rstring`"}, {"'string\\\r\nstring\\\nstring\\\rstring\\\u2028string\\\u2029string'", `"stringstringstringstringstringstring"`}, + {`"\x7H\u877H"`, `"\x7H\u877H"`}, + {`"\u01ac\u01de\u0187\u{0001a0}"`, `"ƬǞƇƠ"`}, {`"str1ng" + "str2ng"`, `"str1ngstr2ng"`}, {`"str1ng" + "str2ng" + "str3ng"`, `"str1ngstr2ngstr3ng"`}, {`"padding" + this`, `"padding"+this`}, @@ -793,8 +795,8 @@ func TestJS(t *testing.T) { {`var a=5;({});var b=class{c(){3}}`, `var b,a=5;({},b=class{c(){3}})`}, // #494 {`({});a={b(){3}}`, `({},a={b(){3}})`}, // #494 {`export default function Foo(){a}Foo.prototype.bar=b`, `export default function Foo(){a}Foo.prototype.bar=b`}, // #525 - {`(e=1,e=2)`, `e=1,e=2`}, // #528 - {`"\x00\x31 \0"`, "\"\x001 \x00\""}, // #577 + {`(e=1,e=2)`, `e=1,e=2`}, // #528 + {`"\x00\x31 \0\u0000"`, "\"\x001 \x00\x00\""}, // #577 } m := minify.New() diff --git a/js/util.go b/js/util.go index 585f32ef1d..a5c2879258 100644 --- a/js/util.go +++ b/js/util.go @@ -3,6 +3,8 @@ package js import ( "bytes" "encoding/hex" + stdStrconv "strconv" + "unicode/utf8" "github.com/tdewolff/minify/v2" "github.com/tdewolff/parse/v2/js" @@ -981,7 +983,7 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { for i := prefix; i < len(b)-suffix; i++ { if c := b[i]; c == '\\' { c = b[i+1] - if c == quote || c == '\\' || c == 'u' || quote != '`' && (c == 'n' || c == 'r') { + if c == quote || c == '\\' || quote != '`' && (c == 'n' || c == 'r') { // keep escape sequence i++ continue @@ -1014,6 +1016,36 @@ func replaceEscapes(b []byte, quote byte, prefix, suffix int) []byte { i++ continue } + } else if c == 'u' && i+2 < len(b) { + l := i + 2 + if b[i+2] == '{' { + l++ + } + r := l + for ; r < len(b) && (b[i+2] == '{' || r < l+4); r++ { + if b[r] < '0' || '9' < b[r] && b[r] < 'A' || 'F' < b[r] && b[r] < 'a' || 'f' < b[r] { + break + } + } + if b[i+2] == '{' && 6 < r-l || b[i+2] != '{' && r-l != 4 { + i++ + continue + } + num, err := stdStrconv.ParseInt(string(b[l:r]), 16, 32) + if err != nil || 0x10FFFF <= num { + i++ + continue + } + + // decode unicode character to UTF-8 and put at the end of the escape sequence + // then skip the first part of the escape sequence until the decoded character + n = 2 + r - l + if b[i+2] == '{' { + n += 2 + } + m := utf8.RuneLen(rune(num)) + utf8.EncodeRune(b[i+n-m:], rune(num)) + n -= m } else if c == '0' && (i+2 == len(b)-1 || b[i+2] < '0' || '7' < b[i+2]) { // \0 (NULL) b[i+1] = '\x00'