From 68768e579186644e9bdd5f13750d490ae1c75300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sat, 27 Apr 2024 22:53:36 +0100 Subject: [PATCH] syntax: avoid a slice bounds check on every input byte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When our lexer peeks one byte or consumes one rune, we do a check like `p.bsp < len(p.bs)` followed by `p.bs[p.bsp]`. In practice this made a bounds check unnecessary, as we knew we had enough input bytes in p.bs to grab the byte at p.bsp, but since p.bsp was a signed integer, the compiler knew that in theory p.bsp could have overflowed to a negative value and caused a panic. In the past I tried swapping p.bsp from an int to an uint to solve this issue, but the compiler wasn't yet clever enough to take note. It seems like it now is: │ old │ new │ │ sec/op │ sec/op vs base │ Parse-8 14.86µ ± 1% 14.46µ ± 1% -2.74% (p=0.000 n=10) --- syntax/lexer.go | 20 ++++++++++---------- syntax/parser.go | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/syntax/lexer.go b/syntax/lexer.go index f93b27df..28f96df6 100644 --- a/syntax/lexer.go +++ b/syntax/lexer.go @@ -67,7 +67,7 @@ func (p *Parser) rune() rune { p.col += int64(p.w) bquotes := 0 retry: - if p.bsp < len(p.bs) { + if p.bsp < uint(len(p.bs)) { if b := p.bs[p.bsp]; b < utf8.RuneSelf { p.bsp++ if b == '\x00' { @@ -86,7 +86,7 @@ retry: return escNewl } if p.openBquotes > 0 && bquotes < p.openBquotes && - p.bsp < len(p.bs) && bquoteEscaped(p.bs[p.bsp]) { + p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) { // We turn backquote command substitutions into $(), // so we remove the extra backslashes needed by the backquotes. // For good position information, we still include them in p.w. @@ -110,9 +110,9 @@ retry: var w int p.r, w = utf8.DecodeRune(p.bs[p.bsp:]) if p.litBs != nil { - p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+w]...) + p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+uint(w)]...) } - p.bsp += w + p.bsp += uint(w) if p.r == utf8.RuneError && w == 1 { p.posErr(p.nextPos(), "invalid UTF-8 encoding") } @@ -135,7 +135,7 @@ retry: // beginning of the buffer. func (p *Parser) fill() { p.offs += int64(p.bsp) - left := len(p.bs) - p.bsp + left := len(p.bs) - int(p.bsp) copy(p.readBuf[:left], p.readBuf[p.bsp:]) readAgain: n, err := 0, p.readErr @@ -254,7 +254,7 @@ skipSpace: } if p.stopAt != nil && (p.spaced || p.tok == illegalTok || p.stopToken()) { w := utf8.RuneLen(r) - if bytes.HasPrefix(p.bs[p.bsp-w:], p.stopAt) { + if bytes.HasPrefix(p.bs[p.bsp-uint(w):], p.stopAt) { p.r = utf8.RuneSelf p.w = 1 p.tok = _EOF @@ -388,7 +388,7 @@ func (p *Parser) extendedGlob() bool { } func (p *Parser) peekBytes(s string) bool { - peekEnd := p.bsp + len(s) + peekEnd := int(p.bsp) + len(s) // TODO: This should loop for slow readers, e.g. those providing one byte at // a time. Use a loop and test it with testing/iotest.OneByteReader. if peekEnd > len(p.bs) { @@ -398,10 +398,10 @@ func (p *Parser) peekBytes(s string) bool { } func (p *Parser) peekByte(b byte) bool { - if p.bsp == len(p.bs) { + if p.bsp == uint(len(p.bs)) { p.fill() } - return p.bsp < len(p.bs) && p.bs[p.bsp] == b + return p.bsp < uint(len(p.bs)) && p.bs[p.bsp] == b } func (p *Parser) regToken(r rune) token { @@ -809,7 +809,7 @@ func (p *Parser) newLit(r rune) { p.litBs[0] = byte(r) case r > escNewl: w := utf8.RuneLen(r) - p.litBs = append(p.litBuf[:0], p.bs[p.bsp-w:p.bsp]...) + p.litBs = append(p.litBuf[:0], p.bs[p.bsp-uint(w):p.bsp]...) default: // don't let r == utf8.RuneSelf go to the second case as RuneLen // would return -1 diff --git a/syntax/parser.go b/syntax/parser.go index 105485ea..d39336bc 100644 --- a/syntax/parser.go +++ b/syntax/parser.go @@ -338,7 +338,7 @@ func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) { type Parser struct { src io.Reader bs []byte // current chunk of read bytes - bsp int // pos within chunk for the rune after r + bsp uint // pos within chunk for the rune after r; uint helps eliminate bounds checks r rune // next rune w int // width of r @@ -733,7 +733,7 @@ func (p *Parser) matched(lpos Pos, left, right token) Pos { func (p *Parser) errPass(err error) { if p.err == nil { p.err = err - p.bsp = len(p.bs) + 1 + p.bsp = uint(len(p.bs)) + 1 p.r = utf8.RuneSelf p.w = 1 p.tok = _EOF