diff --git a/gnovm/stdlibs/encoding/base32/base32.gno b/gnovm/stdlibs/encoding/base32/base32.gno new file mode 100644 index 00000000000..eba0a1d539e --- /dev/null +++ b/gnovm/stdlibs/encoding/base32/base32.gno @@ -0,0 +1,590 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package base32 implements base32 encoding as specified by RFC 4648. +package base32 + +import ( + "io" + "strconv" +) + +/* + * Encodings + */ + +// An Encoding is a radix 32 encoding/decoding scheme, defined by a +// 32-character alphabet. The most common is the "base32" encoding +// introduced for SASL GSSAPI and standardized in RFC 4648. +// The alternate "base32hex" encoding is used in DNSSEC. +type Encoding struct { + encode [32]byte // mapping of symbol index to symbol byte value + decodeMap [256]uint8 // mapping of symbol byte value to symbol index + padChar rune +} + +const ( + StdPadding rune = '=' // Standard padding character + NoPadding rune = -1 // No padding +) + +const ( + decodeMapInitialize = "" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + invalidIndex = '\xff' +) + +// NewEncoding returns a new padded Encoding defined by the given alphabet, +// which must be a 32-byte string that contains unique byte values and +// does not contain the padding character or CR / LF ('\r', '\n'). +// The alphabet is treated as a sequence of byte values +// without any special treatment for multi-byte UTF-8. +// The resulting Encoding uses the default padding character ('='), +// which may be changed or disabled via [Encoding.WithPadding]. +func NewEncoding(encoder string) *Encoding { + if len(encoder) != 32 { + panic("encoding alphabet is not 32-bytes long") + } + + e := new(Encoding) + e.padChar = StdPadding + copy(e.encode[:], encoder) + copy(e.decodeMap[:], decodeMapInitialize) + + for i := 0; i < len(encoder); i++ { + // Note: While we document that the alphabet cannot contain + // the padding character, we do not enforce it since we do not know + // if the caller intends to switch the padding from StdPadding later. + switch { + case encoder[i] == '\n' || encoder[i] == '\r': + panic("encoding alphabet contains newline character") + case e.decodeMap[encoder[i]] != invalidIndex: + panic("encoding alphabet includes duplicate symbols") + } + e.decodeMap[encoder[i]] = uint8(i) + } + return e +} + +// StdEncoding is the standard base32 encoding, as defined in RFC 4648. +var StdEncoding = NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567") + +// HexEncoding is the “Extended Hex Alphabet” defined in RFC 4648. +// It is typically used in DNS. +var HexEncoding = NewEncoding("0123456789ABCDEFGHIJKLMNOPQRSTUV") + +// WithPadding creates a new encoding identical to enc except +// with a specified padding character, or NoPadding to disable padding. +// The padding character must not be '\r' or '\n', +// must not be contained in the encoding's alphabet, +// must not be negative, and must be a rune equal or below '\xff'. +// Padding characters above '\x7f' are encoded as their exact byte value +// rather than using the UTF-8 representation of the codepoint. +func (enc Encoding) WithPadding(padding rune) *Encoding { + switch { + case padding < NoPadding || padding == '\r' || padding == '\n' || padding > 0xff: + panic("invalid padding") + case padding != NoPadding && enc.decodeMap[byte(padding)] != invalidIndex: + panic("padding contained in alphabet") + } + enc.padChar = padding + return &enc +} + +/* + * Encoder + */ + +// Encode encodes src using the encoding enc, +// writing [Encoding.EncodedLen](len(src)) bytes to dst. +// +// The encoding pads the output to a multiple of 8 bytes, +// so Encode is not appropriate for use on individual blocks +// of a large data stream. Use [NewEncoder] instead. +func (enc *Encoding) Encode(dst, src []byte) { + if len(src) == 0 { + return + } + // enc is a pointer receiver, so the use of enc.encode within the hot + // loop below means a nil check at every operation. Lift that nil check + // outside of the loop to speed up the encoder. + _ = enc.encode + + di, si := 0, 0 + n := (len(src) / 5) * 5 + for si < n { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]) + lo := hi<<8 | uint32(src[si+4]) + + dst[di+0] = enc.encode[(hi>>27)&0x1F] + dst[di+1] = enc.encode[(hi>>22)&0x1F] + dst[di+2] = enc.encode[(hi>>17)&0x1F] + dst[di+3] = enc.encode[(hi>>12)&0x1F] + dst[di+4] = enc.encode[(hi>>7)&0x1F] + dst[di+5] = enc.encode[(hi>>2)&0x1F] + dst[di+6] = enc.encode[(lo>>5)&0x1F] + dst[di+7] = enc.encode[(lo)&0x1F] + + si += 5 + di += 8 + } + + // Add the remaining small block + remain := len(src) - si + if remain == 0 { + return + } + + // Encode the remaining bytes in reverse order. + val := uint32(0) + switch remain { + case 4: + val |= uint32(src[si+3]) + dst[di+6] = enc.encode[val<<3&0x1F] + dst[di+5] = enc.encode[val>>2&0x1F] + fallthrough + case 3: + val |= uint32(src[si+2]) << 8 + dst[di+4] = enc.encode[val>>7&0x1F] + fallthrough + case 2: + val |= uint32(src[si+1]) << 16 + dst[di+3] = enc.encode[val>>12&0x1F] + dst[di+2] = enc.encode[val>>17&0x1F] + fallthrough + case 1: + val |= uint32(src[si+0]) << 24 + dst[di+1] = enc.encode[val>>22&0x1F] + dst[di+0] = enc.encode[val>>27&0x1F] + } + + // Pad the final quantum + if enc.padChar != NoPadding { + nPad := (remain * 8 / 5) + 1 + for i := nPad; i < 8; i++ { + dst[di+i] = byte(enc.padChar) + } + } +} + +// AppendEncode appends the base32 encoded src to dst +// and returns the extended buffer. +func (enc *Encoding) AppendEncode(dst, src []byte) []byte { + n := enc.EncodedLen(len(src)) + dst = slicesGrow(dst, n) + enc.Encode(dst[len(dst):][:n], src) + return dst[:len(dst)+n] +} + +// XXX: non-generic slices.Grow +func slicesGrow(s []byte, n int) []byte { + if n -= cap(s) - len(s); n > 0 { + s = append(s[:cap(s)], make([]byte, n)...)[:len(s)] + } + return s +} + +// EncodeToString returns the base32 encoding of src. +func (enc *Encoding) EncodeToString(src []byte) string { + buf := make([]byte, enc.EncodedLen(len(src))) + enc.Encode(buf, src) + return string(buf) +} + +type encoder struct { + err error + enc *Encoding + w io.Writer + buf [5]byte // buffered data waiting to be encoded + nbuf int // number of bytes in buf + out [1024]byte // output buffer +} + +func (e *encoder) Write(p []byte) (n int, err error) { + if e.err != nil { + return 0, e.err + } + + // Leading fringe. + if e.nbuf > 0 { + var i int + for i = 0; i < len(p) && e.nbuf < 5; i++ { + e.buf[e.nbuf] = p[i] + e.nbuf++ + } + n += i + p = p[i:] + if e.nbuf < 5 { + return + } + e.enc.Encode(e.out[0:], e.buf[0:]) + if _, e.err = e.w.Write(e.out[0:8]); e.err != nil { + return n, e.err + } + e.nbuf = 0 + } + + // Large interior chunks. + for len(p) >= 5 { + nn := len(e.out) / 8 * 5 + if nn > len(p) { + nn = len(p) + nn -= nn % 5 + } + e.enc.Encode(e.out[0:], p[0:nn]) + if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil { + return n, e.err + } + n += nn + p = p[nn:] + } + + // Trailing fringe. + copy(e.buf[:], p) + e.nbuf = len(p) + n += len(p) + return +} + +// Close flushes any pending output from the encoder. +// It is an error to call Write after calling Close. +func (e *encoder) Close() error { + // If there's anything left in the buffer, flush it out + if e.err == nil && e.nbuf > 0 { + e.enc.Encode(e.out[0:], e.buf[0:e.nbuf]) + encodedLen := e.enc.EncodedLen(e.nbuf) + e.nbuf = 0 + _, e.err = e.w.Write(e.out[0:encodedLen]) + } + return e.err +} + +// NewEncoder returns a new base32 stream encoder. Data written to +// the returned writer will be encoded using enc and then written to w. +// Base32 encodings operate in 5-byte blocks; when finished +// writing, the caller must Close the returned encoder to flush any +// partially written blocks. +func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { + return &encoder{enc: enc, w: w} +} + +// EncodedLen returns the length in bytes of the base32 encoding +// of an input buffer of length n. +func (enc *Encoding) EncodedLen(n int) int { + if enc.padChar == NoPadding { + return n/5*8 + (n%5*8+4)/5 + } + return (n + 4) / 5 * 8 +} + +/* + * Decoder + */ + +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "illegal base32 data at input byte " + strconv.FormatInt(int64(e), 10) +} + +// decode is like Decode but returns an additional 'end' value, which +// indicates if end-of-message padding was encountered and thus any +// additional data is an error. This method assumes that src has been +// stripped of all supported whitespace ('\r' and '\n'). +func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { + // Lift the nil check outside of the loop. + _ = enc.decodeMap + + dsti := 0 + olen := len(src) + + for len(src) > 0 && !end { + // Decode quantum using the base32 alphabet + var dbuf [8]byte + dlen := 8 + + for j := 0; j < 8; { + + if len(src) == 0 { + if enc.padChar != NoPadding { + // We have reached the end and are missing padding + return n, false, CorruptInputError(olen - len(src) - j) + } + // We have reached the end and are not expecting any padding + dlen, end = j, true + break + } + in := src[0] + src = src[1:] + if in == byte(enc.padChar) && j >= 2 && len(src) < 8 { + // We've reached the end and there's padding + if len(src)+j < 8-1 { + // not enough padding + return n, false, CorruptInputError(olen) + } + for k := 0; k < 8-1-j; k++ { + if len(src) > k && src[k] != byte(enc.padChar) { + // incorrect padding + return n, false, CorruptInputError(olen - len(src) + k - 1) + } + } + dlen, end = j, true + // 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not + // valid dlen values. See RFC 4648 Section 6 "Base 32 Encoding" listing + // the five valid padding lengths, and Section 9 "Illustrations and + // Examples" for an illustration for how the 1st, 3rd and 6th base32 + // src bytes do not yield enough information to decode a dst byte. + if dlen == 1 || dlen == 3 || dlen == 6 { + return n, false, CorruptInputError(olen - len(src) - 1) + } + break + } + dbuf[j] = enc.decodeMap[in] + if dbuf[j] == 0xFF { + return n, false, CorruptInputError(olen - len(src) - 1) + } + j++ + } + + // Pack 8x 5-bit source blocks into 5 byte destination + // quantum + switch dlen { + case 8: + dst[dsti+4] = dbuf[6]<<5 | dbuf[7] + n++ + fallthrough + case 7: + dst[dsti+3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 + n++ + fallthrough + case 5: + dst[dsti+2] = dbuf[3]<<4 | dbuf[4]>>1 + n++ + fallthrough + case 4: + dst[dsti+1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 + n++ + fallthrough + case 2: + dst[dsti+0] = dbuf[0]<<3 | dbuf[1]>>2 + n++ + } + dsti += 5 + } + return n, end, nil +} + +// Decode decodes src using the encoding enc. It writes at most +// [Encoding.DecodedLen](len(src)) bytes to dst and returns the number of bytes +// written. If src contains invalid base32 data, it will return the +// number of bytes successfully written and [CorruptInputError]. +// Newline characters (\r and \n) are ignored. +func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { + buf := make([]byte, len(src)) + l := stripNewlines(buf, src) + n, _, err = enc.decode(dst, buf[:l]) + return +} + +// AppendDecode appends the base32 decoded src to dst +// and returns the extended buffer. +// If the input is malformed, it returns the partially decoded src and an error. +func (enc *Encoding) AppendDecode(dst, src []byte) ([]byte, error) { + // Compute the output size without padding to avoid over allocating. + n := len(src) + for n > 0 && rune(src[n-1]) == enc.padChar { + n-- + } + n = decodedLen(n, NoPadding) + + dst = slicesGrow(dst, n) + n, err := enc.Decode(dst[len(dst):][:n], src) + return dst[:len(dst)+n], err +} + +// DecodeString returns the bytes represented by the base32 string s. +func (enc *Encoding) DecodeString(s string) ([]byte, error) { + buf := []byte(s) + l := stripNewlines(buf, buf) + n, _, err := enc.decode(buf, buf[:l]) + return buf[:n], err +} + +type decoder struct { + err error + enc *Encoding + r io.Reader + end bool // saw end of message + buf [1024]byte // leftover input + nbuf int + out []byte // leftover decoded output + outbuf [1024 / 8 * 5]byte +} + +func readEncodedData(r io.Reader, buf []byte, min int, expectsPadding bool) (n int, err error) { + for n < min && err == nil { + var nn int + nn, err = r.Read(buf[n:]) + n += nn + } + // data was read, less than min bytes could be read + if n < min && n > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + // no data was read, the buffer already contains some data + // when padding is disabled this is not an error, as the message can be of + // any length + if expectsPadding && min < 8 && n == 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return +} + +func (d *decoder) Read(p []byte) (n int, err error) { + // Use leftover decoded output from last read. + if len(d.out) > 0 { + n = copy(p, d.out) + d.out = d.out[n:] + if len(d.out) == 0 { + return n, d.err + } + return n, nil + } + + if d.err != nil { + return 0, d.err + } + + // Read a chunk. + nn := (len(p) + 4) / 5 * 8 + if nn < 8 { + nn = 8 + } + if nn > len(d.buf) { + nn = len(d.buf) + } + + // Minimum amount of bytes that needs to be read each cycle + var min int + var expectsPadding bool + if d.enc.padChar == NoPadding { + min = 1 + expectsPadding = false + } else { + min = 8 - d.nbuf + expectsPadding = true + } + + nn, d.err = readEncodedData(d.r, d.buf[d.nbuf:nn], min, expectsPadding) + d.nbuf += nn + if d.nbuf < min { + return 0, d.err + } + if nn > 0 && d.end { + return 0, CorruptInputError(0) + } + + // Decode chunk into p, or d.out and then p if p is too small. + var nr int + if d.enc.padChar == NoPadding { + nr = d.nbuf + } else { + nr = d.nbuf / 8 * 8 + } + nw := d.enc.DecodedLen(d.nbuf) + + if nw > len(p) { + nw, d.end, err = d.enc.decode(d.outbuf[0:], d.buf[0:nr]) + d.out = d.outbuf[0:nw] + n = copy(p, d.out) + d.out = d.out[n:] + } else { + n, d.end, err = d.enc.decode(p, d.buf[0:nr]) + } + d.nbuf -= nr + for i := 0; i < d.nbuf; i++ { + d.buf[i] = d.buf[i+nr] + } + + if err != nil && (d.err == nil || d.err == io.EOF) { + d.err = err + } + + if len(d.out) > 0 { + // We cannot return all the decoded bytes to the caller in this + // invocation of Read, so we return a nil error to ensure that Read + // will be called again. The error stored in d.err, if any, will be + // returned with the last set of decoded bytes. + return n, nil + } + + return n, d.err +} + +type newlineFilteringReader struct { + wrapped io.Reader +} + +// stripNewlines removes newline characters and returns the number +// of non-newline characters copied to dst. +func stripNewlines(dst, src []byte) int { + offset := 0 + for _, b := range src { + if b == '\r' || b == '\n' { + continue + } + dst[offset] = b + offset++ + } + return offset +} + +func (r *newlineFilteringReader) Read(p []byte) (int, error) { + n, err := r.wrapped.Read(p) + for n > 0 { + s := p[0:n] + offset := stripNewlines(s, s) + if err != nil || offset > 0 { + return offset, err + } + // Previous buffer entirely whitespace, read again + n, err = r.wrapped.Read(p) + } + return n, err +} + +// NewDecoder constructs a new base32 stream decoder. +func NewDecoder(enc *Encoding, r io.Reader) io.Reader { + return &decoder{enc: enc, r: &newlineFilteringReader{r}} +} + +// DecodedLen returns the maximum length in bytes of the decoded data +// corresponding to n bytes of base32-encoded data. +func (enc *Encoding) DecodedLen(n int) int { + return decodedLen(n, enc.padChar) +} + +func decodedLen(n int, padChar rune) int { + if padChar == NoPadding { + return n/8*5 + n%8*5/8 + } + return n / 8 * 5 +} diff --git a/gnovm/stdlibs/encoding/base32/base32_test.gno b/gnovm/stdlibs/encoding/base32/base32_test.gno new file mode 100644 index 00000000000..fa09c2cd2f3 --- /dev/null +++ b/gnovm/stdlibs/encoding/base32/base32_test.gno @@ -0,0 +1,913 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package base32 + +import ( + "bytes" + "errors" + "io" + "math" + "strconv" + "strings" + "testing" +) + +type testpair struct { + decoded, encoded string +} + +var pairs = []testpair{ + // RFC 4648 examples + {"", ""}, + {"f", "MY======"}, + {"fo", "MZXQ===="}, + {"foo", "MZXW6==="}, + {"foob", "MZXW6YQ="}, + {"fooba", "MZXW6YTB"}, + {"foobar", "MZXW6YTBOI======"}, + + // Wikipedia examples, converted to base32 + {"sure.", "ON2XEZJO"}, + {"sure", "ON2XEZI="}, + {"sur", "ON2XE==="}, + {"su", "ON2Q===="}, + {"leasure.", "NRSWC43VOJSS4==="}, + {"easure.", "MVQXG5LSMUXA===="}, + {"asure.", "MFZXK4TFFY======"}, + {"sure.", "ON2XEZJO"}, +} + +var bigtest = testpair{ + "Twas brillig, and the slithy toves", + "KR3WC4ZAMJZGS3DMNFTSYIDBNZSCA5DIMUQHG3DJORUHSIDUN53GK4Y=", +} + +func testEqual(t *testing.T, msg string, args ...interface{}) bool { + t.Helper() + if args[len(args)-2] != args[len(args)-1] { + t.Errorf(msg, args...) + return false + } + return true +} + +func TestEncode(t *testing.T) { + for _, p := range pairs { + got := StdEncoding.EncodeToString([]byte(p.decoded)) + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, got, p.encoded) + dst := StdEncoding.AppendEncode([]byte("lead"), []byte(p.decoded)) + testEqual(t, `AppendEncode("lead", %q) = %q, want %q`, p.decoded, string(dst), "lead"+p.encoded) + } +} + +func TestEncoder(t *testing.T) { + for _, p := range pairs { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + encoder.Write([]byte(p.decoded)) + encoder.Close() + testEqual(t, "Encode(%q) = %q, want %q", p.decoded, bb.String(), p.encoded) + } +} + +func TestEncoderBuffering(t *testing.T) { + input := []byte(bigtest.decoded) + for bs := 1; bs <= 12; bs++ { + bb := &strings.Builder{} + encoder := NewEncoder(StdEncoding, bb) + for pos := 0; pos < len(input); pos += bs { + end := pos + bs + if end > len(input) { + end = len(input) + } + n, err := encoder.Write(input[pos:end]) + testEqual(t, "Write(%q) gave error %v, want %v", input[pos:end], err, error(nil)) + testEqual(t, "Write(%q) gave length %v, want %v", input[pos:end], n, end-pos) + } + err := encoder.Close() + testEqual(t, "Close gave error %v, want %v", err, error(nil)) + testEqual(t, "Encoding/%d of %q = %q, want %q", bs, bigtest.decoded, bb.String(), bigtest.encoded) + } +} + +func TestDecoderBufferingWithPadding(t *testing.T) { + for bs := 0; bs <= 12; bs++ { + for _, s := range pairs { + decoder := NewDecoder(StdEncoding, strings.NewReader(s.encoded)) + buf := make([]byte, len(s.decoded)+bs) + + var n int + var err error + n, err = decoder.Read(buf) + + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", s.encoded, len(s.decoded), n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q\n", bs, s.encoded, string(buf[:n]), s.decoded) + } + } +} + +func TestDecoderBufferingWithoutPadding(t *testing.T) { + for bs := 0; bs <= 12; bs++ { + for _, s := range pairs { + encoded := strings.TrimRight(s.encoded, "=") + decoder := NewDecoder(StdEncoding.WithPadding(NoPadding), strings.NewReader(encoded)) + buf := make([]byte, len(s.decoded)+bs) + + var n int + var err error + n, err = decoder.Read(buf) + + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", encoded, len(s.decoded), n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q\n", bs, encoded, string(buf[:n]), s.decoded) + } + } +} + +func TestDecode(t *testing.T) { + for _, p := range pairs { + dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) + count, end, err := StdEncoding.decode(dbuf, []byte(p.encoded)) + testEqual(t, "Decode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "Decode(%q) = length %v, want %v", p.encoded, count, len(p.decoded)) + if len(p.encoded) > 0 { + testEqual(t, "Decode(%q) = end %v, want %v", p.encoded, end, (p.encoded[len(p.encoded)-1] == '=')) + } + testEqual(t, "Decode(%q) = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + + dbuf, err = StdEncoding.DecodeString(p.encoded) + testEqual(t, "DecodeString(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, "DecodeString(%q) = %q, want %q", p.encoded, string(dbuf), p.decoded) + + dst, err := StdEncoding.AppendDecode([]byte("lead"), []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("lead", %q) = %q, want %q`, p.encoded, string(dst), "lead"+p.decoded) + + dst2, err := StdEncoding.AppendDecode(dst[:0:len(p.decoded)], []byte(p.encoded)) + testEqual(t, "AppendDecode(%q) = error %v, want %v", p.encoded, err, error(nil)) + testEqual(t, `AppendDecode("", %q) = %q, want %q`, p.encoded, string(dst2), p.decoded) + if len(dst) > 0 && len(dst2) > 0 && &dst[0] != &dst2[0] { + t.Errorf("unexpected capacity growth: got %d, want %d", cap(dst2), cap(dst)) + } + } +} + +func TestDecoder(t *testing.T) { + for _, p := range pairs { + decoder := NewDecoder(StdEncoding, strings.NewReader(p.encoded)) + dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))) + count, err := decoder.Read(dbuf) + if err != nil && err != io.EOF { + t.Fatal("Read failed", err) + } + testEqual(t, "Read from %q = length %v, want %v", p.encoded, count, len(p.decoded)) + testEqual(t, "Decoding of %q = %q, want %q", p.encoded, string(dbuf[0:count]), p.decoded) + if err != io.EOF { + _, err = decoder.Read(dbuf) + } + testEqual(t, "Read from %q = %v, want %v", p.encoded, err, io.EOF) + } +} + +type badReader struct { + data []byte + errs []error + called int + limit int +} + +// Populates p with data, returns a count of the bytes written and an +// error. The error returned is taken from badReader.errs, with each +// invocation of Read returning the next error in this slice, or io.EOF, +// if all errors from the slice have already been returned. The +// number of bytes returned is determined by the size of the input buffer +// the test passes to decoder.Read and will be a multiple of 8, unless +// badReader.limit is non zero. +func (b *badReader) Read(p []byte) (int, error) { + lim := len(p) + if b.limit != 0 && b.limit < lim { + lim = b.limit + } + if len(b.data) < lim { + lim = len(b.data) + } + for i := range p[:lim] { + p[i] = b.data[i] + } + b.data = b.data[lim:] + err := io.EOF + if b.called < len(b.errs) { + err = b.errs[b.called] + } + b.called++ + return lim, err +} + +// TestIssue20044 tests that decoder.Read behaves correctly when the caller +// supplied reader returns an error. +func TestIssue20044(t *testing.T) { + badErr := errors.New("bad reader error") + testCases := []struct { + r badReader + res string + err error + dbuflen int + }{ + // Check valid input data accompanied by an error is processed and the error is propagated. + {r: badReader{data: []byte("MY======"), errs: []error{badErr}}, + res: "f", err: badErr}, + // Check a read error accompanied by input data consisting of newlines only is propagated. + {r: badReader{data: []byte("\n\n\n\n\n\n\n\n"), errs: []error{badErr, nil}}, + res: "", err: badErr}, + // Reader will be called twice. The first time it will return 8 newline characters. The + // second time valid base32 encoded data and an error. The data should be decoded + // correctly and the error should be propagated. + {r: badReader{data: []byte("\n\n\n\n\n\n\n\nMY======"), errs: []error{nil, badErr}}, + res: "f", err: badErr, dbuflen: 8}, + // Reader returns invalid input data (too short) and an error. Verify the reader + // error is returned. + {r: badReader{data: []byte("MY====="), errs: []error{badErr}}, + res: "", err: badErr}, + // Reader returns invalid input data (too short) but no error. Verify io.ErrUnexpectedEOF + // is returned. + {r: badReader{data: []byte("MY====="), errs: []error{nil}}, + res: "", err: io.ErrUnexpectedEOF}, + // Reader returns invalid input data and an error. Verify the reader and not the + // decoder error is returned. + {r: badReader{data: []byte("Ma======"), errs: []error{badErr}}, + res: "", err: badErr}, + // Reader returns valid data and io.EOF. Check data is decoded and io.EOF is propagated. + {r: badReader{data: []byte("MZXW6YTB"), errs: []error{io.EOF}}, + res: "fooba", err: io.EOF}, + // Check errors are properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but an error on the second call. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, badErr}}, + res: "leasure.", err: badErr, dbuflen: 1}, + // Check io.EOF is properly reported when decoder.Read is called multiple times. + // decoder.Read will be called 8 times, badReader.Read will be called twice, returning + // valid data both times but io.EOF on the second call. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 1}, + // The following two test cases check that errors are propagated correctly when more than + // 8 bytes are read at a time. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{io.EOF}}, + res: "leasure.", err: io.EOF, dbuflen: 11}, + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{badErr}}, + res: "leasure.", err: badErr, dbuflen: 11}, + // Check that errors are correctly propagated when the reader returns valid bytes in + // groups that are not divisible by 8. The first read will return 11 bytes and no + // error. The second will return 7 and an error. The data should be decoded correctly + // and the error should be propagated. + {r: badReader{data: []byte("NRSWC43VOJSS4==="), errs: []error{nil, badErr}, limit: 11}, + res: "leasure.", err: badErr}, + } + + for _, tc := range testCases { + input := tc.r.data + decoder := NewDecoder(StdEncoding, &tc.r) + var dbuflen int + if tc.dbuflen > 0 { + dbuflen = tc.dbuflen + } else { + dbuflen = StdEncoding.DecodedLen(len(input)) + } + dbuf := make([]byte, dbuflen) + var err error + var res []byte + for err == nil { + var n int + n, err = decoder.Read(dbuf) + if n > 0 { + res = append(res, dbuf[:n]...) + } + } + + testEqual(t, "Decoding of %q = %q, want %q", string(input), string(res), tc.res) + testEqual(t, "Decoding of %q err = %v, expected %v", string(input), err, tc.err) + } +} + +// TestDecoderError verifies decode errors are propagated when there are no read +// errors. +func TestDecoderError(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTb" + dbuf := make([]byte, StdEncoding.DecodedLen(len(input))) + br := badReader{data: []byte(input), errs: []error{readErr}} + decoder := NewDecoder(StdEncoding, &br) + n, err := decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + if _, ok := err.(CorruptInputError); !ok { + t.Errorf("Corrupt input error expected. Found %T", err) + } + } +} + +// TestReaderEOF ensures decoder.Read behaves correctly when input data is +// exhausted. +func TestReaderEOF(t *testing.T) { + for _, readErr := range []error{io.EOF, nil} { + input := "MZXW6YTB" + br := badReader{data: []byte(input), errs: []error{nil, readErr}} + decoder := NewDecoder(StdEncoding, &br) + dbuf := make([]byte, StdEncoding.DecodedLen(len(input))) + n, err := decoder.Read(dbuf) + testEqual(t, "Decoding of %q err = %v, expected %v", input, err, error(nil)) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + n, err = decoder.Read(dbuf) + testEqual(t, "Read after EOF, n = %d, expected %d", n, 0) + testEqual(t, "Read after EOF, err = %v, expected %v", err, io.EOF) + } +} + +func TestDecoderBuffering(t *testing.T) { + for bs := 1; bs <= 12; bs++ { + decoder := NewDecoder(StdEncoding, strings.NewReader(bigtest.encoded)) + buf := make([]byte, len(bigtest.decoded)+12) + var total int + var n int + var err error + for total = 0; total < len(bigtest.decoded) && err == nil; { + n, err = decoder.Read(buf[total : total+bs]) + total += n + } + if err != nil && err != io.EOF { + t.Errorf("Read from %q at pos %d = %d, unexpected error %v", bigtest.encoded, total, n, err) + } + testEqual(t, "Decoding/%d of %q = %q, want %q", bs, bigtest.encoded, string(buf[0:total]), bigtest.decoded) + } +} + +func TestDecodeCorrupt(t *testing.T) { + testCases := []struct { + input string + offset int // -1 means no corruption. + }{ + {"", -1}, + {"!!!!", 0}, + {"x===", 0}, + {"AA=A====", 2}, + {"AAA=AAAA", 3}, + {"MMMMMMMMM", 8}, + {"MMMMMM", 0}, + {"A=", 1}, + {"AA=", 3}, + {"AA==", 4}, + {"AA===", 5}, + {"AAAA=", 5}, + {"AAAA==", 6}, + {"AAAAA=", 6}, + {"AAAAA==", 7}, + {"A=======", 1}, + {"AA======", -1}, + {"AAA=====", 3}, + {"AAAA====", -1}, + {"AAAAA===", -1}, + {"AAAAAA==", 6}, + {"AAAAAAA=", -1}, + {"AAAAAAAA", -1}, + } + for _, tc := range testCases { + dbuf := make([]byte, StdEncoding.DecodedLen(len(tc.input))) + _, err := StdEncoding.Decode(dbuf, []byte(tc.input)) + if tc.offset == -1 { + if err != nil { + t.Error("Decoder wrongly detected corruption in", tc.input) + } + continue + } + switch err := err.(type) { + case CorruptInputError: + testEqual(t, "Corruption in %q at offset %v, want %v", tc.input, int(err), tc.offset) + default: + t.Error("Decoder failed to detect corruption in", tc) + } + } +} + +func TestBig(t *testing.T) { + n := 3*1000 + 1 + raw := make([]byte, n) + const alpha = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + for i := 0; i < n; i++ { + raw[i] = alpha[i%len(alpha)] + } + encoded := new(bytes.Buffer) + w := NewEncoder(StdEncoding, encoded) + nn, err := w.Write(raw) + if nn != n || err != nil { + t.Fatalf("Encoder.Write(raw) = %d, %v want %d, nil", nn, err, n) + } + err = w.Close() + if err != nil { + t.Fatalf("Encoder.Close() = %v want nil", err) + } + decoded, err := io.ReadAll(NewDecoder(StdEncoding, encoded)) + if err != nil { + t.Fatalf("io.ReadAll(NewDecoder(...)): %v", err) + } + + if !bytes.Equal(raw, decoded) { + var i int + for i = 0; i < len(decoded) && i < len(raw); i++ { + if decoded[i] != raw[i] { + break + } + } + t.Errorf("Decode(Encode(%d-byte string)) failed at offset %d", n, i) + } +} + +func testStringEncoding(t *testing.T, expected string, examples []string) { + for _, e := range examples { + buf, err := StdEncoding.DecodeString(e) + if err != nil { + t.Errorf("Decode(%q) failed: %v", e, err) + continue + } + if s := string(buf); s != expected { + t.Errorf("Decode(%q) = %q, want %q", e, s, expected) + } + } +} + +func TestNewLineCharacters(t *testing.T) { + // Each of these should decode to the string "sure", without errors. + examples := []string{ + "ON2XEZI=", + "ON2XEZI=\r", + "ON2XEZI=\n", + "ON2XEZI=\r\n", + "ON2XEZ\r\nI=", + "ON2X\rEZ\nI=", + "ON2X\nEZ\rI=", + "ON2XEZ\nI=", + "ON2XEZI\n=", + } + testStringEncoding(t, "sure", examples) + + // Each of these should decode to the string "foobar", without errors. + examples = []string{ + "MZXW6YTBOI======", + "MZXW6YTBOI=\r\n=====", + } + testStringEncoding(t, "foobar", examples) +} + +func TestDecoderIssue4779(t *testing.T) { + encoded := `JRXXEZLNEBUXA43VNUQGI33MN5ZCA43JOQQGC3LFOQWCAY3PNZZWKY3UMV2HK4 +RAMFSGS4DJONUWG2LOM4QGK3DJOQWCA43FMQQGI3YKMVUXK43NN5SCA5DFNVYG64RANFXGG2LENFSH +K3TUEB2XIIDMMFRG64TFEBSXIIDEN5WG64TFEBWWCZ3OMEQGC3DJOF2WCLRAKV2CAZLONFWQUYLEEB +WWS3TJNUQHMZLONFQW2LBAOF2WS4ZANZXXG5DSOVSCAZLYMVZGG2LUMF2GS33OEB2WY3DBNVRW6IDM +MFRG64TJOMQG42LTNEQHK5AKMFWGS4LVNFYCAZLYEBSWCIDDN5WW233EN4QGG33OONSXC5LBOQXCAR +DVNFZSAYLVORSSA2LSOVZGKIDEN5WG64RANFXAU4TFOBZGK2DFNZSGK4TJOQQGS3RAOZXWY5LQORQX +IZJAOZSWY2LUEBSXG43FEBRWS3DMOVWSAZDPNRXXEZJAMV2SAZTVM5UWC5BANZ2WY3DBBJYGC4TJMF +2HK4ROEBCXQY3FOB2GK5LSEBZWS3TUEBXWGY3BMVRWC5BAMN2XA2LEMF2GC5BANZXW4IDQOJXWSZDF +NZ2CYIDTOVXHIIDJNYFGG5LMOBQSA4LVNEQG6ZTGNFRWSYJAMRSXGZLSOVXHIIDNN5WGY2LUEBQW42 +LNEBUWIIDFON2CA3DBMJXXE5LNFY== +====` + encodedShort := strings.ReplaceAll(encoded, "\n", "") + + dec := NewDecoder(StdEncoding, strings.NewReader(encoded)) + res1, err := io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + dec = NewDecoder(StdEncoding, strings.NewReader(encodedShort)) + var res2 []byte + res2, err = io.ReadAll(dec) + if err != nil { + t.Errorf("ReadAll failed: %v", err) + } + + if !bytes.Equal(res1, res2) { + t.Error("Decoded results not equal") + } +} + +func BenchmarkEncode(b *testing.B) { + data := make([]byte, 8192) + buf := make([]byte, StdEncoding.EncodedLen(len(data))) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Encode(buf, data) + } +} + +func BenchmarkEncodeToString(b *testing.B) { + data := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.EncodeToString(data) + } +} + +func BenchmarkDecode(b *testing.B) { + data := make([]byte, StdEncoding.EncodedLen(8192)) + StdEncoding.Encode(data, make([]byte, 8192)) + buf := make([]byte, 8192) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.Decode(buf, data) + } +} + +func BenchmarkDecodeString(b *testing.B) { + data := StdEncoding.EncodeToString(make([]byte, 8192)) + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + StdEncoding.DecodeString(data) + } +} + +func TestWithCustomPadding(t *testing.T) { + for _, testcase := range pairs { + defaultPadding := StdEncoding.EncodeToString([]byte(testcase.decoded)) + customPadding := StdEncoding.WithPadding('@').EncodeToString([]byte(testcase.decoded)) + expected := strings.ReplaceAll(defaultPadding, "=", "@") + + if expected != customPadding { + t.Errorf("Expected custom %s, got %s", expected, customPadding) + } + if testcase.encoded != defaultPadding { + t.Errorf("Expected %s, got %s", testcase.encoded, defaultPadding) + } + } +} + +func TestWithoutPadding(t *testing.T) { + for _, testcase := range pairs { + defaultPadding := StdEncoding.EncodeToString([]byte(testcase.decoded)) + customPadding := StdEncoding.WithPadding(NoPadding).EncodeToString([]byte(testcase.decoded)) + expected := strings.TrimRight(defaultPadding, "=") + + if expected != customPadding { + t.Errorf("Expected custom %s, got %s", expected, customPadding) + } + if testcase.encoded != defaultPadding { + t.Errorf("Expected %s, got %s", testcase.encoded, defaultPadding) + } + } +} + +func TestDecodeWithPadding(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding('-'), + StdEncoding.WithPadding(NoPadding), + } + + for i, enc := range encodings { + for _, pair := range pairs { + + input := pair.decoded + encoded := enc.EncodeToString([]byte(input)) + + decoded, err := enc.DecodeString(encoded) + if err != nil { + t.Errorf("DecodeString Error for encoding %d (%q): %v", i, input, err) + } + + if input != string(decoded) { + t.Errorf("Unexpected result for encoding %d: got %q; want %q", i, decoded, input) + } + } + } +} + +func TestDecodeWithWrongPadding(t *testing.T) { + encoded := StdEncoding.EncodeToString([]byte("foobar")) + + _, err := StdEncoding.WithPadding('-').DecodeString(encoded) + if err == nil { + t.Error("expected error") + } + + _, err = StdEncoding.WithPadding(NoPadding).DecodeString(encoded) + if err == nil { + t.Error("expected error") + } +} + +func TestBufferedDecodingSameError(t *testing.T) { + testcases := []struct { + prefix string + chunkCombinations [][]string + expected error + }{ + // NBSWY3DPO5XXE3DE == helloworld + // Test with "ZZ" as extra input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE", "ZZ"}, + {"NBSWY3DPO5XXE3DE", "ZZ"}, + {"NBSWY3DPO5XXE3DEZZ"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "EZZ"}, + {"NBSWY3DPO5XXE3", "DEZZ"}, + }, io.ErrUnexpectedEOF}, + + // Test with "ZZY" as extra input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE", "ZZY"}, + {"NBSWY3DPO5XXE3DE", "ZZY"}, + {"NBSWY3DPO5XXE3DEZZY"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "EZZY"}, + {"NBSWY3DPO5XXE3", "DEZZY"}, + }, io.ErrUnexpectedEOF}, + + // Normal case, this is valid input + {"helloworld", [][]string{ + {"NBSW", "Y3DP", "O5XX", "E3DE"}, + {"NBSWY3DPO5XXE3DE"}, + {"NBS", "WY3", "DPO", "5XX", "E3D", "E"}, + {"NBSWY3DPO5XXE3", "DE"}, + }, nil}, + + // MZXW6YTB = fooba + {"fooba", [][]string{ + {"MZXW6YTBZZ"}, + {"MZXW6YTBZ", "Z"}, + {"MZXW6YTB", "ZZ"}, + {"MZXW6YT", "BZZ"}, + {"MZXW6Y", "TBZZ"}, + {"MZXW6Y", "TB", "ZZ"}, + {"MZXW6", "YTBZZ"}, + {"MZXW6", "YTB", "ZZ"}, + {"MZXW6", "YT", "BZZ"}, + }, io.ErrUnexpectedEOF}, + + // Normal case, this is valid input + {"fooba", [][]string{ + {"MZXW6YTB"}, + {"MZXW6YT", "B"}, + {"MZXW6Y", "TB"}, + {"MZXW6", "YTB"}, + {"MZXW6", "YT", "B"}, + {"MZXW", "6YTB"}, + {"MZXW", "6Y", "TB"}, + }, nil}, + } + + for _, testcase := range testcases { + for _, chunks := range testcase.chunkCombinations { + r := &chunkReader{chunks: chunks} + + decoder := NewDecoder(StdEncoding, r) + _, err := io.ReadAll(decoder) + + if err != testcase.expected { + t.Errorf("Expected %v, got %v; case %s %+v", testcase.expected, err, testcase.prefix, chunks) + } + } + } +} + +// XXX: +// chunkReader exists as an alternative to the original behaviour of these +// tests in go, which used io.Pipe and a separate goroutine. +type chunkReader struct { + chunks []string + nChunk int + read int +} + +func (c *chunkReader) Read(b []byte) (int, error) { + if c.nChunk >= len(c.chunks) { + return 0, io.EOF + } + chunk := c.chunks[c.nChunk] + read := copy(b, chunk[c.read:]) + c.read += read + if c.read == len(chunk) { + c.nChunk++ + c.read = 0 + } + return read, nil +} + +func TestBufferedDecodingPadding(t *testing.T) { + testcases := []struct { + chunks []string + expectedError string + }{ + {[]string{ + "I4======", + "==", + }, "unexpected EOF"}, + + {[]string{ + "I4======N4======", + }, "illegal base32 data at input byte 2"}, + + {[]string{ + "I4======", + "N4======", + }, "illegal base32 data at input byte 0"}, + + {[]string{ + "I4======", + "========", + }, "illegal base32 data at input byte 0"}, + + {[]string{ + "I4I4I4I4", + "I4======", + "I4======", + }, "illegal base32 data at input byte 0"}, + } + + for _, testcase := range testcases { + r := &chunkReader{chunks: testcase.chunks} + + decoder := NewDecoder(StdEncoding, r) + _, err := io.ReadAll(decoder) + + if err == nil && len(testcase.expectedError) != 0 { + t.Errorf("case %q: got nil error, want %v", testcase.chunks, testcase.expectedError) + } else if err.Error() != testcase.expectedError { + t.Errorf("case %q: got %v, want %v", testcase.chunks, err, testcase.expectedError) + } + } +} + +func TestEncodedLen(t *testing.T) { + var rawStdEncoding = StdEncoding.WithPadding(NoPadding) + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {StdEncoding, 0, 0}, + {StdEncoding, 1, 8}, + {StdEncoding, 2, 8}, + {StdEncoding, 3, 8}, + {StdEncoding, 4, 8}, + {StdEncoding, 5, 8}, + {StdEncoding, 6, 16}, + {StdEncoding, 10, 16}, + {StdEncoding, 11, 24}, + {rawStdEncoding, 0, 0}, + {rawStdEncoding, 1, 2}, + {rawStdEncoding, 2, 4}, + {rawStdEncoding, 3, 5}, + {rawStdEncoding, 4, 7}, + {rawStdEncoding, 5, 8}, + {rawStdEncoding, 6, 10}, + {rawStdEncoding, 7, 12}, + {rawStdEncoding, 10, 16}, + {rawStdEncoding, 11, 18}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{rawStdEncoding, (math.MaxInt-4)/8 + 1, 429496730}) + tests = append(tests, test{rawStdEncoding, math.MaxInt/8*5 + 4, math.MaxInt}) + case 64: + tests = append(tests, test{rawStdEncoding, (math.MaxInt-4)/8 + 1, 1844674407370955162}) + tests = append(tests, test{rawStdEncoding, math.MaxInt/8*5 + 4, math.MaxInt}) + } + for _, tt := range tests { + if got := tt.enc.EncodedLen(tt.n); int64(got) != tt.want { + t.Errorf("EncodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestDecodedLen(t *testing.T) { + var rawStdEncoding = StdEncoding.WithPadding(NoPadding) + type test struct { + enc *Encoding + n int + want int64 + } + tests := []test{ + {StdEncoding, 0, 0}, + {StdEncoding, 8, 5}, + {StdEncoding, 16, 10}, + {StdEncoding, 24, 15}, + {rawStdEncoding, 0, 0}, + {rawStdEncoding, 2, 1}, + {rawStdEncoding, 4, 2}, + {rawStdEncoding, 5, 3}, + {rawStdEncoding, 7, 4}, + {rawStdEncoding, 8, 5}, + {rawStdEncoding, 10, 6}, + {rawStdEncoding, 12, 7}, + {rawStdEncoding, 16, 10}, + {rawStdEncoding, 18, 11}, + } + // check overflow + switch strconv.IntSize { + case 32: + tests = append(tests, test{rawStdEncoding, math.MaxInt/5 + 1, 268435456}) + tests = append(tests, test{rawStdEncoding, math.MaxInt, 1342177279}) + case 64: + tests = append(tests, test{rawStdEncoding, math.MaxInt/5 + 1, 1152921504606846976}) + tests = append(tests, test{rawStdEncoding, math.MaxInt, 5764607523034234879}) + } + for _, tt := range tests { + if got := tt.enc.DecodedLen(tt.n); int64(got) != tt.want { + t.Errorf("DecodedLen(%d): got %d, want %d", tt.n, got, tt.want) + } + } +} + +func TestWithoutPaddingClose(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for _, encoding := range encodings { + for _, testpair := range pairs { + + var buf strings.Builder + encoder := NewEncoder(encoding, &buf) + encoder.Write([]byte(testpair.decoded)) + encoder.Close() + + expected := testpair.encoded + if encoding.padChar == NoPadding { + expected = strings.ReplaceAll(expected, "=", "") + } + + res := buf.String() + + if res != expected { + t.Errorf("Expected %s got %s; padChar=%d", expected, res, encoding.padChar) + } + } + } +} + +func TestDecodeReadAll(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for _, pair := range pairs { + for encIndex, encoding := range encodings { + encoded := pair.encoded + if encoding.padChar == NoPadding { + encoded = strings.ReplaceAll(encoded, "=", "") + } + + decReader, err := io.ReadAll(NewDecoder(encoding, strings.NewReader(encoded))) + if err != nil { + t.Errorf("NewDecoder error: %v", err) + } + + if pair.decoded != string(decReader) { + t.Errorf("Expected %s got %s; Encoding %d", pair.decoded, decReader, encIndex) + } + } + } +} + +func TestDecodeSmallBuffer(t *testing.T) { + encodings := []*Encoding{ + StdEncoding, + StdEncoding.WithPadding(NoPadding), + } + + for bufferSize := 1; bufferSize < 200; bufferSize++ { + for _, pair := range pairs { + for encIndex, encoding := range encodings { + encoded := pair.encoded + if encoding.padChar == NoPadding { + encoded = strings.ReplaceAll(encoded, "=", "") + } + + decoder := NewDecoder(encoding, strings.NewReader(encoded)) + + var allRead []byte + + for { + buf := make([]byte, bufferSize) + n, err := decoder.Read(buf) + allRead = append(allRead, buf[0:n]...) + if err == io.EOF { + break + } + if err != nil { + t.Error(err) + } + } + + if pair.decoded != string(allRead) { + t.Errorf("Expected %s got %s; Encoding %d; bufferSize %d", pair.decoded, allRead, encIndex, bufferSize) + } + } + } + } +} diff --git a/gnovm/stdlibs/encoding/base32/example_test.gno b/gnovm/stdlibs/encoding/base32/example_test.gno new file mode 100644 index 00000000000..251624f0bd8 --- /dev/null +++ b/gnovm/stdlibs/encoding/base32/example_test.gno @@ -0,0 +1,68 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Keep in sync with ../base64/example_test.go. + +package base32_test + +import ( + "encoding/base32" + "fmt" + "os" +) + +func ExampleEncoding_EncodeToString() { + data := []byte("any + old & data") + str := base32.StdEncoding.EncodeToString(data) + fmt.Println(str) + // Output: + // MFXHSIBLEBXWYZBAEYQGIYLUME====== +} + +func ExampleEncoding_Encode() { + data := []byte("Hello, world!") + dst := make([]byte, base32.StdEncoding.EncodedLen(len(data))) + base32.StdEncoding.Encode(dst, data) + fmt.Println(string(dst)) + // Output: + // JBSWY3DPFQQHO33SNRSCC=== +} + +func ExampleEncoding_DecodeString() { + str := "ONXW2ZJAMRQXIYJAO5UXI2BAAAQGC3TEEDX3XPY=" + data, err := base32.StdEncoding.DecodeString(str) + if err != nil { + fmt.Println("error:", err) + return + } + fmt.Printf("%q\n", data) + // Output: + // "some data with \x00 and \ufeff" +} + +func ExampleEncoding_Decode() { + str := "JBSWY3DPFQQHO33SNRSCC===" + dst := make([]byte, base32.StdEncoding.DecodedLen(len(str))) + n, err := base32.StdEncoding.Decode(dst, []byte(str)) + if err != nil { + fmt.Println("decode error:", err) + return + } + dst = dst[:n] + fmt.Printf("%q\n", dst) + // Output: + // "Hello, world!" +} + +func ExampleNewEncoder() { + input := []byte("foo\x00bar") + encoder := base32.NewEncoder(base32.StdEncoding, os.Stdout) + encoder.Write(input) + // Must close the encoder when finished to flush any partial blocks. + // If you comment out the following line, the last partial block "r" + // won't be encoded. + encoder.Close() + // Output: + // MZXW6ADCMFZA==== +} diff --git a/gnovm/stdlibs/encoding/binary/varint.gno b/gnovm/stdlibs/encoding/binary/varint.gno new file mode 100644 index 00000000000..7b14fb2b631 --- /dev/null +++ b/gnovm/stdlibs/encoding/binary/varint.gno @@ -0,0 +1,166 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +// This file implements "varint" encoding of 64-bit integers. +// The encoding is: +// - unsigned integers are serialized 7 bits at a time, starting with the +// least significant bits +// - the most significant bit (msb) in each output byte indicates if there +// is a continuation byte (msb = 1) +// - signed integers are mapped to unsigned integers using "zig-zag" +// encoding: Positive values x are written as 2*x + 0, negative values +// are written as 2*(^x) + 1; that is, negative numbers are complemented +// and whether to complement is encoded in bit 0. +// +// Design note: +// At most 10 bytes are needed for 64-bit values. The encoding could +// be more dense: a full 64-bit value needs an extra byte just to hold bit 63. +// Instead, the msb of the previous byte could be used to hold bit 63 since we +// know there can't be more than 64 bits. This is a trivial improvement and +// would reduce the maximum encoding length to 9 bytes. However, it breaks the +// invariant that the msb is always the "continuation bit" and thus makes the +// format incompatible with a varint encoding for larger numbers (say 128-bit). + +import ( + "errors" + "io" +) + +// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer. +const ( + MaxVarintLen16 = 3 + MaxVarintLen32 = 5 + MaxVarintLen64 = 10 +) + +// AppendUvarint appends the varint-encoded form of x, +// as generated by PutUvarint, to buf and returns the extended buffer. +func AppendUvarint(buf []byte, x uint64) []byte { + for x >= 0x80 { + buf = append(buf, byte(x)|0x80) + x >>= 7 + } + return append(buf, byte(x)) +} + +// PutUvarint encodes a uint64 into buf and returns the number of bytes written. +// If the buffer is too small, PutUvarint will panic. +func PutUvarint(buf []byte, x uint64) int { + i := 0 + for x >= 0x80 { + buf[i] = byte(x) | 0x80 + x >>= 7 + i++ + } + buf[i] = byte(x) + return i + 1 +} + +// Uvarint decodes a uint64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +func Uvarint(buf []byte) (uint64, int) { + var x uint64 + var s uint + for i, b := range buf { + if i == MaxVarintLen64 { + // Catch byte reads past MaxVarintLen64. + // See issue https://golang.org/issues/41185 + return 0, -(i + 1) // overflow + } + if b < 0x80 { + if i == MaxVarintLen64-1 && b > 1 { + return 0, -(i + 1) // overflow + } + return x | uint64(b)< 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 with the following meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +func Varint(buf []byte) (int64, int) { + ux, n := Uvarint(buf) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, n +} + +var errOverflow = errors.New("binary: varint overflows a 64-bit integer") + +// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64. +// The error is EOF only if no bytes were read. +// If an EOF happens after reading some but not all the bytes, +// ReadUvarint returns io.ErrUnexpectedEOF. +func ReadUvarint(r io.ByteReader) (uint64, error) { + var x uint64 + var s uint + for i := 0; i < MaxVarintLen64; i++ { + b, err := r.ReadByte() + if err != nil { + if i > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return x, err + } + if b < 0x80 { + if i == MaxVarintLen64-1 && b > 1 { + return x, errOverflow + } + return x | uint64(b)<> 1) + if ux&1 != 0 { + x = ^x + } + return x, err +} diff --git a/gnovm/stdlibs/encoding/binary/varint_test.gno b/gnovm/stdlibs/encoding/binary/varint_test.gno new file mode 100644 index 00000000000..274ac74d0dc --- /dev/null +++ b/gnovm/stdlibs/encoding/binary/varint_test.gno @@ -0,0 +1,245 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +import ( + "bytes" + "io" + "math" + "testing" +) + +func testConstant(t *testing.T, w uint, max int) { + buf := make([]byte, MaxVarintLen64) + n := PutUvarint(buf, 1< 0 { + wantErr = io.ErrUnexpectedEOF + } + if x != 0 || err != wantErr { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s", buf, x, err) + } + } +} + +// Ensure that we catch overflows of bytes going past MaxVarintLen64. +// See issue https://golang.org/issues/41185 +func TestBufferTooBigWithOverflow(t *testing.T) { + tests := []struct { + in []byte + name string + wantN int + wantValue uint64 + }{ + { + name: "invalid: 1000 bytes", + in: func() []byte { + b := make([]byte, 1000) + for i := range b { + b[i] = 0xff + } + b[999] = 0 + return b + }(), + wantN: -11, + wantValue: 0, + }, + { + name: "valid: math.MaxUint64-40", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + wantValue: math.MaxUint64 - 40, + wantN: 10, + }, + { + name: "invalid: with more than MaxVarintLen64 bytes", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + wantN: -11, + wantValue: 0, + }, + { + name: "invalid: 10th byte", + in: []byte{0xd7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f}, + wantN: -10, + wantValue: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + value, n := Uvarint(tt.in) + if g, w := n, tt.wantN; g != w { + t.Errorf("bytes returned=%d, want=%d", g, w) + } + if g, w := value, tt.wantValue; g != w { + t.Errorf("value=%d, want=%d", g, w) + } + }) + } +} + +func testOverflow(t *testing.T, buf []byte, x0 uint64, n0 int, err0 error) { + x, n := Uvarint(buf) + if x != 0 || n != n0 { + t.Errorf("Uvarint(% X): got x = %d, n = %d; want 0, %d", buf, x, n, n0) + } + + r := bytes.NewReader(buf) + ln := r.Len() + x, err := ReadUvarint(r) + if x != x0 || err != err0 { + t.Errorf("ReadUvarint(%v): got x = %d, err = %s; want %d, %s", buf, x, err, x0, err0) + } + if read := ln - r.Len(); read > MaxVarintLen64 { + t.Errorf("ReadUvarint(%v): read more than MaxVarintLen64 bytes, got %d", buf, read) + } +} + +func TestOverflow(t *testing.T) { + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2}, 0, -10, errOverflow) + testOverflow(t, []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0}, 0, -11, errOverflow) + testOverflow(t, []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 1<<64-1, -11, errOverflow) // 11 bytes, should overflow +} + +func TestNonCanonicalZero(t *testing.T) { + buf := []byte{0x80, 0x80, 0x80, 0} + x, n := Uvarint(buf) + if x != 0 || n != 4 { + t.Errorf("Uvarint(%v): got x = %d, n = %d; want 0, 4", buf, x, n) + } +} + +func BenchmarkPutUvarint32(b *testing.B) { + buf := make([]byte, MaxVarintLen32) + b.SetBytes(4) + for i := 0; i < b.N; i++ { + for j := uint(0); j < MaxVarintLen32; j++ { + PutUvarint(buf, 1<<(j*7)) + } + } +} + +func BenchmarkPutUvarint64(b *testing.B) { + buf := make([]byte, MaxVarintLen64) + b.SetBytes(8) + for i := 0; i < b.N; i++ { + for j := uint(0); j < MaxVarintLen64; j++ { + PutUvarint(buf, 1<<(j*7)) + } + } +} diff --git a/gnovm/stdlibs/encoding/csv/reader.gno b/gnovm/stdlibs/encoding/csv/reader.gno new file mode 100644 index 00000000000..889b7ff5588 --- /dev/null +++ b/gnovm/stdlibs/encoding/csv/reader.gno @@ -0,0 +1,467 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package csv reads and writes comma-separated values (CSV) files. +// There are many kinds of CSV files; this package supports the format +// described in RFC 4180. +// +// A csv file contains zero or more records of one or more fields per record. +// Each record is separated by the newline character. The final record may +// optionally be followed by a newline character. +// +// field1,field2,field3 +// +// White space is considered part of a field. +// +// Carriage returns before newline characters are silently removed. +// +// Blank lines are ignored. A line with only whitespace characters (excluding +// the ending newline character) is not considered a blank line. +// +// Fields which start and stop with the quote character " are called +// quoted-fields. The beginning and ending quote are not part of the +// field. +// +// The source: +// +// normal string,"quoted-field" +// +// results in the fields +// +// {`normal string`, `quoted-field`} +// +// Within a quoted-field a quote character followed by a second quote +// character is considered a single quote. +// +// "the ""word"" is true","a ""quoted-field""" +// +// results in +// +// {`the "word" is true`, `a "quoted-field"`} +// +// Newlines and commas may be included in a quoted-field +// +// "Multi-line +// field","comma is ," +// +// results in +// +// {`Multi-line +// field`, `comma is ,`} +package csv + +import ( + "bufio" + "bytes" + "errors" + "io" + "strconv" + "unicode" + "unicode/utf8" +) + +// A ParseError is returned for parsing errors. +// Line and column numbers are 1-indexed. +type ParseError struct { + StartLine int // Line where the record starts + Line int // Line where the error occurred + Column int // Column (1-based byte index) where the error occurred + Err error // The actual error +} + +func (e *ParseError) Error() string { + if e.Err == ErrFieldCount { + return "record on line " + strconv.Itoa(e.Line) + ": " + e.Err.Error() + } + if e.StartLine != e.Line { + return "record on line " + strconv.Itoa(e.StartLine) + ": parse error on line " + strconv.Itoa(e.Line) + + ", column " + strconv.Itoa(e.Column) + ": " + e.Err.Error() + } + return "parse error on line " + strconv.Itoa(e.Line) + ", column " + strconv.Itoa(e.Column) + ": " + e.Err.Error() +} + +func (e *ParseError) Unwrap() error { return e.Err } + +// These are the errors that can be returned in [ParseError.Err]. +var ( + ErrBareQuote = errors.New("bare \" in non-quoted-field") + ErrQuote = errors.New("extraneous or missing \" in quoted-field") + ErrFieldCount = errors.New("wrong number of fields") + + // Deprecated: ErrTrailingComma is no longer used. + ErrTrailingComma = errors.New("extra delimiter at end of line") +) + +var ErrInvalidDelim = errors.New("csv: invalid field or comment delimiter") + +func validDelim(r rune) bool { + return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError +} + +// A Reader reads records from a CSV-encoded file. +// +// As returned by [NewReader], a Reader expects input conforming to RFC 4180. +// The exported fields can be changed to customize the details before the +// first call to [Reader.Read] or [Reader.ReadAll]. +// +// The Reader converts all \r\n sequences in its input to plain \n, +// including in multiline field values, so that the returned data does +// not depend on which line-ending convention an input file uses. +type Reader struct { + // Comma is the field delimiter. + // It is set to comma (',') by NewReader. + // Comma must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + Comma rune + + // Comment, if not 0, is the comment character. Lines beginning with the + // Comment character without preceding whitespace are ignored. + // With leading whitespace the Comment character becomes part of the + // field, even if TrimLeadingSpace is true. + // Comment must be a valid rune and must not be \r, \n, + // or the Unicode replacement character (0xFFFD). + // It must also not be equal to Comma. + Comment rune + + // FieldsPerRecord is the number of expected fields per record. + // If FieldsPerRecord is positive, Read requires each record to + // have the given number of fields. If FieldsPerRecord is 0, Read sets it to + // the number of fields in the first record, so that future records must + // have the same field count. If FieldsPerRecord is negative, no check is + // made and records may have a variable number of fields. + FieldsPerRecord int + + // If LazyQuotes is true, a quote may appear in an unquoted field and a + // non-doubled quote may appear in a quoted field. + LazyQuotes bool + + // If TrimLeadingSpace is true, leading white space in a field is ignored. + // This is done even if the field delimiter, Comma, is white space. + TrimLeadingSpace bool + + // ReuseRecord controls whether calls to Read may return a slice sharing + // the backing array of the previous call's returned slice for performance. + // By default, each call to Read returns newly allocated memory owned by the caller. + ReuseRecord bool + + // Deprecated: TrailingComma is no longer used. + TrailingComma bool + + r *bufio.Reader + + // numLine is the current line being read in the CSV file. + numLine int + + // offset is the input stream byte offset of the current reader position. + offset int64 + + // rawBuffer is a line buffer only used by the readLine method. + rawBuffer []byte + + // recordBuffer holds the unescaped fields, one after another. + // The fields can be accessed by using the indexes in fieldIndexes. + // E.g., For the row `a,"b","c""d",e`, recordBuffer will contain `abc"de` + // and fieldIndexes will contain the indexes [1, 2, 5, 6]. + recordBuffer []byte + + // fieldIndexes is an index of fields inside recordBuffer. + // The i'th field ends at offset fieldIndexes[i] in recordBuffer. + fieldIndexes []int + + // fieldPositions is an index of field positions for the + // last record returned by Read. + fieldPositions []position + + // lastRecord is a record cache and only used when ReuseRecord == true. + lastRecord []string +} + +// NewReader returns a new Reader that reads from r. +func NewReader(r io.Reader) *Reader { + return &Reader{ + Comma: ',', + r: bufio.NewReader(r), + } +} + +// Read reads one record (a slice of fields) from r. +// If the record has an unexpected number of fields, +// Read returns the record along with the error [ErrFieldCount]. +// If the record contains a field that cannot be parsed, +// Read returns a partial record along with the parse error. +// The partial record contains all fields read before the error. +// If there is no data left to be read, Read returns nil, [io.EOF]. +// If [Reader.ReuseRecord] is true, the returned slice may be shared +// between multiple calls to Read. +func (r *Reader) Read() (record []string, err error) { + if r.ReuseRecord { + record, err = r.readRecord(r.lastRecord) + r.lastRecord = record + } else { + record, err = r.readRecord(nil) + } + return record, err +} + +// FieldPos returns the line and column corresponding to +// the start of the field with the given index in the slice most recently +// returned by [Reader.Read]. Numbering of lines and columns starts at 1; +// columns are counted in bytes, not runes. +// +// If this is called with an out-of-bounds index, it panics. +func (r *Reader) FieldPos(field int) (line, column int) { + if field < 0 || field >= len(r.fieldPositions) { + panic("out of range index passed to FieldPos") + } + p := &r.fieldPositions[field] + return p.line, p.col +} + +// InputOffset returns the input stream byte offset of the current reader +// position. The offset gives the location of the end of the most recently +// read row and the beginning of the next row. +func (r *Reader) InputOffset() int64 { + return r.offset +} + +// pos holds the position of a field in the current line. +type position struct { + line, col int +} + +// ReadAll reads all the remaining records from r. +// Each record is a slice of fields. +// A successful call returns err == nil, not err == [io.EOF]. Because ReadAll is +// defined to read until EOF, it does not treat end of file as an error to be +// reported. +func (r *Reader) ReadAll() (records [][]string, err error) { + for { + record, err := r.readRecord(nil) + if err == io.EOF { + return records, nil + } + if err != nil { + return nil, err + } + records = append(records, record) + } +} + +// readLine reads the next line (with the trailing endline). +// If EOF is hit without a trailing endline, it will be omitted. +// If some bytes were read, then the error is never [io.EOF]. +// The result is only valid until the next call to readLine. +func (r *Reader) readLine() ([]byte, error) { + line, err := r.r.ReadSlice('\n') + if err == bufio.ErrBufferFull { + r.rawBuffer = append(r.rawBuffer[:0], line...) + for err == bufio.ErrBufferFull { + line, err = r.r.ReadSlice('\n') + r.rawBuffer = append(r.rawBuffer, line...) + } + line = r.rawBuffer + } + readSize := len(line) + if readSize > 0 && err == io.EOF { + err = nil + // For backwards compatibility, drop trailing \r before EOF. + if line[readSize-1] == '\r' { + line = line[:readSize-1] + } + } + r.numLine++ + r.offset += int64(readSize) + // Normalize \r\n to \n on all input lines. + if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { + line[n-2] = '\n' + line = line[:n-1] + } + return line, err +} + +// lengthNL reports the number of bytes for the trailing \n. +func lengthNL(b []byte) int { + if len(b) > 0 && b[len(b)-1] == '\n' { + return 1 + } + return 0 +} + +// nextRune returns the next rune in b or utf8.RuneError. +func nextRune(b []byte) rune { + r, _ := utf8.DecodeRune(b) + return r +} + +func (r *Reader) readRecord(dst []string) ([]string, error) { + if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) { + return nil, ErrInvalidDelim + } + + // Read line (automatically skipping past empty lines and any comments). + var line []byte + var errRead error + for errRead == nil { + line, errRead = r.readLine() + if r.Comment != 0 && nextRune(line) == r.Comment { + line = nil + continue // Skip comment lines + } + if errRead == nil && len(line) == lengthNL(line) { + line = nil + continue // Skip empty lines + } + break + } + if errRead == io.EOF { + return nil, errRead + } + + // Parse each field in the record. + var err error + const quoteLen = len(`"`) + commaLen := utf8.RuneLen(r.Comma) + recLine := r.numLine // Starting line for record + r.recordBuffer = r.recordBuffer[:0] + r.fieldIndexes = r.fieldIndexes[:0] + r.fieldPositions = r.fieldPositions[:0] + pos := position{line: r.numLine, col: 1} +parseField: + for { + if r.TrimLeadingSpace { + i := bytes.IndexFunc(line, func(r rune) bool { + return !unicode.IsSpace(r) + }) + if i < 0 { + i = len(line) + pos.col -= lengthNL(line) + } + line = line[i:] + pos.col += i + } + if len(line) == 0 || line[0] != '"' { + // Non-quoted string field + i := bytes.IndexRune(line, r.Comma) + field := line + if i >= 0 { + field = field[:i] + } else { + field = field[:len(field)-lengthNL(field)] + } + // Check to make sure a quote does not appear in field. + if !r.LazyQuotes { + if j := bytes.IndexByte(field, '"'); j >= 0 { + col := pos.col + j + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote} + break parseField + } + } + r.recordBuffer = append(r.recordBuffer, field...) + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, pos) + if i >= 0 { + line = line[i+commaLen:] + pos.col += i + commaLen + continue parseField + } + break parseField + } else { + // Quoted string field + fieldPos := pos + line = line[quoteLen:] + pos.col += quoteLen + for { + i := bytes.IndexByte(line, '"') + if i >= 0 { + // Hit next quote. + r.recordBuffer = append(r.recordBuffer, line[:i]...) + line = line[i+quoteLen:] + pos.col += i + quoteLen + switch rn := nextRune(line); { + case rn == '"': + // `""` sequence (append quote). + r.recordBuffer = append(r.recordBuffer, '"') + line = line[quoteLen:] + pos.col += quoteLen + case rn == r.Comma: + // `",` sequence (end of field). + line = line[commaLen:] + pos.col += commaLen + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + continue parseField + case lengthNL(line) == len(line): + // `"\n` sequence (end of line). + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + case r.LazyQuotes: + // `"` sequence (bare quote). + r.recordBuffer = append(r.recordBuffer, '"') + default: + // `"*` sequence (invalid non-escaped quote). + err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote} + break parseField + } + } else if len(line) > 0 { + // Hit end of line (copy all data so far). + r.recordBuffer = append(r.recordBuffer, line...) + if errRead != nil { + break parseField + } + pos.col += len(line) + line, errRead = r.readLine() + if len(line) > 0 { + pos.line++ + pos.col = 1 + } + if errRead == io.EOF { + errRead = nil + } + } else { + // Abrupt end of file (EOF or error). + if !r.LazyQuotes && errRead == nil { + err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote} + break parseField + } + r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer)) + r.fieldPositions = append(r.fieldPositions, fieldPos) + break parseField + } + } + } + } + if err == nil { + err = errRead + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + str := string(r.recordBuffer) // Convert to string once to batch allocations + dst = dst[:0] + if cap(dst) < len(r.fieldIndexes) { + dst = make([]string, len(r.fieldIndexes)) + } + dst = dst[:len(r.fieldIndexes)] + var preIdx int + for i, idx := range r.fieldIndexes { + dst[i] = str[preIdx:idx] + preIdx = idx + } + + // Check or update the expected fields per record. + if r.FieldsPerRecord > 0 { + if len(dst) != r.FieldsPerRecord && err == nil { + err = &ParseError{ + StartLine: recLine, + Line: recLine, + Column: 1, + Err: ErrFieldCount, + } + } + } else if r.FieldsPerRecord == 0 { + r.FieldsPerRecord = len(dst) + } + return dst, err +} diff --git a/gnovm/stdlibs/encoding/csv/reader_test.gno b/gnovm/stdlibs/encoding/csv/reader_test.gno new file mode 100644 index 00000000000..fa4ecdce3ed --- /dev/null +++ b/gnovm/stdlibs/encoding/csv/reader_test.gno @@ -0,0 +1,706 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package csv + +import ( + "fmt" + "io" + + // "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type readTest struct { + Name string + Input string + Output [][]string + Positions [][][2]int + Errors []error + + // These fields are copied into the Reader + Comma rune + Comment rune + UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1 + FieldsPerRecord int + LazyQuotes bool + TrimLeadingSpace bool + ReuseRecord bool +} + +// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote +// the start of a field, a record boundary and the position of an error respectively. +// They are removed before parsing and are used to verify the position +// information reported by FieldPos. + +var readTests = []readTest{{ + Name: "Simple", + Input: "§a,§b,§c\n", + Output: [][]string{{"a", "b", "c"}}, +}, { + Name: "CRLF", + Input: "§a,§b\r\n¶§c,§d\r\n", + Output: [][]string{{"a", "b"}, {"c", "d"}}, +}, { + Name: "BareCR", + Input: "§a,§b\rc,§d\r\n", + Output: [][]string{{"a", "b\rc", "d"}}, +}, { + Name: "RFC4180test", + Input: `§#field1,§field2,§field3 +¶§"aaa",§"bb +b",§"ccc" +¶§"a,a",§"b""bb",§"ccc" +¶§zzz,§yyy,§xxx +`, + Output: [][]string{ + {"#field1", "field2", "field3"}, + {"aaa", "bb\nb", "ccc"}, + {"a,a", `b"bb`, "ccc"}, + {"zzz", "yyy", "xxx"}, + }, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "NoEOLTest", + Input: "§a,§b,§c", + Output: [][]string{{"a", "b", "c"}}, +}, { + Name: "Semicolon", + Input: "§a;§b;§c\n", + Output: [][]string{{"a", "b", "c"}}, + Comma: ';', +}, { + Name: "MultiLine", + Input: `§"two +line",§"one line",§"three +line +field"`, + Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}}, +}, { + Name: "BlankLine", + Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n", + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, +}, { + Name: "BlankLineFieldCount", + Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n", + Output: [][]string{ + {"a", "b", "c"}, + {"d", "e", "f"}, + }, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "TrimSpace", + Input: " §a, §b, §c\n", + Output: [][]string{{"a", "b", "c"}}, + TrimLeadingSpace: true, +}, { + Name: "LeadingSpace", + Input: "§ a,§ b,§ c\n", + Output: [][]string{{" a", " b", " c"}}, +}, { + Name: "Comment", + Input: "#1,2,3\n§a,§b,§c\n#comment", + Output: [][]string{{"a", "b", "c"}}, + Comment: '#', +}, { + Name: "NoComment", + Input: "§#1,§2,§3\n¶§a,§b,§c", + Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}}, +}, { + Name: "LazyQuotes", + Input: `§a "word",§"1"2",§a",§"b`, + Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}}, + LazyQuotes: true, +}, { + Name: "BareQuotes", + Input: `§a "word",§"1"2",§a"`, + Output: [][]string{{`a "word"`, `1"2`, `a"`}}, + LazyQuotes: true, +}, { + Name: "BareDoubleQuotes", + Input: `§a""b,§c`, + Output: [][]string{{`a""b`, `c`}}, + LazyQuotes: true, +}, { + Name: "BadDoubleQuotes", + Input: `§a∑""b,c`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "TrimQuote", + Input: ` §"a",§" b",§c`, + Output: [][]string{{"a", " b", "c"}}, + TrimLeadingSpace: true, +}, { + Name: "BadBareQuote", + Input: `§a ∑"word","b"`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "BadTrailingQuote", + Input: `§"a word",b∑"`, + Errors: []error{&ParseError{Err: ErrBareQuote}}, +}, { + Name: "ExtraneousQuote", + Input: `§"a ∑"word","b"`, + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "BadFieldCount", + Input: "§a,§b,§c\n¶∑§d,§e", + Errors: []error{nil, &ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "BadFieldCountMultiple", + Input: "§a,§b,§c\n¶∑§d,§e\n¶∑§f", + Errors: []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 0, +}, { + Name: "BadFieldCount1", + Input: `§∑a,§b,§c`, + Errors: []error{&ParseError{Err: ErrFieldCount}}, + Output: [][]string{{"a", "b", "c"}}, + UseFieldsPerRecord: true, + FieldsPerRecord: 2, +}, { + Name: "FieldCount", + Input: "§a,§b,§c\n¶§d,§e", + Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, +}, { + Name: "TrailingCommaEOF", + Input: "§a,§b,§c,§", + Output: [][]string{{"a", "b", "c", ""}}, +}, { + Name: "TrailingCommaEOL", + Input: "§a,§b,§c,§\n", + Output: [][]string{{"a", "b", "c", ""}}, +}, { + Name: "TrailingCommaSpaceEOF", + Input: "§a,§b,§c, §", + Output: [][]string{{"a", "b", "c", ""}}, + TrimLeadingSpace: true, +}, { + Name: "TrailingCommaSpaceEOL", + Input: "§a,§b,§c, §\n", + Output: [][]string{{"a", "b", "c", ""}}, + TrimLeadingSpace: true, +}, { + Name: "TrailingCommaLine3", + Input: "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§", + Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}}, + TrimLeadingSpace: true, +}, { + Name: "NotTrailingComma3", + Input: "§a,§b,§c,§ \n", + Output: [][]string{{"a", "b", "c", " "}}, +}, { + Name: "CommaFieldTest", + Input: `§x,§y,§z,§w +¶§x,§y,§z,§ +¶§x,§y,§,§ +¶§x,§,§,§ +¶§,§,§,§ +¶§"x",§"y",§"z",§"w" +¶§"x",§"y",§"z",§"" +¶§"x",§"y",§"",§"" +¶§"x",§"",§"",§"" +¶§"",§"",§"",§"" +`, + Output: [][]string{ + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + {"x", "y", "z", "w"}, + {"x", "y", "z", ""}, + {"x", "y", "", ""}, + {"x", "", "", ""}, + {"", "", "", ""}, + }, +}, { + Name: "TrailingCommaIneffective1", + Input: "§a,§b,§\n¶§c,§d,§e", + Output: [][]string{ + {"a", "b", ""}, + {"c", "d", "e"}, + }, + TrimLeadingSpace: true, +}, { + Name: "ReadAllReuseRecord", + Input: "§a,§b\n¶§c,§d", + Output: [][]string{ + {"a", "b"}, + {"c", "d"}, + }, + ReuseRecord: true, +}, { + Name: "StartLine1", // Issue 19019 + Input: "§a,\"b\nc∑\"d,e", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "StartLine2", + Input: "§a,§b\n¶§\"d\n\n,e∑", + Errors: []error{nil, &ParseError{Err: ErrQuote}}, + Output: [][]string{{"a", "b"}}, +}, { + Name: "CRLFInQuotedField", // Issue 21201 + Input: "§A,§\"Hello\r\nHi\",§B\r\n", + Output: [][]string{ + {"A", "Hello\nHi", "B"}, + }, +}, { + Name: "BinaryBlobField", // Issue 19410 + Input: "§x09\x41\xb4\x1c,§aktau", + Output: [][]string{{"x09A\xb4\x1c", "aktau"}}, +}, { + Name: "TrailingCR", + Input: "§field1,§field2\r", + Output: [][]string{{"field1", "field2"}}, +}, { + Name: "QuotedTrailingCR", + Input: "§\"field\"\r", + Output: [][]string{{"field"}}, +}, { + Name: "QuotedTrailingCRCR", + Input: "§\"field∑\"\r\r", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "FieldCR", + Input: "§field\rfield\r", + Output: [][]string{{"field\rfield"}}, +}, { + Name: "FieldCRCR", + Input: "§field\r\rfield\r\r", + Output: [][]string{{"field\r\rfield\r"}}, +}, { + Name: "FieldCRCRLF", + Input: "§field\r\r\n¶§field\r\r\n", + Output: [][]string{{"field\r"}, {"field\r"}}, +}, { + Name: "FieldCRCRLFCR", + Input: "§field\r\r\n¶§\rfield\r\r\n\r", + Output: [][]string{{"field\r"}, {"\rfield\r"}}, +}, { + Name: "FieldCRCRLFCRCR", + Input: "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r", + Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}}, +}, { + Name: "MultiFieldCRCRLFCRCR", + Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§", + Output: [][]string{ + {"field1", "field2\r"}, + {"\r\rfield1", "field2\r"}, + {"\r\r", ""}, + }, +}, { + Name: "NonASCIICommaAndComment", + Input: "§a£§b,c£ \t§d,e\n€ comment\n", + Output: [][]string{{"a", "b,c", "d,e"}}, + TrimLeadingSpace: true, + Comma: '£', + Comment: '€', +}, { + Name: "NonASCIICommaAndCommentWithQuotes", + Input: "§a€§\" b,\"€§ c\nλ comment\n", + Output: [][]string{{"a", " b,", " c"}}, + Comma: '€', + Comment: 'λ', +}, { + // λ and θ start with the same byte. + // This tests that the parser doesn't confuse such characters. + Name: "NonASCIICommaConfusion", + Input: "§\"abθcd\"λ§efθgh", + Output: [][]string{{"abθcd", "efθgh"}}, + Comma: 'λ', + Comment: '€', +}, { + Name: "NonASCIICommentConfusion", + Input: "§λ\n¶§λ\nθ\n¶§λ\n", + Output: [][]string{{"λ"}, {"λ"}, {"λ"}}, + Comment: 'θ', +}, { + Name: "QuotedFieldMultipleLF", + Input: "§\"\n\n\n\n\"", + Output: [][]string{{"\n\n\n\n"}}, +}, { + Name: "MultipleCRLF", + Input: "\r\n\r\n\r\n\r\n", +}, { + // The implementation may read each line in several chunks if it doesn't fit entirely + // in the read buffer, so we should test the code to handle that condition. + Name: "HugeLines", + Input: strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000), + Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}}, + Comment: '#', +}, { + Name: "QuoteWithTrailingCRLF", + Input: "§\"foo∑\"bar\"\r\n", + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "LazyQuoteWithTrailingCRLF", + Input: "§\"foo\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, + LazyQuotes: true, +}, { + Name: "DoubleQuoteWithTrailingCRLF", + Input: "§\"foo\"\"bar\"\r\n", + Output: [][]string{{`foo"bar`}}, +}, { + Name: "EvenQuotes", + Input: `§""""""""`, + Output: [][]string{{`"""`}}, +}, { + Name: "OddQuotes", + Input: `§"""""""∑`, + Errors: []error{&ParseError{Err: ErrQuote}}, +}, { + Name: "LazyOddQuotes", + Input: `§"""""""`, + Output: [][]string{{`"""`}}, + LazyQuotes: true, +}, { + Name: "BadComma1", + Comma: '\n', + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComma2", + Comma: '\r', + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComma3", + Comma: '"', + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComma4", + Comma: utf8.RuneError, + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComment1", + Comment: '\n', + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComment2", + Comment: '\r', + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadComment3", + Comment: utf8.RuneError, + Errors: []error{ErrInvalidDelim}, +}, { + Name: "BadCommaComment", + Comma: 'X', + Comment: 'X', + Errors: []error{ErrInvalidDelim}, +}} + +func TestRead(t *testing.T) { + newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int, string) { + positions, errPositions, input := makePositions(tt.Input) + r := NewReader(strings.NewReader(input)) + + if tt.Comma != 0 { + r.Comma = tt.Comma + } + r.Comment = tt.Comment + if tt.UseFieldsPerRecord { + r.FieldsPerRecord = tt.FieldsPerRecord + } else { + r.FieldsPerRecord = -1 + } + r.LazyQuotes = tt.LazyQuotes + r.TrimLeadingSpace = tt.TrimLeadingSpace + r.ReuseRecord = tt.ReuseRecord + return r, positions, errPositions, input + } + + for _, tt := range readTests { + t.Run(tt.Name, func(t *testing.T) { + r, positions, errPositions, input := newReader(tt) + out, err := r.ReadAll() + if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil { + if !deepEqual(err, wantErr) { + t.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr) + } + if out != nil { + t.Fatalf("ReadAll() output:\ngot %q\nwant nil", out) + } + } else { + if err != nil { + t.Fatalf("unexpected Readall() error: %v", err) + } + if !deepEqual(out, tt.Output) { + t.Fatalf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output) + } + } + + // Check input offset after call ReadAll() + inputByteSize := len(input) + inputOffset := r.InputOffset() + if err == nil && int64(inputByteSize) != inputOffset { + t.Errorf("wrong input offset after call ReadAll():\ngot: %d\nwant: %d\ninput: %s", inputOffset, inputByteSize, input) + } + + // Check field and error positions. + r, _, _, _ = newReader(tt) + for recNum := 0; ; recNum++ { + rec, err := r.Read() + var wantErr error + if recNum < len(tt.Errors) && tt.Errors[recNum] != nil { + wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions) + } else if recNum >= len(tt.Output) { + wantErr = io.EOF + } + if !deepEqual(err, wantErr) { + t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr) + } + // ErrFieldCount is explicitly non-fatal. + if err != nil && !isErr(err, ErrFieldCount) { + if recNum < len(tt.Output) { + t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output)) + } + break + } + if got, want := rec, tt.Output[recNum]; !deepEqual(got, want) { + t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want) + } + pos := positions[recNum] + if len(pos) != len(rec) { + t.Fatalf("mismatched position length at record %d", recNum) + } + for i := range rec { + line, col := r.FieldPos(i) + if got, want := [2]int{line, col}, pos[i]; got != want { + t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want) + } + } + } + }) + } +} + +// XXX: substitute for errors.Is +func isErr(err, match error) bool { + if err == match { + return true + } + if w, ok := err.(interface{ Unwrap() error }); ok { + return isErr(w.Unwrap(), match) + } + return false +} + +// XXX: substitute for reflect.DeepEqual +func deepEqual(v, x interface{}) bool { + if v == nil { + return x == nil + } + // dumb deep equal, handling only a few cases + switch v := v.(type) { + case error: + return v.Error() == x.(error).Error() + case string: + return v == x.(string) + case []string: + x := x.([]string) + if len(v) != len(x) { + return false + } + for i := range v { + if !deepEqual(v[i], x[i]) { + return false + } + } + return true + case [][]string: + x := x.([][]string) + if len(v) != len(x) { + return false + } + for i := range v { + if !deepEqual(v[i], x[i]) { + return false + } + } + return true + default: + panic("not handled " + fmt.Sprintf("%T %T", v, x)) + } +} + +// firstError returns the first non-nil error in errs, +// with the position adjusted according to the error's +// index inside positions. +func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error { + for i, err := range errs { + if err != nil { + return errorWithPosition(err, i, positions, errPositions) + } + } + return nil +} + +func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error { + parseErr, ok := err.(*ParseError) + if !ok { + return err + } + if recNum >= len(positions) { + panic(fmt.Errorf("no positions found for error at record %d", recNum)) + } + errPos, ok := errPositions[recNum] + if !ok { + panic(fmt.Errorf("no error position found for error at record %d", recNum)) + } + parseErr1 := *parseErr + parseErr1.StartLine = positions[recNum][0][0] + parseErr1.Line = errPos[0] + parseErr1.Column = errPos[1] + return &parseErr1 +} + +// makePositions returns the expected field positions of all +// the fields in text, the positions of any errors, and the text with the position markers +// removed. +// +// The start of each field is marked with a § symbol; +// CSV lines are separated by ¶ symbols; +// Error positions are marked with ∑ symbols. +func makePositions(text string) ([][][2]int, map[int][2]int, string) { + buf := make([]byte, 0, len(text)) + var positions [][][2]int + errPositions := make(map[int][2]int) + line, col := 1, 1 + recNum := 0 + + for len(text) > 0 { + r, size := utf8.DecodeRuneInString(text) + switch r { + case '\n': + line++ + col = 1 + buf = append(buf, '\n') + case '§': + if len(positions) == 0 { + positions = append(positions, [][2]int{}) + } + positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col}) + case '¶': + positions = append(positions, [][2]int{}) + recNum++ + case '∑': + errPositions[recNum] = [2]int{line, col} + default: + buf = append(buf, text[:size]...) + col += size + } + text = text[size:] + } + return positions, errPositions, string(buf) +} + +// nTimes is an io.Reader which yields the string s n times. +type nTimes struct { + s string + n int + off int +} + +func (r *nTimes) Read(p []byte) (n int, err error) { + for { + if r.n <= 0 || r.s == "" { + return n, io.EOF + } + n0 := copy(p, r.s[r.off:]) + p = p[n0:] + n += n0 + r.off += n0 + if r.off == len(r.s) { + r.off = 0 + r.n-- + } + if len(p) == 0 { + return + } + } +} + +// benchmarkRead measures reading the provided CSV rows data. +// initReader, if non-nil, modifies the Reader before it's used. +func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) { + b.ReportAllocs() + r := NewReader(&nTimes{s: rows, n: b.N}) + if initReader != nil { + initReader(r) + } + for { + _, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + b.Fatal(err) + } + } +} + +const benchmarkCSVData = `x,y,z,w +x,y,z, +x,y,, +x,,, +,,, +"x","y","z","w" +"x","y","z","" +"x","y","","" +"x","","","" +"","","","" +` + +func BenchmarkRead(b *testing.B) { + benchmarkRead(b, nil, benchmarkCSVData) +} + +func BenchmarkReadWithFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData) +} + +func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData) +} + +func BenchmarkReadLargeFields(b *testing.B) { + benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv +,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +`, 3)) +} + +func BenchmarkReadReuseRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData) +} + +func BenchmarkReadReuseRecordLargeFields(b *testing.B) { + benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv +,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv +`, 3)) +} diff --git a/gnovm/stdlibs/encoding/csv/writer.gno b/gnovm/stdlibs/encoding/csv/writer.gno new file mode 100644 index 00000000000..f874765bd4e --- /dev/null +++ b/gnovm/stdlibs/encoding/csv/writer.gno @@ -0,0 +1,184 @@ +package csv + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +import ( + "bufio" + "io" + "strings" + "unicode" + "unicode/utf8" +) + +// A Writer writes records using CSV encoding. +// +// As returned by [NewWriter], a Writer writes records terminated by a +// newline and uses ',' as the field delimiter. The exported fields can be +// changed to customize the details before +// the first call to [Writer.Write] or [Writer.WriteAll]. +// +// [Writer.Comma] is the field delimiter. +// +// If [Writer.UseCRLF] is true, +// the Writer ends each output line with \r\n instead of \n. +// +// The writes of individual records are buffered. +// After all data has been written, the client should call the +// [Writer.Flush] method to guarantee all data has been forwarded to +// the underlying [io.Writer]. Any errors that occurred should +// be checked by calling the [Writer.Error] method. +type Writer struct { + Comma rune // Field delimiter (set to ',' by NewWriter) + UseCRLF bool // True to use \r\n as the line terminator + w *bufio.Writer +} + +// NewWriter returns a new Writer that writes to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + Comma: ',', + w: bufio.NewWriter(w), + } +} + +// Write writes a single CSV record to w along with any necessary quoting. +// A record is a slice of strings with each string being one field. +// Writes are buffered, so [Writer.Flush] must eventually be called to ensure +// that the record is written to the underlying [io.Writer]. +func (w *Writer) Write(record []string) error { + if !validDelim(w.Comma) { + return ErrInvalidDelim + } + + for n, field := range record { + if n > 0 { + if _, err := w.w.WriteRune(w.Comma); err != nil { + return err + } + } + + // If we don't have to have a quoted field then just + // write out the field and continue to the next field. + if !w.fieldNeedsQuotes(field) { + if _, err := w.w.WriteString(field); err != nil { + return err + } + continue + } + + if err := w.w.WriteByte('"'); err != nil { + return err + } + for len(field) > 0 { + // Search for special characters. + i := strings.IndexAny(field, "\"\r\n") + if i < 0 { + i = len(field) + } + + // Copy verbatim everything before the special character. + if _, err := w.w.WriteString(field[:i]); err != nil { + return err + } + field = field[i:] + + // Encode the special character. + if len(field) > 0 { + var err error + switch field[0] { + case '"': + _, err = w.w.WriteString(`""`) + case '\r': + if !w.UseCRLF { + err = w.w.WriteByte('\r') + } + case '\n': + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + } + field = field[1:] + if err != nil { + return err + } + } + } + if err := w.w.WriteByte('"'); err != nil { + return err + } + } + var err error + if w.UseCRLF { + _, err = w.w.WriteString("\r\n") + } else { + err = w.w.WriteByte('\n') + } + return err +} + +// Flush writes any buffered data to the underlying [io.Writer]. +// To check if an error occurred during Flush, call [Writer.Error]. +func (w *Writer) Flush() { + w.w.Flush() +} + +// Error reports any error that has occurred during +// a previous [Writer.Write] or [Writer.Flush]. +func (w *Writer) Error() error { + _, err := w.w.Write(nil) + return err +} + +// WriteAll writes multiple CSV records to w using [Writer.Write] and +// then calls [Writer.Flush], returning any error from the Flush. +func (w *Writer) WriteAll(records [][]string) error { + for _, record := range records { + err := w.Write(record) + if err != nil { + return err + } + } + return w.w.Flush() +} + +// fieldNeedsQuotes reports whether our field must be enclosed in quotes. +// Fields with a Comma, fields with a quote or newline, and +// fields which start with a space must be enclosed in quotes. +// We used to quote empty strings, but we do not anymore (as of Go 1.4). +// The two representations should be equivalent, but Postgres distinguishes +// quoted vs non-quoted empty string during database imports, and it has +// an option to force the quoted behavior for non-quoted CSV but it has +// no option to force the non-quoted behavior for quoted CSV, making +// CSV with quoted empty strings strictly less useful. +// Not quoting the empty string also makes this package match the behavior +// of Microsoft Excel and Google Drive. +// For Postgres, quote the data terminating string `\.`. +func (w *Writer) fieldNeedsQuotes(field string) bool { + if field == "" { + return false + } + + if field == `\.` { + return true + } + + if w.Comma < utf8.RuneSelf { + for i := 0; i < len(field); i++ { + c := field[i] + if c == '\n' || c == '\r' || c == '"' || c == byte(w.Comma) { + return true + } + } + } else { + if strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") { + return true + } + } + + r1, _ := utf8.DecodeRuneInString(field) + return unicode.IsSpace(r1) +} diff --git a/gnovm/stdlibs/encoding/csv/writer_test.gno b/gnovm/stdlibs/encoding/csv/writer_test.gno new file mode 100644 index 00000000000..1407f3c670a --- /dev/null +++ b/gnovm/stdlibs/encoding/csv/writer_test.gno @@ -0,0 +1,134 @@ +package csv + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +import ( + "bytes" + "encoding/csv" + "errors" + "strings" + "testing" +) + +func TestWriteCSV(t *testing.T) { + records := [][]string{ + {"first_name", "last_name", "username"}, + {"Rob", "Pike", "rob"}, + {"Ken", "Thompson", "ken"}, + {"Robert", "Griesemer", "gri"}, + } + + var buf bytes.Buffer + w := csv.NewWriter(&buf) + err := w.WriteAll(records) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + expected := "first_name,last_name,username\nRob,Pike,rob\nKen,Thompson,ken\nRobert,Griesemer,gri\n" + if buf.String() != expected { + t.Errorf("Unexpected output:\ngot %q\nwant %q", buf.String(), expected) + } +} + +var writeTests = []struct { + Input [][]string + Output string + Error error + UseCRLF bool + Comma rune +}{ + {Input: [][]string{{"abc"}}, Output: "abc\n"}, + {Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true}, + {Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"}, + {Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"}, + {Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"}, + {Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"}, + {Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"}, + {Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"}, + {Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"}, + {Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true}, + {Input: [][]string{{"abc\rdef"}}, Output: "\"abcdef\"\r\n", UseCRLF: true}, + {Input: [][]string{{"abc\rdef"}}, Output: "\"abc\rdef\"\n", UseCRLF: false}, + {Input: [][]string{{""}}, Output: "\n"}, + {Input: [][]string{{"", ""}}, Output: ",\n"}, + {Input: [][]string{{"", "", ""}}, Output: ",,\n"}, + {Input: [][]string{{"", "", "a"}}, Output: ",,a\n"}, + {Input: [][]string{{"", "a", ""}}, Output: ",a,\n"}, + {Input: [][]string{{"", "a", "a"}}, Output: ",a,a\n"}, + {Input: [][]string{{"a", "", ""}}, Output: "a,,\n"}, + {Input: [][]string{{"a", "", "a"}}, Output: "a,,a\n"}, + {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, + {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, + {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, + {Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"}, + {Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"}, + {Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'}, + {Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'}, + {Input: [][]string{{"foo"}}, Comma: '"', Error: ErrInvalidDelim}, +} + +func TestWrite(t *testing.T) { + for n, tt := range writeTests { + b := &strings.Builder{} + f := csv.NewWriter(b) + f.UseCRLF = tt.UseCRLF + if tt.Comma != 0 { + f.Comma = tt.Comma + } + err := f.WriteAll(tt.Input) + if err != tt.Error { + t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error) + } + out := b.String() + if out != tt.Output { + t.Errorf("#%d: out=%q want %q", n, out, tt.Output) + } + } +} + +type errorWriter struct{} + +func (e errorWriter) Write(b []byte) (int, error) { + return 0, errors.New("Test") +} + +func TestError(t *testing.T) { + b := &bytes.Buffer{} + f := csv.NewWriter(b) + f.Write([]string{"abc"}) + f.Flush() + err := f.Error() + if err != nil { + t.Errorf("Unexpected error: %s\n", err) + } + + f = csv.NewWriter(errorWriter{}) + f.Write([]string{"abc"}) + f.Flush() + err = f.Error() + + if err == nil { + t.Error("Error should not be nil") + } +} + +var benchmarkWriteData = [][]string{ + {"abc", "def", "12356", "1234567890987654311234432141542132"}, + {"abc", "def", "12356", "1234567890987654311234432141542132"}, + {"abc", "def", "12356", "1234567890987654311234432141542132"}, +} + +func BenchmarkWrite(b *testing.B) { + for i := 0; i < b.N; i++ { + w := csv.NewWriter(&bytes.Buffer{}) + err := w.WriteAll(benchmarkWriteData) + if err != nil { + b.Fatal(err) + } + w.Flush() + } +} diff --git a/gnovm/stdlibs/generated.go b/gnovm/stdlibs/generated.go index 67b492a34b2..c1198e5f351 100644 --- a/gnovm/stdlibs/generated.go +++ b/gnovm/stdlibs/generated.go @@ -988,7 +988,9 @@ var initOrder = [...]string{ "crypto/ed25519", "crypto/sha256", "encoding", + "encoding/base32", "encoding/base64", + "encoding/csv", "encoding/hex", "hash", "hash/adler32",