Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use skiplists to save on the bitmap size when compressing leaves #454

Draft
wants to merge 8 commits into
base: jsign-type-3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,11 @@ const (
leafC1CommitmentOffset = leafCommitmentOffset + banderwagon.UncompressedSize
leafC2CommitmentOffset = leafC1CommitmentOffset + banderwagon.UncompressedSize
leafChildrenOffset = leafC2CommitmentOffset + banderwagon.UncompressedSize
leafBalanceSize = 32
leafNonceSize = 8
leafBasicDataSize = 32
leafSlotSize = 32
leafValueIndexSize = 1
singleSlotLeafSize = nodeTypeSize + StemSize + 2*banderwagon.UncompressedSize + leafValueIndexSize + leafSlotSize
eoaLeafSize = nodeTypeSize + StemSize + 2*banderwagon.UncompressedSize + leafBalanceSize + leafNonceSize
eoaLeafSize = nodeTypeSize + StemSize + 2*banderwagon.UncompressedSize + leafBasicDataSize
)

func bit(bitlist []byte, nr int) bool {
Expand Down Expand Up @@ -94,6 +93,8 @@ func ParseNode(serializedNode []byte, depth byte) (VerkleNode, error) {
return parseEoAccountNode(serializedNode, depth)
case singleSlotType:
return parseSingleSlotNode(serializedNode, depth)
case skipListType:
return parseSkipList(serializedNode, depth)
default:
return nil, ErrInvalidNodeEncoding
}
Expand Down Expand Up @@ -135,17 +136,49 @@ func parseLeafNode(serialized []byte, depth byte) (VerkleNode, error) {
return ln, nil
}

func parseSkipList(serialized []byte, depth byte) (VerkleNode, error) {
var values [NodeWidth][]byte
offset := leafStemOffset + StemSize + 3*banderwagon.UncompressedSize // offset in the serialized payload
valueIdx := 0 // Index of the value being deserialized
for valueIdx < NodeWidth {
rangecount := serialized[offset+1]
gapsize := serialized[offset]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename gapsize and valueIdx to rangeSkip and rangeCount, as to use the same names in the serialization Skip and Count?

Using gap or range worlds can be confusing. Using the same names as the serialization makes easier to understand the both ways of the algorithm.

valueIdx += int(gapsize)
offset += 2
for i := 0; i < int(rangecount); i++ {
values[valueIdx] = serialized[offset : offset+leafSlotSize]
offset += leafSlotSize
valueIdx++
}
}
ln := NewLeafNodeWithNoComms(serialized[leafStemOffset:leafStemOffset+StemSize], values[:])
ln.setDepth(depth)
ln.c1 = new(Point)

// Sanity check that we have at least 3*banderwagon.UncompressedSize bytes left in the serialized payload.
if len(serialized[leafCommitmentOffset:]) < 3*banderwagon.UncompressedSize {
return nil, fmt.Errorf("leaf node commitments are not the correct size, expected at least %d, got %d", 3*banderwagon.UncompressedSize, len(serialized[leafC1CommitmentOffset:]))
}

if err := ln.c1.SetBytesUncompressed(serialized[leafC1CommitmentOffset:leafC1CommitmentOffset+banderwagon.UncompressedSize], true); err != nil {
return nil, fmt.Errorf("setting c1 commitment: %w", err)
}
ln.c2 = new(Point)
if err := ln.c2.SetBytesUncompressed(serialized[leafC2CommitmentOffset:leafC2CommitmentOffset+banderwagon.UncompressedSize], true); err != nil {
return nil, fmt.Errorf("setting c2 commitment: %w", err)
}
ln.commitment = new(Point)
if err := ln.commitment.SetBytesUncompressed(serialized[leafCommitmentOffset:leafC1CommitmentOffset], true); err != nil {
return nil, fmt.Errorf("setting commitment: %w", err)
}
return ln, nil
}

func parseEoAccountNode(serialized []byte, depth byte) (VerkleNode, error) {
var values [NodeWidth][]byte
offset := leafStemOffset + StemSize + 2*banderwagon.UncompressedSize
values[0] = zero32[:] // 0 version
values[1] = serialized[offset : offset+leafBalanceSize] // balance
var nonce [32]byte
offset += leafBalanceSize
copy(nonce[:leafNonceSize], serialized[offset:offset+leafNonceSize])
values[2] = nonce[:] // nonce
values[3] = EmptyCodeHash[:]
values[4] = zero32[:] // 0 code size
values[0] = serialized[offset : offset+leafBasicDataSize] // basic data
values[1] = EmptyCodeHash[:]
ln := NewLeafNodeWithNoComms(serialized[leafStemOffset:leafStemOffset+StemSize], values[:])
ln.setDepth(depth)
ln.c1 = new(Point)
Expand Down
109 changes: 88 additions & 21 deletions encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package verkle

import (
"bytes"
"encoding/binary"
"testing"

"github.com/crate-crypto/go-ipa/banderwagon"
Expand All @@ -22,16 +23,18 @@ func TestLeafStemLength(t *testing.T) {
// Serialize a leaf with no values, but whose stem is 32 bytes. The
// serialization should trim the extra byte.
toolong := make([]byte, 32)
leaf, err := NewLeafNode(toolong, make([][]byte, NodeWidth))
values := make([][]byte, NodeWidth)
values[42] = zero32[:]
leaf, err := NewLeafNode(toolong, values)
if err != nil {
t.Fatal(err)
}
ser, err := leaf.Serialize()
if err != nil {
t.Fatal(err)
}
if len(ser) != nodeTypeSize+StemSize+bitlistSize+3*banderwagon.UncompressedSize {
t.Fatalf("invalid serialization when the stem is longer than 31 bytes: %x (%d bytes != %d)", ser, len(ser), nodeTypeSize+StemSize+bitlistSize+2*banderwagon.UncompressedSize)
if len(ser) != singleSlotLeafSize {
t.Fatalf("invalid serialization when the stem is longer than 31 bytes: %x (%d bytes != %d)", ser, len(ser), singleSlotLeafSize)
}
}

Expand Down Expand Up @@ -61,12 +64,11 @@ func TestInvalidNodeEncoding(t *testing.T) {
}

func TestParseNodeEoA(t *testing.T) {
var basicdata [32]byte
values := make([][]byte, 256)
values[0] = zero32[:]
values[0] = basicdata[:]
binary.BigEndian.PutUint64(values[0][8:], 0xde)
values[1] = EmptyCodeHash[:] // set empty code hash as balance, because why not
values[2] = fourtyKeyTest[:] // set nonce to 64
values[3] = EmptyCodeHash[:] // set empty code hash
values[4] = zero32[:] // zero-size
ln, err := NewLeafNode(ffx32KeyTest[:31], values)
if err != nil {
t.Fatalf("error creating leaf node: %v", err)
Expand Down Expand Up @@ -99,26 +101,15 @@ func TestParseNodeEoA(t *testing.T) {
t.Fatalf("invalid stem, got %x, expected %x", lnd.stem, ffx32KeyTest[:31])
}

if !bytes.Equal(lnd.values[0], zero32[:]) {
t.Fatalf("invalid version, got %x, expected %x", lnd.values[0], zero32[:])
nonce := binary.BigEndian.Uint64(lnd.values[0][8:])
if nonce != 0xde {
t.Fatalf("invalid version, got %x, expected %x", nonce, 0xde)
}

if !bytes.Equal(lnd.values[1], EmptyCodeHash[:]) {
t.Fatalf("invalid balance, got %x, expected %x", lnd.values[1], EmptyCodeHash[:])
}

if !bytes.Equal(lnd.values[2], fourtyKeyTest[:]) {
t.Fatalf("invalid nonce, got %x, expected %x", lnd.values[2], fourtyKeyTest[:])
}

if !bytes.Equal(lnd.values[3], EmptyCodeHash[:]) {
t.Fatalf("invalid code hash, got %x, expected %x", lnd.values[3], EmptyCodeHash[:])
}

if !bytes.Equal(lnd.values[4], zero32[:]) {
t.Fatalf("invalid code size, got %x, expected %x", lnd.values[4], zero32[:])
}

if !lnd.c2.Equal(&banderwagon.Identity) {
t.Fatalf("invalid c2, got %x, expected %x", lnd.c2, banderwagon.Identity)
}
Expand Down Expand Up @@ -190,3 +181,79 @@ func TestParseNodeSingleSlot(t *testing.T) {
t.Fatalf("invalid commitment, got %x, expected %x", lnd.commitment, ln.commitment)
}
}

func TestSerializeWithSkipLists(t *testing.T) {
t.Parallel()

values := make([][]byte, NodeWidth)
values[42] = zero32[:]
values[57] = fourtyKeyTest[:]
leaf, err := NewLeafNode(ffx32KeyTest, values)
if err != nil {
t.Fatal(err)
}
ser, err := leaf.Serialize()
if err != nil {
t.Fatal(err)
}
if len(ser) == 0 {
t.Fatal("empty serialization buffer")
}
if ser[0] != skipListType {
t.Fatalf("invalid serialization type, got %d, expected %d", ser[0], skipListType)
}
if !bytes.Equal(ser[1:32], ffx32KeyTest[:31]) {
t.Fatalf("stem didn't serialize properly, got %x, want %x", ser[1:32], ffx32KeyTest[:31])
}
expectedSize := nodeTypeSize + StemSize + 3*banderwagon.UncompressedSize + 4 + 2*leafSlotSize
if len(ser) != expectedSize {
t.Fatalf("invalid skiplist serialization: %x (%d bytes != %d)", ser, len(ser), expectedSize)
}
if ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize] != 42 {
t.Fatalf("invalid amount of leaves skipped, got %d, want %d", ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize], 42)
}
if ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize+1] != 1 {
t.Fatalf("invalid amount of leaves skipped, got %d, want %d", ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize+1], 42)
}
if ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize+2+leafSlotSize] != 14 {
t.Fatalf("invalid amount of leaves skipped, got %d, want %d", ser[nodeTypeSize+StemSize+3*banderwagon.UncompressedSize+2+leafSlotSize], 14)
}

// add a last value to check that the final gap is properly handled
values[255] = ffx32KeyTest
ser, err = leaf.Serialize()
if err != nil {
t.Fatal(err)
}
expectedSize = nodeTypeSize + StemSize + 3*banderwagon.UncompressedSize + 6 + 3*leafSlotSize
if len(ser) != expectedSize {
t.Fatalf("invalid skiplist serialization: %x (%d bytes != %d)", ser, len(ser), expectedSize)
}

deser, err := ParseNode(ser, 5)
if err != nil {
t.Fatal(err)
}
vals := deser.(*LeafNode).values
for i, val := range vals {

switch i {
case 42:
if !bytes.Equal(val, zero32[:]) {
t.Fatalf("invalid deserialized skiplist value at %d: got %x, want %x", i, val, zero32)
}
case 57:
if !bytes.Equal(val, fourtyKeyTest[:]) {
t.Fatalf("invalid deserialized skiplist value at %d: got %x, want %x", i, val, fourtyKeyTest)
}
case 255:
if !bytes.Equal(val, ffx32KeyTest[:]) {
t.Fatalf("invalid deserialized skiplist value at %d: got %x, want %x", i, val, ffx32KeyTest)
}
default:
if val != nil {
t.Fatalf("invalid deserialized skiplist value at %d: got %x, want nil", i, val)
}
}
}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/ethereum/go-verkle

go 1.19
go 1.21

require (
github.com/crate-crypto/go-ipa v0.0.0-20240223125850-b1e8a79f509c
Expand Down
5 changes: 3 additions & 2 deletions proof_ipa.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"bytes"
"errors"
"fmt"
"slices"
"sort"

ipa "github.com/crate-crypto/go-ipa"
Expand Down Expand Up @@ -399,14 +400,14 @@ func DeserializeProof(vp *VerkleProof, statediff StateDiff) (*Proof, error) {
k[StemSize] = ins.Suffix
keys = append(keys, k[:])
prevalues = append(prevalues, nil)
postvalues = append(postvalues, ins.New[:])
postvalues = append(postvalues, slices.Clone(ins.New[:]))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This, and line 410, are the fixes for the test that was broken.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Weird, so this means that stemdiff is somehow mutated afterwards, affecting the returned Proof?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The area that is used to store the location of the iterator is reused over and over, and if we pass a reference to it, it will change as the iterator progresses. This is not the first time we have fallen for this, and probably not the last time either; it's quite subtle and easy to forget.

}
for _, rd := range stemdiff.Reads {
var k [32]byte
copy(k[:StemSize], stemdiff.Stem[:])
k[StemSize] = rd.Suffix
keys = append(keys, k[:])
prevalues = append(prevalues, rd.Current[:])
prevalues = append(prevalues, slices.Clone(rd.Current[:]))
postvalues = append(postvalues, nil)
}
for _, mi := range stemdiff.Missing {
Expand Down
54 changes: 41 additions & 13 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ const (
leafType byte = 2
eoAccountType byte = 3
singleSlotType byte = 4
skipListType byte = 8
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is set in bit order, because I'm hoping to mix encodings in the future.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eoAccountType has value 3, which doesn't respect that idea. Is it worth changing now? If not, we're left with a half-baked idea.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could change it to 4, no long-term db has been using this.

)

type (
Expand Down Expand Up @@ -1775,35 +1776,42 @@ func (n *LeafNode) serializeLeafWithUncompressedCommitments(cBytes, c1Bytes, c2B
bitlist [bitlistSize]byte
isEoA = true
count, lastIdx int
gapcount int
gaps [32]struct {
Skip byte // How many slots to skip before the next range
Count byte // Size of the next range
}
)
for i, v := range n.values {
if v != nil {
count++
lastIdx = i
gaps[gapcount].Count++

setBit(bitlist[:], i)
children = append(children, v...)
if padding := emptyValue[:LeafValueSize-len(v)]; len(padding) != 0 {
children = append(children, padding...)
}
} else {
if gaps[gapcount].Skip == 255 {
panic("empty leaf node")
}
if i > 0 && n.values[i-1] != nil {
gapcount++
}
gaps[gapcount].Skip++
}

// Check for an EOA
if isEoA {
switch i {
case 0:
// Version should be 0
isEoA = v != nil && bytes.Equal(v, zero32[:])
case 1:
// Balance should not be nil
// Basic data should not be nil
isEoA = v != nil
case 2:
// Nonce should have its last 24 bytes set to 0
isEoA = v != nil && bytes.Equal(v[leafNonceSize:], zero24[:])
case 3:
case 1:
// Code hash should be the empty code hash
isEoA = v != nil && bytes.Equal(v, EmptyCodeHash[:])
case 4:
// Code size must be 0
isEoA = v != nil && bytes.Equal(v, zero32[:])
default:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the EoA fix for Nyota.

// All other values must be nil
isEoA = v == nil
Expand All @@ -1830,8 +1838,28 @@ func (n *LeafNode) serializeLeafWithUncompressedCommitments(cBytes, c1Bytes, c2B
copy(result[leafStemOffset:], n.stem[:StemSize])
copy(result[leafStemOffset+StemSize:], c1Bytes[:])
copy(result[leafStemOffset+StemSize+banderwagon.UncompressedSize:], cBytes[:])
copy(result[leafStemOffset+StemSize+2*banderwagon.UncompressedSize:], n.values[1]) // copy balance
copy(result[leafStemOffset+StemSize+2*banderwagon.UncompressedSize+leafBalanceSize:], n.values[2][:leafNonceSize]) // copy nonce
copy(result[leafStemOffset+StemSize+2*banderwagon.UncompressedSize:], n.values[0]) // copy basic data
case gapcount < 16:
// If there are less than 16 gaps, it's worth using skiplists
result = make([]byte, 1, nodeTypeSize+StemSize+bitlistSize+3*banderwagon.UncompressedSize+len(children))
result[0] = skipListType
result = append(result, n.stem[:StemSize]...)
result = append(result, cBytes[:]...)
result = append(result, c1Bytes[:]...)
result = append(result, c2Bytes[:]...)
var leafIdx int
for _, gap := range gaps {
if gap.Count == 0 {
break // skip the last gap as nothing follows
}
Comment on lines +1863 to +1866
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we do:

for j:=0; j<=gapCount; j++ {

and avoid if L1852 ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not completely, because if Count == 0 it means that the group ends with a gap, whereas if Count != 0 then the group ends with a range. But yeah, no point in going over the whole list, I'll loop on j.

result = append(result, gap.Skip)
leafIdx += int(gap.Skip)
result = append(result, gap.Count)
for i := 0; i < int(gap.Count); i++ {
result = append(result, n.values[leafIdx]...)
leafIdx++
}
}
default:
result = make([]byte, nodeTypeSize+StemSize+bitlistSize+3*banderwagon.UncompressedSize+len(children))
result[0] = leafType
Expand Down