Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VKT state migration optimized methods & lower LeafNode memory pressure #338

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
196 changes: 196 additions & 0 deletions conversion.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package verkle

import (
"bytes"
"sort"
)

// BatchNewLeafNodeData bundles the inputs required to build one leaf node
// in a batch: the stem identifying the leaf and the values stored under it.
type BatchNewLeafNodeData struct {
	// Stem is the stem of the leaf node to create.
	Stem []byte
	// Values holds the leaf values keyed by their suffix index; suffixes
	// that are absent from the map have no value.
	Values map[byte][]byte
}

// BatchNewLeafNode creates a new leaf node from the given data. It optimizes LeafNode creation
// by batching expensive cryptography operations. It returns the LeafNodes sorted by stem.
func BatchNewLeafNode(nodesValues []BatchNewLeafNodeData) []LeafNode {
cfg := GetConfig()

ret := make([]LeafNode, len(nodesValues))
c1c2points := make([]*Point, 2*len(nodesValues))
c1c2frs := make([]*Fr, 2*len(nodesValues))
for i, nv := range nodesValues {
ret[i] = LeafNode{
values: nv.Values,
stem: nv.Stem,
c1: Generator(),
c2: Generator(),
}

var c1poly, c2poly [NodeWidth]Fr

valsslice := make([][]byte, NodeWidth)
for idx := range nv.Values {
valsslice[idx] = nv.Values[idx]
}

fillSuffixTreePoly(c1poly[:], valsslice[:NodeWidth/2])
ret[i].c1 = cfg.CommitToPoly(c1poly[:], 0)
fillSuffixTreePoly(c2poly[:], valsslice[NodeWidth/2:])
ret[i].c2 = cfg.CommitToPoly(c2poly[:], 0)
Comment on lines +23 to +40
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we merged the other emptyCode cached PR, we can exploit the same here. Maybe I can extract this code section to be shared between both.

For now, it won't make a big difference in the conversion since we're bottlenecked by compactions.
I prefer to merge this PR as is with our correctness guarantees, and I'll do a PR right after with only that refactor.


c1c2points[2*i], c1c2points[2*i+1] = ret[i].c1, ret[i].c2
c1c2frs[2*i], c1c2frs[2*i+1] = new(Fr), new(Fr)
}

toFrMultiple(c1c2frs, c1c2points)

var poly [NodeWidth]Fr
poly[0].SetUint64(1)
for i, nv := range nodesValues {
StemFromBytes(&poly[1], nv.Stem)
poly[2] = *c1c2frs[2*i]
poly[3] = *c1c2frs[2*i+1]

ret[i].commitment = cfg.CommitToPoly(poly[:], 252)
}

sort.Slice(ret, func(i, j int) bool {
return bytes.Compare(ret[i].stem, ret[j].stem) < 0
})

return ret
}

// BatchInsertOrderedLeaves builds a tree from a list of leaves that is ordered
// by stem and contains no duplicated stems, and returns the root of that tree.
// If leaves is empty, an empty root node is returned.
//
// There's a weak assumption that each subtree of the first stem-byte has more
// than 1 leaf node: the first leaf is always placed under an internal node at
// depth 1 rather than directly under the root. If the whole tree has more than
// 2000 leaves the chance of that not being true is 0.033~=0.
//
// The function panics (through firstDiffByteIdx) if two consecutive leaves
// share the same stem.
func BatchInsertOrderedLeaves(leaves []LeafNode) *InternalNode {
	// Nothing to insert: return an empty root instead of indexing leaves[0].
	if len(leaves) == 0 {
		return New().(*InternalNode)
	}

	// currentBranch is a representation of the current branch we're in.
	// The length of the branch is at most StemSize, and it might only
	// have non-nil values in the first N levels.
	var currentBranch [StemSize]*InternalNode

	// Initial state is a branch with a root node and one internal node below
	// it, the latter pointing to the first leaf.
	firstLeaf := &leaves[0]
	currentBranch[1] = newInternalNode(1).(*InternalNode)
	currentBranch[1].cowChild(firstLeaf.stem[1])
	currentBranch[1].children[firstLeaf.stem[1]] = firstLeaf
	// Record the depth of the first leaf like every other inserted leaf; it is
	// overwritten below if the leaf is later pushed further down.
	firstLeaf.setDepth(currentBranch[1].depth + 1)

	currentBranch[0] = New().(*InternalNode)
	currentBranch[0].cowChild(firstLeaf.stem[0])
	currentBranch[0].children[firstLeaf.stem[0]] = currentBranch[1]

	prevLeaf := firstLeaf
	leaves = leaves[1:]
	// The idea is that we compare the newLeaf with the previousLeaf, and
	// depending on how their stems differ, we adjust our currentBranch structure.
	for i := range leaves {
		newLeaf := &leaves[i]

		// We get the first index in their stems that is different.
		idx := firstDiffByteIdx(prevLeaf.stem, newLeaf.stem)

		if currentBranch[idx] == nil {
			// There's no InternalNode in the current branch at the index.
			// We need to "fill the gap" between the previous non-nil internal
			// node up to idx with new internal nodes. The last created internal
			// node then becomes the parent of both the previous and new leaf.
			prevNonNilIdx := 0
			for j := idx - 1; j >= 0; j-- {
				if currentBranch[j] != nil {
					prevNonNilIdx = j
					break
				}
			}
			for k := prevNonNilIdx + 1; k <= idx; k++ {
				currentBranch[k] = newInternalNode(currentBranch[k-1].depth + 1).(*InternalNode)
				// Both stems agree on every byte before idx, so this replaces
				// the slot that previously pointed at prevLeaf.
				currentBranch[k-1].cowChild(newLeaf.stem[k-1])
				currentBranch[k-1].children[newLeaf.stem[k-1]] = currentBranch[k]
			}

			// Re-attach the previous leaf at its new, deeper position.
			currentBranch[idx].cowChild(prevLeaf.stem[idx])
			currentBranch[idx].children[prevLeaf.stem[idx]] = prevLeaf
			prevLeaf.setDepth(currentBranch[idx].depth + 1)
		}

		// In every case the new leaf hangs from the internal node at idx, and
		// its depth is recorded relative to that parent.
		currentBranch[idx].cowChild(newLeaf.stem[idx])
		currentBranch[idx].children[newLeaf.stem[idx]] = newLeaf
		newLeaf.setDepth(currentBranch[idx].depth + 1)

		// Deeper levels belonged to the previous leaf's path; drop them.
		for j := idx + 1; j < len(currentBranch); j++ {
			currentBranch[j] = nil
		}

		prevLeaf = newLeaf
	}

	return currentBranch[0]
}

// firstDiffByteIdx reports the position of the first byte at which stem1 and
// stem2 disagree. The two stems *must* be different: if no differing byte is
// found within stem1, the function panics.
func firstDiffByteIdx(stem1 []byte, stem2 []byte) int {
	for pos := 0; pos < len(stem1); pos++ {
		if stem1[pos] == stem2[pos] {
			continue
		}
		return pos
	}
	panic("stems are equal")
}

// GetInternalNodeCommitment walks down from node following the bytes of
// partialStem and returns the commitment of the node it ends on. E.g. if
// partialStem is [a, b], it moves to the a-th child of node and then to the
// b-th child of that child, returning the latter's commitment.
//
// If a child on the path is not an internal node, the walk stops there and
// the child's commitment is returned; that child is asserted to be a
// *LeafNode, so any other node type on the path causes a panic.
func GetInternalNodeCommitment(node *InternalNode, partialStem []byte) *Point {
	for _, childIdx := range partialStem {
		child := node.children[childIdx]
		if internal, isInternal := child.(*InternalNode); isInternal {
			node = internal
			continue
		}
		// Not an internal node: the path ends here.
		return child.(*LeafNode).commitment
	}

	return node.commitment
}

// BuildFirstTwoLayers assembles the root and the second-level internal nodes
// of a tree from the precalculated commitments of the second level's children.
// commitments is indexed by the first and then the second stem byte; an
// all-zero entry means "no child" and is skipped. Every other entry is attached
// as a HashedNode wrapping those commitment bytes. This is generally useful
// when the tree was built in partitions and they need to be glued together
// without holding the whole tree in memory.
func BuildFirstTwoLayers(commitments [NodeWidth][NodeWidth][32]byte) *InternalNode {
	var (
		zero     [32]byte
		subtrees [NodeWidth]*InternalNode
	)

	// First pass: create the second-level nodes that have at least one child.
	for first := range commitments {
		for second := range commitments[first] {
			if commitments[first][second] == zero {
				continue
			}
			parent := subtrees[first]
			if parent == nil {
				parent = newInternalNode(1).(*InternalNode)
				subtrees[first] = parent
			}
			placeholder := &HashedNode{commitment: commitments[first][second][:]}
			parent.cowChild(byte(second))
			// The error result is not checked; second comes from ranging over
			// a NodeWidth-sized array.
			parent.SetChild(second, placeholder)
		}
	}

	// Second pass: hang every non-empty second-level node from a fresh root.
	root := newInternalNode(0).(*InternalNode)
	for first, subtree := range subtrees {
		if subtree != nil {
			root.cowChild(byte(first))
			root.SetChild(first, subtree)
		}
	}

	return root
}
69 changes: 50 additions & 19 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ type (

LeafNode struct {
stem []byte
values [][]byte
values map[byte][]byte

commitment *Point
c1, c2 *Point
Expand Down Expand Up @@ -231,10 +231,16 @@ func NewLeafNode(stem []byte, values [][]byte) *LeafNode {
StemFromBytes(&poly[1], stem)
toFrMultiple([]*Fr{&poly[2], &poly[3]}, []*Point{c1, c2})

vals := make(map[byte][]byte, len(values))
for i, v := range values {
if v != nil {
vals[byte(i)] = v
}
}
return &LeafNode{
// depth will be 0, but the commitment calculation
// does not need it, and so it won't be free.
values: values,
values: vals,
stem: stem,
commitment: cfg.CommitToPoly(poly[:], NodeWidth-4),
c1: c1,
Expand All @@ -246,10 +252,16 @@ func NewLeafNode(stem []byte, values [][]byte) *LeafNode {
// commitments. The created node's commitments are intended to be
// initialized with `SetTrustedBytes` in a deserialization context.
func NewLeafNodeWithNoComms(stem []byte, values [][]byte) *LeafNode {
vals := make(map[byte][]byte, len(values))
for i, v := range values {
if v != nil {
vals[byte(i)] = v
}
}
return &LeafNode{
// depth will be 0, but the commitment calculation
// does not need it, and so it won't be free.
values: values,
values: vals,
stem: stem,
}
}
Expand All @@ -259,7 +271,7 @@ func (n *InternalNode) Children() []VerkleNode {
}

func (n *InternalNode) SetChild(i int, c VerkleNode) error {
if i >= NodeWidth-1 {
if i >= NodeWidth {
Comment on lines -262 to +274
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forgot to mention: I found this bug the hard way while doing the helpers and running stuff converting trees...

I think we got lucky to not see this bug happening before.

return errors.New("child index higher than node width")
}
n.children[i] = c
Expand Down Expand Up @@ -375,7 +387,11 @@ func (n *InternalNode) GetStem(stem []byte, resolver NodeResolverFn) ([][]byte,
return n.GetStem(stem, resolver)
case *LeafNode:
if equalPaths(child.stem, stem) {
return child.values, nil
values := make([][]byte, NodeWidth)
for i, v := range child.values {
values[i] = v
}
return values, nil
}
return nil, nil
case *InternalNode:
Expand Down Expand Up @@ -783,7 +799,7 @@ func (n *InternalNode) setDepth(d byte) {
func MergeTrees(subroots []*InternalNode) VerkleNode {
root := New().(*InternalNode)
for _, subroot := range subroots {
for i := 0; i < 256; i++ {
for i := 0; i < NodeWidth; i++ {
if _, ok := subroot.children[i].(Empty); ok {
continue
}
Expand Down Expand Up @@ -837,7 +853,7 @@ func (n *LeafNode) updateCn(index byte, value []byte, c *Point) {
var (
old, newH [2]Fr
diff Point
poly [256]Fr
poly [NodeWidth]Fr
)

// Optimization idea:
Expand Down Expand Up @@ -893,7 +909,7 @@ func (n *LeafNode) updateMultipleLeaves(values [][]byte) {
// commitment. We copy the original point in oldC1 and oldC2, so we can batch their Fr transformation
// after this loop.
for i, v := range values {
if len(v) != 0 && !bytes.Equal(v, n.values[i]) {
if len(v) != 0 && !bytes.Equal(v, n.values[byte(i)]) {
if i < NodeWidth/2 {
// First time we touch C1? Save the original point for later.
if oldC1 == nil {
Expand All @@ -911,7 +927,7 @@ func (n *LeafNode) updateMultipleLeaves(values [][]byte) {
// We update C2 directly in `n`. We have our original copy in oldC2.
n.updateCn(byte(i), v, n.c2)
}
n.values[i] = v
n.values[byte(i)] = v
}
}

Expand Down Expand Up @@ -1021,8 +1037,8 @@ func leafToComms(poly []Fr, val []byte) {

func (n *LeafNode) GetProofItems(keys keylist) (*ProofElements, []byte, [][]byte) {
var (
poly [256]Fr // top-level polynomial
pe = &ProofElements{
poly [NodeWidth]Fr // top-level polynomial
pe = &ProofElements{
Cis: []*Point{n.commitment, n.commitment},
Zis: []byte{0, 1},
Yis: []*Fr{&poly[0], &poly[1]}, // Should be 0
Expand Down Expand Up @@ -1092,14 +1108,17 @@ func (n *LeafNode) GetProofItems(keys keylist) (*ProofElements, []byte, [][]byte

var (
suffix = key[31]
suffPoly [256]Fr // suffix-level polynomial
suffPoly [NodeWidth]Fr // suffix-level polynomial
count int
)

vals := make([][]byte, NodeWidth)
for idx := range n.values {
vals[idx] = n.values[idx]
}
if suffix >= 128 {
count = fillSuffixTreePoly(suffPoly[:], n.values[128:])
count = fillSuffixTreePoly(suffPoly[:], vals[128:])
} else {
count = fillSuffixTreePoly(suffPoly[:], n.values[:128])
count = fillSuffixTreePoly(suffPoly[:], vals[:128])
}

// Proof of absence: case of a missing suffix tree.
Expand Down Expand Up @@ -1171,7 +1190,8 @@ func (n *LeafNode) Serialize() ([]byte, error) {
func (n *LeafNode) Copy() VerkleNode {
l := &LeafNode{}
l.stem = make([]byte, len(n.stem))
l.values = make([][]byte, len(n.values))
l.values = make(map[byte][]byte, len(n.values))

l.depth = n.depth
copy(l.stem, n.stem)
for i, v := range n.values {
Expand Down Expand Up @@ -1202,7 +1222,10 @@ func (n *LeafNode) Key(i int) []byte {
}

func (n *LeafNode) Value(i int) []byte {
return n.values[i]
if i >= NodeWidth {
panic("leaf node index out of range")
}
return n.values[byte(i)]
}

func (n *LeafNode) toDot(parent, path string) string {
Expand All @@ -1222,7 +1245,11 @@ func (n *LeafNode) setDepth(d byte) {
}

func (n *LeafNode) Values() [][]byte {
return n.values
vals := make([][]byte, NodeWidth)
for idx := range n.values {
vals[idx] = n.values[idx]
}
return vals
}

func setBit(bitlist []byte, index int) {
Expand Down Expand Up @@ -1369,7 +1396,11 @@ func (n *LeafNode) serializeWithCompressedCommitments(c1Bytes [32]byte, c2Bytes
// Create bitlist and store in children LeafValueSize (padded) values.
children := make([]byte, 0, NodeWidth*LeafValueSize)
var bitlist [bitlistSize]byte
for i, v := range n.values {
vals := make([][]byte, NodeWidth)
for i := range n.values {
vals[i] = n.values[i]
}
for i, v := range vals {
if v != nil {
setBit(bitlist[:], i)
children = append(children, v...)
Expand Down