Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

verkle migration: faster stage2 conversion #196

Closed
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
dfe1e27
verkle migration: code rearchitecture
jsign Mar 29, 2023
7ebb7aa
verkle migration: memory saving optimizations
jsign Mar 29, 2023
47869f5
verkle migration: parallelize leaf computation
jsign Mar 30, 2023
8deaef2
verkle migration: keep ordering in batch results
jsign Mar 30, 2023
018ce4e
verkle migration: parallelize tree merging
jsign Mar 30, 2023
655a8de
verkle migration: remove sorted file & cleanup log messages
jsign Mar 30, 2023
d0be88e
verkle migration: do partial commitment of trees in parallel
jsign Mar 30, 2023
04df746
verkle migration: serialize in parallel
jsign Mar 31, 2023
cea9eac
verkle migration: avoid copying to tuples but use slices
jsign Mar 31, 2023
00d77f2
verkle migration: more memory saving optimizations
jsign Mar 31, 2023
b069fa4
verkle migration: refactor
jsign Apr 1, 2023
3afa5be
verkle migration: pipeline rearchitecture
jsign Apr 1, 2023
cb593b1
verkle migration: build top two layers from processed subtrees
jsign Apr 3, 2023
f451ad0
verkle migraiton: batch database writes
jsign Apr 4, 2023
892acba
verkle migration: cleanup
jsign Apr 4, 2023
e5acace
verkle migration: comment code
jsign Apr 4, 2023
5d08fd1
verkle migration: adjust to byte key
jsign Apr 5, 2023
8c3caf8
verkle migration: add duration per file and estimated logs
jsign Apr 5, 2023
ee16580
mod: use corresponding go-verkle and go-ipa deps
jsign Apr 6, 2023
57fde22
verkle migration: support smaller trees
jsign Apr 13, 2023
e5ee037
mod: use latest PR version of go-verkle
jsign Apr 13, 2023
50c776a
Update cmd/geth/verkle.go
jsign Apr 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 168 additions & 68 deletions cmd/geth/verkle.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ package main

import (
"bytes"
"context"
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"runtime"
Expand All @@ -33,6 +33,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/core/state/snapshot"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/internal/flags"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
Expand All @@ -41,6 +42,7 @@ import (
"github.com/gballet/go-verkle"
"github.com/holiman/uint256"
cli "github.com/urfave/cli/v2"
"golang.org/x/sync/errgroup"
)

var (
Expand Down Expand Up @@ -732,89 +734,187 @@ func dumpKeys(ctx *cli.Context) error {
}

func sortKeys(ctx *cli.Context) error {
// Get list of files
files, _ := ioutil.ReadDir(".")
// Run precomp preparation now to avoid racing in parallel goroutines if the
// precomputed table doesn't exist.
_ = verkle.GetConfig()

// Open database.
stack, _ := makeConfigNode(ctx)
defer stack.Close()
chaindb := utils.MakeChainDatabase(ctx, stack, false)
if chaindb == nil {
return errors.New("nil chaindb")
}

start := time.Now()
root := verkle.New()

// Iterate over files
for _, file := range files {
// The migration code converts trees partitioned by the first two bytes of
// the stem. We'll collect in secondLevelCommitment[a][b] the commitment of
// the InternalNode with stem [a, b, ...]. We'll then use this to build the
// first two layers of the tree.
var secondLevelCommitment [256][256][32]byte

// List files and iterate on them
files, _ := ioutil.ReadDir(".")
for fileIdx, file := range files {
// Check if file is a binary file
fname := file.Name()
if !bytes.HasSuffix([]byte(fname), []byte(".bin")) || bytes.HasPrefix([]byte(fname), []byte("sorted-")) || len(fname) != 6 {
if !bytes.HasSuffix([]byte(fname), []byte(".bin")) || len(fname) != 6 {
continue
}
log.Info("Processing file", "name", file.Name())
data, _ := ioutil.ReadFile(file.Name())
numTuples := len(data) / 64
tuples := make([][64]byte, 0, numTuples)
reader := bytes.NewReader(data)
for {
var tuple [64]byte
err := binary.Read(reader, binary.LittleEndian, &tuple)
if errors.Is(err, io.EOF) {
break
startFile := time.Now()

// Read the file, grouping leaf values by the first two bytes of the stem, and send each group to secondLvlLeaves.
secondLvlLeaves := make(chan []verkle.BatchNewLeafNodeData)
go func() {
if err := getSortedLeavesData(fname, secondLvlLeaves); err != nil {
log.Crit("Failed to get sorted leaves data", "error", err)
}
if err != nil {
panic(err)
close(secondLvlLeaves)
}()

// Process secondLvlLeaves items and pipe the results to serializedTrees.
log.Info("Building tree", "name", file.Name())
serializedTrees := make(chan []verkle.SerializedNode)
go func() {
// We read from the channel, and allow up to runtime.NumCPU() goroutines to process the data.
// This tries to keep as many CPUs busy as possible, while also putting some backpressure on
// the channel to avoid using too much memory.
group, _ := errgroup.WithContext(context.Background())
group.SetLimit(runtime.NumCPU())
for leavesData := range secondLvlLeaves {
leavesData := leavesData
group.Go(func() error {
// We generate the LeafNodes in an optimized way.
leaves := verkle.BatchNewLeafNode(leavesData)
// We do an optimized tree construction from all the leaves at once.
// Note this is a partial tree since all the keys have the same first two bytes of the stem.
jsign marked this conversation as resolved.
Show resolved Hide resolved
root := verkle.BatchInsertOrderedLeaves(leaves)
root.Commit()

// Serialize all the nodes of the generated tree, which takes advantage of many optimizations.
nodes, err := root.BatchSerialize()
if err != nil {
return fmt.Errorf("failed to serialize nodes: %w", err)
}

// Sort the serialized nodes by their CommitmentBytes, which tries to help the database with
// future compactions when inserting.
sort.Slice(nodes, func(i, j int) bool {
return bytes.Compare(nodes[i].CommitmentBytes[:], nodes[j].CommitmentBytes[:]) < 0
})

// Remember: this is a partial tree where all the keys have the same first two bytes of the stem.
// We collect now all the commitments of the InternalNodes with stem [a, b, ...]
// in secondLevelCommitment[a][b]. Note that each goroutine is working on a different
// place in the array, so there's no race-condition.
stem := leavesData[0].Stem // All the leaves have the same first 2-byte stem, take the first one.
point := verkle.GetInternalNodeCommitment(root, stem[:2])
secondLevelCommitment[stem[0]][stem[1]] = point.Bytes()

// Send the nodes to serializedTrees which will write them to disk.
serializedTrees <- nodes
return nil
})
}
if err := group.Wait(); err != nil {
log.Crit("Failed to build tree", "error", err)
}
close(serializedTrees)
}()

// We receive serialized nodes from serializedTrees and write them to disk.
// We batch them into presumably optimal batches. Note this also puts backpressure
// on the previous channels if we can't write fast enough. That's useful because
// there's no reason to use more memory if things are lagging behind. Disk is slow.
log.Info("Serializing tree")
batch := chaindb.NewBatchWithSize(ethdb.IdealBatchSize)
for nodes := range serializedTrees {
for _, node := range nodes {
if err := batch.Put(node.CommitmentBytes[:], node.SerializedBytes); err != nil {
log.Crit("put node to disk: %s", err)
}
if batch.ValueSize() > ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
log.Crit("write batch: %s", err)
}
batch.Reset()
}
}
tuples = append(tuples, tuple)
}

// Sort tuples by key
log.Info("Sorting file", "name", file.Name())
sort.Slice(tuples, func(i, j int) bool {
return bytes.Compare(tuples[i][:32], tuples[j][:32]) < 0
})
// Just make sure to GC before the next file, so there's a bound of ~4GiB of memory used.
runtime.GC()

// Merge the values
log.Info("Merging file", "name", file.Name())
file, _ := os.Create("sorted-" + file.Name())
var (
stem [31]byte
values = make([][]byte, 256)
last [31]byte
)
if len(tuples) > 0 {
copy(last[:], tuples[0][:31])
}
for i := range tuples {
copy(stem[:], tuples[i][:31])
if stem != last {
binary.Write(file, binary.LittleEndian, last)
binary.Write(file, binary.LittleEndian, values)

var istem [31]byte
istem = last
err := root.(*verkle.InternalNode).InsertStem(istem[:], values, nil)
if err != nil {
panic(err)
}
copy(last[:], stem[:])
values = make([][]byte, 256)
}
historyAvgPerFile := time.Since(start) / time.Duration(fileIdx+1)
timeLeft := common.PrettyDuration(historyAvgPerFile * time.Duration(len(files)-fileIdx-1))
log.Info("Subtree finished", "file", fname, "elapsed", common.PrettyDuration(time.Since(startFile)), "estimated_remaining", timeLeft.String())
}

values[tuples[i][31]] = make([]byte, 32)
copy(values[tuples[i][31]], tuples[i][32:])
// From all the commitments of the InternalNodes with stem [a, b, ...] we build
// and save the first two layers of the tree.
root := verkle.BuildFirstTwoLayers(secondLevelCommitment)
log.Info("Building tree finished", "root", fmt.Sprintf("%x", root.Commit().Bytes()))
nodes, err := root.BatchSerialize()
if err != nil {
return fmt.Errorf("failed to serialize nodes: %w", err)
}
for _, node := range nodes {
if err := chaindb.Put(node.CommitmentBytes[:], node.SerializedBytes); err != nil {
log.Crit("put node to disk: %s", err)
}
}

// dump the last group
binary.Write(file, binary.LittleEndian, stem)
binary.Write(file, binary.LittleEndian, values)
err := root.(*verkle.InternalNode).InsertStem(stem[:], values, nil)
if err != nil {
panic(err)
}
log.Info("Finished", "elapsed", common.PrettyDuration(time.Since(start)))
return nil
}

// Committing file
log.Info("Committing file", "name", file.Name())
root.Commit()
func getSortedLeavesData(fname string, secondLvlLeavesData chan []verkle.BatchNewLeafNodeData) error {
log.Info("Reading file", "name", fname)
data, err := ioutil.ReadFile(fname)
if err != nil {
return fmt.Errorf("failed to read file: %w", err)
}

// Write sorted tuples back to file
log.Info("Writing file", "name", file.Name())
file.Close()
log.Info("Processing file", "name", fname)
numTuples := len(data) / 64
tuples := make([][]byte, numTuples)
for i := 0; i < numTuples; i++ {
tuples[i] = data[i*64 : (i+1)*64]
}
log.Info("Done", "root", fmt.Sprintf("%x", root.Commit().Bytes()))
log.Info("Finished", "elapsed", common.PrettyDuration(time.Since(start)))
// Sort tuples by key
log.Info("Sorting file", "name", fname)
sort.Slice(tuples, func(i, j int) bool {
return bytes.Compare(tuples[i][:32], tuples[j][:32]) < 0
})

// Merge the values
log.Info("Merging file", "name", fname)
var (
stem []byte
values = make(map[byte][]byte, 5)
last []byte
)
if len(tuples) > 0 {
last = tuples[0][:31]
}
var leavesData []verkle.BatchNewLeafNodeData
for i := range tuples {
stem = tuples[i][:31]
if !bytes.Equal(stem, last) {
leavesData = append(leavesData, verkle.BatchNewLeafNodeData{Stem: last, Values: values})
if stem[1] != last[1] {
secondLvlLeavesData <- leavesData
leavesData = make([]verkle.BatchNewLeafNodeData, 0, len(leavesData))
}
last = stem
values = make(map[byte][]byte)
}

values[tuples[i][31]] = tuples[i][32:]
}
leavesData = append(leavesData, verkle.BatchNewLeafNodeData{Stem: last, Values: values})
secondLvlLeavesData <- leavesData

return nil
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require (
github.com/fjl/gencodec v0.0.0-20220412091415-8bb9e558978c
github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5
github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff
github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f
github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906
github.com/go-stack/stack v1.8.0
github.com/golang-jwt/jwt/v4 v4.3.0
github.com/golang/protobuf v1.5.2
Expand Down
10 changes: 2 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ github.com/consensys/gnark-crypto v0.4.1-0.20210426202927-39ac3d4b3f1f/go.mod h1
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/crate-crypto/go-ipa v0.0.0-20230315201338-1643fdc2ead8 h1:2EBbIwPDRqlCD2K34Eojyy0x9d3RhOuHAZfbQm508X8=
github.com/crate-crypto/go-ipa v0.0.0-20230315201338-1643fdc2ead8/go.mod h1:gzbVz57IDJgQ9rLQwfSk696JGWof8ftznEL9GoAv3NI=
github.com/crate-crypto/go-ipa v0.0.0-20230410135559-ce4a96995014 h1:bbyTlFQ12wkFA6aVL+9HrBZwVl85AN0VS/Bwam7o93U=
github.com/crate-crypto/go-ipa v0.0.0-20230410135559-ce4a96995014/go.mod h1:gzbVz57IDJgQ9rLQwfSk696JGWof8ftznEL9GoAv3NI=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
Expand Down Expand Up @@ -137,10 +135,8 @@ github.com/garslo/gogen v0.0.0-20170306192744-1d203ffc1f61 h1:IZqZOB2fydHte3kUgx
github.com/garslo/gogen v0.0.0-20170306192744-1d203ffc1f61/go.mod h1:Q0X6pkwTILDlzrGEckF6HKjXe48EgsY/l7K7vhY4MW8=
github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff h1:tY80oXqGNY4FhTFhk+o9oFHGINQ/+vhlm8HFzi6znCI=
github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww=
github.com/gballet/go-verkle v0.0.0-20230317174103-141354da6b11 h1:x4hiQFgr1SlqR4IoAZiXLFZK4L7KbibqkORqa1fwKp8=
github.com/gballet/go-verkle v0.0.0-20230317174103-141354da6b11/go.mod h1:IyOnn1kujMWaT+wet/6Ix1BtvYwateOBy9puuWH/8sw=
github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f h1:gP4uR2/1qx6hsIzbRI28JWcsVuP7xyjyj6SpLnoFobc=
github.com/gballet/go-verkle v0.0.0-20230413135631-4bea2763ed0f/go.mod h1:P3bwGrLhsUNIsUDlq2yzMPvO1c/15oiB3JS85P+hNfw=
github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906 h1:T/z0/Xg6VwrTdw6oZcQyw6vLjDF5+g/15ppwSWgBMP8=
github.com/gballet/go-verkle v0.0.0-20230413165055-0ebfd8549906/go.mod h1:P3bwGrLhsUNIsUDlq2yzMPvO1c/15oiB3JS85P+hNfw=
github.com/getkin/kin-openapi v0.53.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4=
github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
Expand Down Expand Up @@ -558,8 +554,6 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211020174200-9d6173849985/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220919091848-fb04ddd9f9c8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
Expand Down