From 29f1da40d7fc6da34af6c063a8ad1801aef06513 Mon Sep 17 00:00:00 2001 From: John Jannotti Date: Fri, 10 Mar 2023 15:21:14 -0500 Subject: [PATCH 1/2] Small tweaks, mostly to comments --- crypto/merkletrie/cache.go | 8 +- crypto/merkletrie/cache_test.go | 4 +- crypto/merkletrie/committer.go | 2 +- crypto/merkletrie/node.go | 20 ++-- crypto/merkletrie/node_test.go | 100 +++++++++++++++--- crypto/merkletrie/trie.go | 38 ++++--- ...merkle_commiter.go => merkle_committer.go} | 0 7 files changed, 122 insertions(+), 50 deletions(-) rename ledger/store/{merkle_commiter.go => merkle_committer.go} (100%) diff --git a/crypto/merkletrie/cache.go b/crypto/merkletrie/cache.go index 208397edb9..e0112a07d7 100644 --- a/crypto/merkletrie/cache.go +++ b/crypto/merkletrie/cache.go @@ -43,9 +43,9 @@ var ErrLoadedPageMissingNode = errors.New("loaded page is missing a node") var ErrPageDecodingFailuire = errors.New("error encountered while decoding page") type merkleTrieCache struct { - // mt is a point to the originating trie + // mt is a pointer to the originating trie mt *Trie - // committer is the backing up storage for the cache. ( memory, database, etc. ) + // committer is the backing store for the cache. ( memory, database, etc. ) committer Committer // cachedNodeCount is the number of currently cached, in-memory, nodes stored in the pageToNIDsPtr structure. cachedNodeCount int @@ -293,7 +293,7 @@ func (mtc *merkleTrieCache) beginTransaction() { mtc.txNextNodeID = mtc.mt.nextNodeID } -// commitTransaction - used internaly by the Trie +// commitTransaction - used internally by the Trie func (mtc *merkleTrieCache) commitTransaction() { // the created nodes are already on the list. for nodeID := range mtc.txCreatedNodeIDs { @@ -426,7 +426,7 @@ func (mtc *merkleTrieCache) commit() (CommitStats, error) { // reallocatePendingPages is called by the commit() function, and is responsible for performing two tasks - // 1. calculate the hashes of all the newly created nodes -// 2. reornigize the pending flush nodes into an optimal page list, and construct a list of pages that need to be created, deleted and updated. +// 2. reorganize the pending flush nodes into an optimal page list, and construct a list of pages that need to be created, deleted and updated. func (mtc *merkleTrieCache) reallocatePendingPages(stats *CommitStats) (pagesToCreate []uint64, pagesToDelete map[uint64]bool, pagesToUpdate map[uint64]map[storedNodeIdentifier]*node, err error) { // newPageThreshold is the threshold at which all the pages are newly created pages that were never committed. newPageThreshold := uint64(mtc.mt.lastCommittedNodeID) / uint64(mtc.nodesPerPage) diff --git a/crypto/merkletrie/cache_test.go b/crypto/merkletrie/cache_test.go index d9c7a23e30..5967f01145 100644 --- a/crypto/merkletrie/cache_test.go +++ b/crypto/merkletrie/cache_test.go @@ -298,7 +298,7 @@ func (mt *Trie) TestDeleteRollback(d []byte) (bool, error) { if err != nil { return false, err } - found, err := pnode.find(mt.cache, d[:]) + found, err := pnode.find(&mt.cache, d[:]) if !found || err != nil { return false, err } @@ -311,7 +311,7 @@ func (mt *Trie) TestDeleteRollback(d []byte) (bool, error) { mt.elementLength = 0 return true, nil } - _, err = pnode.remove(mt.cache, d[:], make([]byte, 0, len(d))) + _, err = pnode.remove(&mt.cache, d[:], make([]byte, 0, len(d))) // unlike the "real" function, we want always to fail here to test the rollbackTransaction() functionality. mt.cache.rollbackTransaction() return false, fmt.Errorf("this is a test for failing a Delete request") diff --git a/crypto/merkletrie/committer.go b/crypto/merkletrie/committer.go index bad5fe7e0a..5e5ae758ab 100644 --- a/crypto/merkletrie/committer.go +++ b/crypto/merkletrie/committer.go @@ -26,7 +26,7 @@ const ( inMemoryCommitterPageSize = int64(512) ) -// InMemoryCommitter is a fully function in-memory committer, supporting +// InMemoryCommitter is a fully functional in-memory committer, supporting // persistence of pages. type InMemoryCommitter struct { memStore map[uint64][]byte diff --git a/crypto/merkletrie/node.go b/crypto/merkletrie/node.go index 33c2f673ab..de765793dd 100644 --- a/crypto/merkletrie/node.go +++ b/crypto/merkletrie/node.go @@ -140,8 +140,9 @@ func (n *node) add(cache *merkleTrieCache, d []byte, path []byte) (nodeID stored } pnode.hash = append(path, d[:idiff]...) + // create ancestors from pnode up to the new split for i := idiff - 1; i >= 0; i-- { - // create a parent node for pnode. + // create a parent node for pnode, and move up pnode2, nodeID2 := cache.allocateNewNode() pnode2.childrenMask.SetBit(d[i]) pnode2.children = []childEntry{ @@ -152,7 +153,6 @@ func (n *node) add(cache *merkleTrieCache, d []byte, path []byte) (nodeID stored } pnode2.hash = append(path, d[:i]...) - pnode = pnode2 nodeID = nodeID2 } return nodeID, nil @@ -160,16 +160,14 @@ func (n *node) add(cache *merkleTrieCache, d []byte, path []byte) (nodeID stored if n.childrenMask.Bit(d[0]) == false { // no such child. - var childNode *node - var childNodeID storedNodeIdentifier - childNode, childNodeID = cache.allocateNewNode() + childNode, childNodeID := cache.allocateNewNode() childNode.hash = d[1:] pnode, nodeID = cache.allocateNewNode() pnode.childrenMask = n.childrenMask pnode.childrenMask.SetBit(d[0]) - pnode.children = make([]childEntry, len(n.children)+1, len(n.children)+1) + pnode.children = make([]childEntry, len(n.children)+1) if d[0] > n.children[len(n.children)-1].hashIndex { // the new entry comes after all the existing ones. for i, child := range n.children { @@ -183,8 +181,8 @@ func (n *node) add(cache *merkleTrieCache, d []byte, path []byte) (nodeID stored for i, child := range n.children { if d[0] < child.hashIndex { pnode.children[i] = childEntry{ - hashIndex: d[0], id: childNodeID, + hashIndex: d[0], } // copy the rest of the items. for ; i < len(n.children); i++ { @@ -211,7 +209,7 @@ func (n *node) add(cache *merkleTrieCache, d []byte, path []byte) (nodeID stored pnode, nodeID = childNode, cache.refurbishNode(curNodeID) pnode.childrenMask = n.childrenMask if len(pnode.children) < len(n.children) { - pnode.children = make([]childEntry, len(n.children), len(n.children)) + pnode.children = make([]childEntry, len(n.children)) } else { pnode.children = pnode.children[:len(n.children)] } @@ -270,7 +268,7 @@ func (n *node) remove(cache *merkleTrieCache, key []byte, path []byte) (nodeID s pnode, nodeID = childNode, cache.refurbishNode(childNodeID) pnode.childrenMask = n.childrenMask // we are guaranteed to have other children, because our tree forbids nodes that have exactly one leaf child and no other children. - pnode.children = make([]childEntry, len(n.children)-1, len(n.children)-1) + pnode.children = make([]childEntry, len(n.children)-1) copy(pnode.children, append(n.children[:childIndex], n.children[childIndex+1:]...)) pnode.childrenMask.ClearBit(key[0]) } else { @@ -283,7 +281,7 @@ func (n *node) remove(cache *merkleTrieCache, key []byte, path []byte) (nodeID s pnode, nodeID = childNode, cache.refurbishNode(childNodeID) pnode.childrenMask = n.childrenMask if len(pnode.children) < len(n.children) { - pnode.children = make([]childEntry, len(n.children), len(n.children)) + pnode.children = make([]childEntry, len(n.children)) } else { pnode.children = pnode.children[:len(n.children)] } @@ -371,7 +369,7 @@ func deserializeNode(buf []byte) (n *node, s int) { prevChildIndex = childIndex i++ } - n.children = make([]childEntry, i, i) + n.children = make([]childEntry, i) copy(n.children, childEntries[:i]) return } diff --git a/crypto/merkletrie/node_test.go b/crypto/merkletrie/node_test.go index 1495a8e9c0..893fbc86b2 100644 --- a/crypto/merkletrie/node_test.go +++ b/crypto/merkletrie/node_test.go @@ -17,6 +17,8 @@ package merkletrie import ( + "crypto/sha512" + "encoding/binary" "testing" "github.com/stretchr/testify/require" @@ -67,17 +69,24 @@ func (n *node) leafUsingChildrenLength() bool { return len(n.children) == 0 } +func makeHashes(n int) [][]byte { + hashes := make([][]byte, n) + for i := 0; i < len(hashes); i++ { + buf := make([]byte, 32) + binary.BigEndian.PutUint64(buf, uint64(i)) + h := crypto.Hash(buf) + hashes[i] = h[:] + } + return hashes +} + func BenchmarkNodeLeafImplementation(b *testing.B) { + hashes := makeHashes(100000) + b.Run("leaf-ChildrenMask", func(b *testing.B) { var memoryCommitter InMemoryCommitter memConfig := defaultTestMemoryConfig mt1, _ := MakeTrie(&memoryCommitter, memConfig) - // create 100000 hashes. - leafsCount := 100000 - hashes := make([]crypto.Digest, leafsCount) - for i := 0; i < len(hashes); i++ { - hashes[i] = crypto.Hash([]byte{byte(i % 256), byte((i / 256) % 256), byte(i / 65536)}) - } for i := 0; i < len(hashes); i++ { mt1.Add(hashes[i][:]) @@ -100,12 +109,6 @@ func BenchmarkNodeLeafImplementation(b *testing.B) { var memoryCommitter InMemoryCommitter memConfig := defaultTestMemoryConfig mt1, _ := MakeTrie(&memoryCommitter, memConfig) - // create 100000 hashes. - leafsCount := 100000 - hashes := make([]crypto.Digest, leafsCount) - for i := 0; i < len(hashes); i++ { - hashes[i] = crypto.Hash([]byte{byte(i % 256), byte((i / 256) % 256), byte(i / 65536)}) - } for i := 0; i < len(hashes); i++ { mt1.Add(hashes[i][:]) @@ -125,3 +128,76 @@ func BenchmarkNodeLeafImplementation(b *testing.B) { } }) } + +// calculateHashIncrementally uses the Writer interface to the crypto digest to +// avoid accumulating in a buffer. Yet it's slower! I don't know why, but +// leaving it here to benchmark more carefully later. (The final use of +// d.Sum(nil) instead of d.Sum(n.hash[:0]) is needed because we share the +// backing array for the slices in node hashes. But that is not the cause of the +// slow down.) +func (n *node) calculateHashIncrementally(cache *merkleTrieCache) error { + if n.leaf() { + return nil + } + path := n.hash + + d := sha512.New512_256() + + // we add this string length before the actual string so it could get "decoded"; in practice, it makes a good domain separator. + d.Write([]byte{byte(len(path))}) + d.Write(path) + for _, child := range n.children { + childNode, err := cache.getNode(child.id) + if err != nil { + return err + } + if childNode.leaf() { + d.Write([]byte{0}) + } else { + d.Write([]byte{1}) + } + // we add this string length before the actual string so it could get "decoded"; in practice, it makes a good domain separator. + d.Write([]byte{byte(len(childNode.hash))}) + d.Write([]byte{child.hashIndex}) // adding the first byte of the child + d.Write(childNode.hash) // adding the reminder of the child + } + n.hash = d.Sum(nil) + return nil +} + +func BenchmarkAdd(b *testing.B) { + b.ReportAllocs() + + memConfig := defaultTestMemoryConfig + mt, _ := MakeTrie(&InMemoryCommitter{}, memConfig) + hashes := makeHashes(b.N) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mt.Add(hashes[i]) + if i%1000 == 999 { + mt.Commit() // not sure how often we should Commit for a nice benchmark + } + } +} + +func BenchmarkDelete(b *testing.B) { + b.ReportAllocs() + + memConfig := defaultTestMemoryConfig + mt, _ := MakeTrie(&InMemoryCommitter{}, memConfig) + hashes := makeHashes(b.N) + for i := 0; i < b.N; i++ { + mt.Add(hashes[i]) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mt.Delete(hashes[i]) + if i%1000 == 999 { // not sure how often we should Commit for a nice benchmark + mt.Commit() + } + } +} diff --git a/crypto/merkletrie/trie.go b/crypto/merkletrie/trie.go index 7a214e7097..f92776e420 100644 --- a/crypto/merkletrie/trie.go +++ b/crypto/merkletrie/trie.go @@ -32,8 +32,8 @@ const ( NodePageVersion = uint64(0x1000000010000000) ) -// ErrRootPageDecodingFailuire is returned if the decoding the root page has failed. -var ErrRootPageDecodingFailuire = errors.New("error encountered while decoding root page") +// ErrRootPageDecodingFailure is returned if the decoding the root page has failed. +var ErrRootPageDecodingFailure = errors.New("error encountered while decoding root page") // ErrMismatchingElementLength is returned when an element is being added/removed from the trie that doesn't align with the trie's previous elements length var ErrMismatchingElementLength = errors.New("mismatching element length") @@ -63,7 +63,7 @@ type Trie struct { root storedNodeIdentifier nextNodeID storedNodeIdentifier lastCommittedNodeID storedNodeIdentifier - cache *merkleTrieCache + cache merkleTrieCache elementLength int } @@ -79,7 +79,7 @@ type Stats struct { func MakeTrie(committer Committer, memoryConfig MemoryConfig) (*Trie, error) { mt := &Trie{ root: storedNodeIdentifierNull, - cache: &merkleTrieCache{}, + cache: merkleTrieCache{}, nextNodeID: storedNodeIdentifierBase, lastCommittedNodeID: storedNodeIdentifierBase, } @@ -106,11 +106,9 @@ func MakeTrie(committer Committer, memoryConfig MemoryConfig) (*Trie, error) { return mt, nil } -// SetCommitter set the provided committter as the current committer, and return the old one. -func (mt *Trie) SetCommitter(committer Committer) (prevCommitter Committer) { - prevCommitter = mt.cache.committer +// SetCommitter sets the provided committer as the current committer +func (mt *Trie) SetCommitter(committer Committer) { mt.cache.committer = committer - return } // RootHash returns the root hash of all the elements in the trie @@ -154,13 +152,13 @@ func (mt *Trie) Add(d []byte) (bool, error) { if err != nil { return false, err } - found, err := pnode.find(mt.cache, d[:]) + found, err := pnode.find(&mt.cache, d[:]) if found || (err != nil) { return false, err } mt.cache.beginTransaction() var updatedRoot storedNodeIdentifier - updatedRoot, err = pnode.add(mt.cache, d[:], make([]byte, 0, len(d))) + updatedRoot, err = pnode.add(&mt.cache, d[:], make([]byte, 0, len(d))) if err != nil { mt.cache.rollbackTransaction() return false, err @@ -184,7 +182,7 @@ func (mt *Trie) Delete(d []byte) (bool, error) { if err != nil { return false, err } - found, err := pnode.find(mt.cache, d[:]) + found, err := pnode.find(&mt.cache, d[:]) if !found || err != nil { return false, err } @@ -198,7 +196,7 @@ func (mt *Trie) Delete(d []byte) (bool, error) { return true, nil } var updatedRoot storedNodeIdentifier - updatedRoot, err = pnode.remove(mt.cache, d[:], make([]byte, 0, len(d))) + updatedRoot, err = pnode.remove(&mt.cache, d[:], make([]byte, 0, len(d))) if err != nil { mt.cache.rollbackTransaction() return false, err @@ -218,7 +216,7 @@ func (mt *Trie) GetStats() (stats Stats, err error) { if err != nil { return Stats{}, err } - err = pnode.stats(mt.cache, &stats, 1) + err = pnode.stats(&mt.cache, &stats, 1) return } @@ -260,30 +258,30 @@ func (mt *Trie) serialize() []byte { return serializedBuffer[:version+root+next+elementLength+pageSizeLength] } -// serialize serializes the trie root +// deserialize deserializes the trie root func (mt *Trie) deserialize(bytes []byte) (int64, error) { version, versionLen := binary.Uvarint(bytes[:]) if versionLen <= 0 { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } if version != MerkleTreeVersion { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } root, rootLen := binary.Uvarint(bytes[versionLen:]) if rootLen <= 0 { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } nextNodeID, nextNodeIDLen := binary.Uvarint(bytes[versionLen+rootLen:]) if nextNodeIDLen <= 0 { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } elemLength, elemLengthLength := binary.Uvarint(bytes[versionLen+rootLen+nextNodeIDLen:]) if elemLengthLength <= 0 { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } pageSize, pageSizeLength := binary.Uvarint(bytes[versionLen+rootLen+nextNodeIDLen+elemLengthLength:]) if pageSizeLength <= 0 { - return 0, ErrRootPageDecodingFailuire + return 0, ErrRootPageDecodingFailure } mt.root = storedNodeIdentifier(root) mt.nextNodeID = storedNodeIdentifier(nextNodeID) diff --git a/ledger/store/merkle_commiter.go b/ledger/store/merkle_committer.go similarity index 100% rename from ledger/store/merkle_commiter.go rename to ledger/store/merkle_committer.go From 44efbb014673899cf0ee18f65a00cae713b0e822 Mon Sep 17 00:00:00 2001 From: John Jannotti Date: Mon, 13 Mar 2023 12:56:26 -0400 Subject: [PATCH 2/2] Couple more typos, and unexport a constant --- crypto/merkletrie/cache.go | 18 +++++++++--------- crypto/merkletrie/committer_test.go | 10 +++++----- crypto/merkletrie/trie.go | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/crypto/merkletrie/cache.go b/crypto/merkletrie/cache.go index e0112a07d7..01648d73a6 100644 --- a/crypto/merkletrie/cache.go +++ b/crypto/merkletrie/cache.go @@ -39,8 +39,8 @@ const ( // be found in neither the in-memory cache or on the persistent storage. var ErrLoadedPageMissingNode = errors.New("loaded page is missing a node") -// ErrPageDecodingFailuire is returned if the decoding of a page has failed. -var ErrPageDecodingFailuire = errors.New("error encountered while decoding page") +// ErrPageDecodingFailure is returned if the decoding of a page has failed. +var ErrPageDecodingFailure = errors.New("error encountered while decoding page") type merkleTrieCache struct { // mt is a pointer to the originating trie @@ -669,26 +669,26 @@ func (mtc *merkleTrieCache) reallocateNode(nid storedNodeIdentifier) storedNodeI func decodePage(bytes []byte) (nodesMap map[storedNodeIdentifier]*node, err error) { version, versionLength := binary.Uvarint(bytes[:]) if versionLength <= 0 { - return nil, ErrPageDecodingFailuire + return nil, ErrPageDecodingFailure } - if version != NodePageVersion { - return nil, ErrPageDecodingFailuire + if version != nodePageVersion { + return nil, ErrPageDecodingFailure } nodesCount, nodesCountLength := binary.Varint(bytes[versionLength:]) if nodesCountLength <= 0 { - return nil, ErrPageDecodingFailuire + return nil, ErrPageDecodingFailure } nodesMap = make(map[storedNodeIdentifier]*node) walk := nodesCountLength + versionLength for i := int64(0); i < nodesCount; i++ { nodeID, nodesIDLength := binary.Uvarint(bytes[walk:]) if nodesIDLength <= 0 { - return nil, ErrPageDecodingFailuire + return nil, ErrPageDecodingFailure } walk += nodesIDLength pnode, nodeLength := deserializeNode(bytes[walk:]) if nodeLength <= 0 { - return nil, ErrPageDecodingFailuire + return nil, ErrPageDecodingFailure } walk += nodeLength nodesMap[storedNodeIdentifier(nodeID)] = pnode @@ -699,7 +699,7 @@ func decodePage(bytes []byte) (nodesMap map[storedNodeIdentifier]*node, err erro // decodePage encodes a page contents into a byte array func (mtc *merkleTrieCache) encodePage(nodeIDs map[storedNodeIdentifier]*node, serializedBuffer []byte) []byte { - version := binary.PutUvarint(serializedBuffer[:], NodePageVersion) + version := binary.PutUvarint(serializedBuffer[:], nodePageVersion) length := binary.PutVarint(serializedBuffer[version:], int64(len(nodeIDs))) walk := version + length for nodeID, pnode := range nodeIDs { diff --git a/crypto/merkletrie/committer_test.go b/crypto/merkletrie/committer_test.go index 6f8dfb2a78..c8bfd71435 100644 --- a/crypto/merkletrie/committer_test.go +++ b/crypto/merkletrie/committer_test.go @@ -140,18 +140,18 @@ func TestNoRedundentPages(t *testing.T) { require.Equal(t, nodesCount, mt1.cache.cachedNodeCount) } -// decodePage decodes a byte array into a page content +// decodePageHeaderSize decodes a page header at the start of a byte array func decodePageHeaderSize(bytes []byte) (headerSize int, err error) { version, versionLength := binary.Uvarint(bytes[:]) if versionLength <= 0 { - return 0, ErrPageDecodingFailuire + return 0, ErrPageDecodingFailure } - if version != NodePageVersion { - return 0, ErrPageDecodingFailuire + if version != nodePageVersion { + return 0, ErrPageDecodingFailure } _, nodesCountLength := binary.Varint(bytes[versionLength:]) if nodesCountLength <= 0 { - return 0, ErrPageDecodingFailuire + return 0, ErrPageDecodingFailure } return nodesCountLength + versionLength, nil } diff --git a/crypto/merkletrie/trie.go b/crypto/merkletrie/trie.go index f92776e420..39051699f3 100644 --- a/crypto/merkletrie/trie.go +++ b/crypto/merkletrie/trie.go @@ -24,12 +24,12 @@ import ( ) const ( - // MerkleTreeVersion is the version of the encoded trie. If we ever want to make changes and want to have upgrade path, + // merkleTreeVersion is the version of the encoded trie. If we ever want to make changes and want to have upgrade path, // this would give us the ability to do so. - MerkleTreeVersion = uint64(0x1000000010000000) - // NodePageVersion is the version of the encoded node. If we ever want to make changes and want to have upgrade path, + merkleTreeVersion = uint64(0x1000000010000000) + // nodePageVersion is the version of the encoded node. If we ever want to make changes and want to have upgrade path, // this would give us the ability to do so. - NodePageVersion = uint64(0x1000000010000000) + nodePageVersion = uint64(0x1000000010000000) ) // ErrRootPageDecodingFailure is returned if the decoding the root page has failed. @@ -250,7 +250,7 @@ func (mt *Trie) Evict(commit bool) (int, error) { // serialize serializes the trie root func (mt *Trie) serialize() []byte { serializedBuffer := make([]byte, 5*binary.MaxVarintLen64) // allocate the worst-case scenario for the trie header. - version := binary.PutUvarint(serializedBuffer[:], MerkleTreeVersion) + version := binary.PutUvarint(serializedBuffer[:], merkleTreeVersion) root := binary.PutUvarint(serializedBuffer[version:], uint64(mt.root)) next := binary.PutUvarint(serializedBuffer[version+root:], uint64(mt.nextNodeID)) elementLength := binary.PutUvarint(serializedBuffer[version+root+next:], uint64(mt.elementLength)) @@ -264,7 +264,7 @@ func (mt *Trie) deserialize(bytes []byte) (int64, error) { if versionLen <= 0 { return 0, ErrRootPageDecodingFailure } - if version != MerkleTreeVersion { + if version != merkleTreeVersion { return 0, ErrRootPageDecodingFailure } root, rootLen := binary.Uvarint(bytes[versionLen:])