From 29a66bdaf9366b6c7f12fccf6135db2009e6e711 Mon Sep 17 00:00:00 2001 From: Lovesh Date: Wed, 12 Jul 2017 14:39:02 +0530 Subject: [PATCH 1/3] Hash store is used to recover when the ledger starts, fixed the calculation for consistency Recovering from transaction log is slow as each transaction is applied again to the tree. --- ledger/compact_merkle_tree.py | 24 ++++++++-- ledger/ledger.py | 46 ++++++++----------- ledger/merkle_tree.py | 2 +- ledger/stores/hash_store.py | 8 ++++ ledger/test/test_file_hash_store.py | 8 ++++ ledger/test/test_ledger.py | 2 +- ledger/test/test_merkle_proof.py | 15 +++--- .../test_prepare_message.py | 5 +- .../test_node_request_consistency_proof.py | 3 -- 9 files changed, 67 insertions(+), 46 deletions(-) diff --git a/ledger/compact_merkle_tree.py b/ledger/compact_merkle_tree.py index c74c61cfab..8f798d8ee8 100644 --- a/ledger/compact_merkle_tree.py +++ b/ledger/compact_merkle_tree.py @@ -261,10 +261,24 @@ def leafCount(self) -> int: def nodeCount(self) -> int: return self.hashStore.nodeCount - def verifyConsistency(self, expectedLeafCount = -1) -> bool: - if expectedLeafCount > 0 and expectedLeafCount != self.leafCount: + @staticmethod + def get_expected_node_count(leaf_count): + """ + The number of nodes is the number of full subtrees present + """ + count = 0 + while leaf_count > 1: + leaf_count //= 2 + count += leaf_count + return count + + def verify_consistency(self, expected_leaf_count) -> bool: + """ + Check that the tree has same leaf count as expected and the + number of nodes are also as expected + """ + if expected_leaf_count != self.leafCount: raise ConsistencyVerificationFailed() - expectedNodeCount = count_bits_set(self.leafCount) - if not expectedNodeCount == self.nodeCount: + if self.get_expected_node_count(self.leafCount) != self.nodeCount: raise ConsistencyVerificationFailed() - return True \ No newline at end of file + return True diff --git a/ledger/ledger.py b/ledger/ledger.py index 252d1f024d..ad2abc2d63 100644 --- a/ledger/ledger.py +++ b/ledger/ledger.py @@ -73,34 +73,28 @@ def recoverTree(self): .format(type(self.tree))) - # ATTENTION! - # This functionality is disabled until better consistency verification - # implemented - always using recovery from transaction log - # from ledger.stores.memory_hash_store import MemoryHashStore - # from ledger.util import ConsistencyVerificationFailed - # if not self.tree.hashStore \ - # or isinstance(self.tree.hashStore, MemoryHashStore) \ - # or self.tree.leafCount == 0: - # logging.info("Recovering tree from transaction log") - # self.recoverTreeFromTxnLog() - # else: - # try: - # logging.info("Recovering tree from hash store of size {}". - # format(self.tree.leafCount)) - # self.recoverTreeFromHashStore() - # except ConsistencyVerificationFailed: - # logging.error("Consistency verification of merkle tree " - # "from hash store failed, " - # "falling back to transaction log") - # self.recoverTreeFromTxnLog() - - logging.debug("Recovering tree from transaction log") + from ledger.stores.memory_hash_store import MemoryHashStore + from ledger.util import ConsistencyVerificationFailed start = time.perf_counter() - self.recoverTreeFromTxnLog() + if not self.tree.hashStore \ + or isinstance(self.tree.hashStore, MemoryHashStore) \ + or self.tree.leafCount == 0: + logging.info("Recovering tree from transaction log") + self.recoverTreeFromTxnLog() + else: + try: + logging.info("Recovering tree from hash store of size {}". + format(self.tree.leafCount)) + self.recoverTreeFromHashStore() + except ConsistencyVerificationFailed: + logging.error("Consistency verification of merkle tree " + "from hash store failed, " + "falling back to transaction log") + self.recoverTreeFromTxnLog() + end = time.perf_counter() t = end - start - logging.debug("Recovered tree from transaction log in {} seconds". - format(t)) + logging.debug("Recovered tree in {} seconds".format(t)) def recoverTreeFromTxnLog(self): # TODO: in this and some other lines specific fields of @@ -118,7 +112,7 @@ def recoverTreeFromHashStore(self): hashes = list(reversed(self.tree.inclusion_proof(treeSize, treeSize + 1))) self.tree._update(self.tree.leafCount, hashes) - self.tree.verifyConsistency(self._transactionLog.numKeys) + self.tree.verify_consistency(self._transactionLog.numKeys) def add(self, leaf): self._addToStore(leaf) diff --git a/ledger/merkle_tree.py b/ledger/merkle_tree.py index 58f899916a..7279191481 100644 --- a/ledger/merkle_tree.py +++ b/ledger/merkle_tree.py @@ -69,6 +69,6 @@ def nodeCount(self) -> int: """ @abstractmethod - def verifyConsistency(self, expectedLeafCount) -> bool: + def verify_consistency(self, expectedLeafCount) -> bool: """ """ \ No newline at end of file diff --git a/ledger/stores/hash_store.py b/ledger/stores/hash_store.py index 449144f8f4..e7edf1a022 100644 --- a/ledger/stores/hash_store.py +++ b/ledger/stores/hash_store.py @@ -137,6 +137,14 @@ def readNodeByTree(self, start, height=None): pos = self.getNodePosition(start, height) return self.readNode(pos) + @property + def is_consistent(self) -> bool: + """ + Returns True if number of nodes are consistent with number of leaves + """ + from ledger.compact_merkle_tree import CompactMerkleTree + return self.nodeCount == CompactMerkleTree.get_expected_node_count(self.leafCount) + @staticmethod def _validatePos(start, end=None): if end: diff --git a/ledger/test/test_file_hash_store.py b/ledger/test/test_file_hash_store.py index 56ad9d4cf8..8d229b76d8 100644 --- a/ledger/test/test_file_hash_store.py +++ b/ledger/test/test_file_hash_store.py @@ -51,6 +51,14 @@ def testSimpleReadWrite(nodesLeaves, tempdir): for i, n in enumerate(nds): assert nodes[i][2] == n + # Check that hash store can be closed and re-opened and the contents remain same + leaf_count = fhs.leafCount + node_count = fhs.nodeCount + fhs.close() + reopened_hash_store = FileHashStore(tempdir) + assert reopened_hash_store.leafCount == leaf_count + assert reopened_hash_store.nodeCount == node_count + def testIncorrectWrites(tempdir): fhs = FileHashStore(tempdir, leafSize=50, nodeSize=50) diff --git a/ledger/test/test_ledger.py b/ledger/test/test_ledger.py index 9d5be8e4ad..d41d26ba8a 100644 --- a/ledger/test/test_ledger.py +++ b/ledger/test/test_ledger.py @@ -107,7 +107,7 @@ def testRecoverLedgerFromHashStore(tempdir): fhs = FileHashStore(tempdir) tree = CompactMerkleTree(hashStore=fhs) ledger = Ledger(tree=tree, dataDir=tempdir) - for d in range(10): + for d in range(100): ledger.add(str(d).encode()) updatedTree = ledger.tree ledger.stop() diff --git a/ledger/test/test_merkle_proof.py b/ledger/test/test_merkle_proof.py index 84a3599a03..fbbf138c6d 100644 --- a/ledger/test/test_merkle_proof.py +++ b/ledger/test/test_merkle_proof.py @@ -110,6 +110,9 @@ """ +TXN_COUNT = 1000 + + @pytest.yield_fixture(scope="module", params=['File', 'Memory']) def hashStore(request, tdir): if request.param == 'File': @@ -141,15 +144,13 @@ def hasherAndTree(hasher): def addTxns(hasherAndTree): h, m = hasherAndTree - txn_count = 1000 - auditPaths = [] - for d in range(txn_count): + for d in range(TXN_COUNT): serNo = d+1 data = str(serNo).encode() auditPaths.append([hexlify(h) for h in m.append(data)]) - - return txn_count, auditPaths + print(m.hashStore.leafCount, m.hashStore.nodeCount) + return TXN_COUNT, auditPaths @pytest.fixture() @@ -200,7 +201,7 @@ def testCompactMerkleTree2(hasherAndTree, verifier): def testCompactMerkleTree(hasherAndTree, verifier): h, m = hasherAndTree printEvery = 1000 - count = 1000 + count = TXN_COUNT for d in range(count): data = str(d + 1).encode() data_hex = hexlify(data) @@ -208,6 +209,8 @@ def testCompactMerkleTree(hasherAndTree, verifier): audit_path_hex = [hexlify(h) for h in audit_path] incl_proof = m.inclusion_proof(d, d+1) assert audit_path == incl_proof + assert m.nodeCount == m.get_expected_node_count(m.leafCount) + assert m.hashStore.is_consistent if d % printEvery == 0: show(h, m, data_hex) print("audit path is {}".format(audit_path_hex)) diff --git a/plenum/test/input_validation/message_validation/test_prepare_message.py b/plenum/test/input_validation/message_validation/test_prepare_message.py index 9278ec0c87..49214b7882 100644 --- a/plenum/test/input_validation/message_validation/test_prepare_message.py +++ b/plenum/test/input_validation/message_validation/test_prepare_message.py @@ -1,9 +1,6 @@ -import pytest - from collections import OrderedDict from plenum.common.messages.fields import NonNegativeNumberField, \ - NonEmptyStringField, \ - HexField, MerkleRootField, AnyValueField + NonEmptyStringField, MerkleRootField from plenum.common.messages.node_messages import Prepare EXPECTED_ORDERED_FIELDS = OrderedDict([ diff --git a/plenum/test/node_catchup/test_node_request_consistency_proof.py b/plenum/test/node_catchup/test_node_request_consistency_proof.py index a37056997c..b721e7da01 100644 --- a/plenum/test/node_catchup/test_node_request_consistency_proof.py +++ b/plenum/test/node_catchup/test_node_request_consistency_proof.py @@ -1,7 +1,4 @@ import types -from random import randint - -import pytest from plenum.common.constants import DOMAIN_LEDGER_ID, CONSISTENCY_PROOF from plenum.common.ledger import Ledger From ce74097d230c16e78849102f78c3dd9ef52118ad Mon Sep 17 00:00:00 2001 From: Lovesh Date: Wed, 12 Jul 2017 15:47:15 +0530 Subject: [PATCH 2/3] change log level --- ledger/ledger.py | 4 ++-- plenum/cli/cli.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ledger/ledger.py b/ledger/ledger.py index ad2abc2d63..c25fb587ad 100644 --- a/ledger/ledger.py +++ b/ledger/ledger.py @@ -79,11 +79,11 @@ def recoverTree(self): if not self.tree.hashStore \ or isinstance(self.tree.hashStore, MemoryHashStore) \ or self.tree.leafCount == 0: - logging.info("Recovering tree from transaction log") + logging.debug("Recovering tree from transaction log") self.recoverTreeFromTxnLog() else: try: - logging.info("Recovering tree from hash store of size {}". + logging.debug("Recovering tree from hash store of size {}". format(self.tree.leafCount)) self.recoverTreeFromHashStore() except ConsistencyVerificationFailed: diff --git a/plenum/cli/cli.py b/plenum/cli/cli.py index 3f30ebaca7..a85e6a1dfe 100644 --- a/plenum/cli/cli.py +++ b/plenum/cli/cli.py @@ -252,10 +252,10 @@ def __init__(self, looper, basedirpath, nodeReg=None, cliNodeReg=None, eventloop=eventloop, output=out) - RAETVerbosity = getRAETLogLevelFromConfig("RAETLogLevelCli", - Console.Wordage.mute, - self.config) - RAETLogFile = getRAETLogFilePath("RAETLogFilePathCli", self.config) + # RAETVerbosity = getRAETLogLevelFromConfig("RAETLogLevelCli", + # Console.Wordage.mute, + # self.config) + # RAETLogFile = getRAETLogFilePath("RAETLogFilePathCli", self.config) # Patch stdout in something that will always print *above* the prompt # when something is written to stdout. sys.stdout = self.cli.stdout_proxy() From e5e231e112ff16557418448ae141b91cf8c9cebd Mon Sep 17 00:00:00 2001 From: Lovesh Date: Wed, 12 Jul 2017 16:53:08 +0530 Subject: [PATCH 3/3] put imports at top and add a property to HashStore --- ledger/ledger.py | 10 ++-------- ledger/stores/file_hash_store.py | 4 ++++ ledger/stores/hash_store.py | 4 ++++ ledger/stores/memory_hash_store.py | 4 ++++ ledger/test/test_file_hash_store.py | 1 + ledger/test/test_merkle_proof.py | 6 ++++-- plenum/persistence/leveldb_hash_store.py | 4 ++++ 7 files changed, 23 insertions(+), 10 deletions(-) diff --git a/ledger/ledger.py b/ledger/ledger.py index c25fb587ad..8e5b74852d 100644 --- a/ledger/ledger.py +++ b/ledger/ledger.py @@ -1,10 +1,8 @@ import base64 import logging import time -from collections import OrderedDict from ledger.compact_merkle_tree import CompactMerkleTree -from ledger.stores.chunked_file_store import ChunkedFileStore from ledger.tree_hasher import TreeHasher from ledger.merkle_tree import MerkleTree from ledger.serializers.mapping_serializer import MappingSerializer @@ -12,7 +10,7 @@ from ledger.stores.file_store import FileStore from ledger.stores.text_file_store import TextFileStore from ledger.immutable_store import ImmutableStore -from ledger.util import F +from ledger.util import F, ConsistencyVerificationFailed class Ledger(ImmutableStore): @@ -71,13 +69,9 @@ def recoverTree(self): logging.error("Do not know how to recover {}".format(self.tree)) raise TypeError("Merkle tree type {} is not supported" .format(type(self.tree))) - - - from ledger.stores.memory_hash_store import MemoryHashStore - from ledger.util import ConsistencyVerificationFailed start = time.perf_counter() if not self.tree.hashStore \ - or isinstance(self.tree.hashStore, MemoryHashStore) \ + or not self.tree.hashStore.is_persistent \ or self.tree.leafCount == 0: logging.debug("Recovering tree from transaction log") self.recoverTreeFromTxnLog() diff --git a/ledger/stores/file_hash_store.py b/ledger/stores/file_hash_store.py index f2fcb04f40..6a04975006 100644 --- a/ledger/stores/file_hash_store.py +++ b/ledger/stores/file_hash_store.py @@ -27,6 +27,10 @@ def __init__(self, dataDir, fileNamePrefix="", leafSize=32, nodeSize=32): self.nodeSize = nodeSize self.leafSize = leafSize + @property + def is_persistent(self) -> bool: + return True + @staticmethod def write(data, store, size): if not isinstance(data, bytes): diff --git a/ledger/stores/hash_store.py b/ledger/stores/hash_store.py index e7edf1a022..f74ce2a76e 100644 --- a/ledger/stores/hash_store.py +++ b/ledger/stores/hash_store.py @@ -8,6 +8,10 @@ class HashStore: """ Store of nodeHashes and leafHashes mapped against their sequence numbers. """ + @property + @abstractmethod + def is_persistent(self) -> bool: + pass @abstractmethod def writeLeaf(self, leafHash): diff --git a/ledger/stores/memory_hash_store.py b/ledger/stores/memory_hash_store.py index 2a327d04a3..7c5c8e9ed7 100644 --- a/ledger/stores/memory_hash_store.py +++ b/ledger/stores/memory_hash_store.py @@ -6,6 +6,10 @@ def __init__(self): self.reset() self._closed = False + @property + def is_persistent(self) -> bool: + return False + def writeLeaf(self, leafHash): self._leafs.append(leafHash) diff --git a/ledger/test/test_file_hash_store.py b/ledger/test/test_file_hash_store.py index 8d229b76d8..c3f41774a9 100644 --- a/ledger/test/test_file_hash_store.py +++ b/ledger/test/test_file_hash_store.py @@ -15,6 +15,7 @@ def nodesLeaves(): def writtenFhs(tempdir, nodes, leaves): fhs = FileHashStore(tempdir) + assert fhs.is_persistent for leaf in leaves: fhs.writeLeaf(leaf) for node in nodes: diff --git a/ledger/test/test_merkle_proof.py b/ledger/test/test_merkle_proof.py index fbbf138c6d..56c53aee71 100644 --- a/ledger/test/test_merkle_proof.py +++ b/ledger/test/test_merkle_proof.py @@ -117,9 +117,12 @@ def hashStore(request, tdir): if request.param == 'File': fhs = FileHashStore(tdir) + assert fhs.is_persistent yield fhs elif request.param == 'Memory': - yield MemoryHashStore() + mhs = MemoryHashStore() + assert not mhs.is_persistent + yield mhs @pytest.fixture() @@ -149,7 +152,6 @@ def addTxns(hasherAndTree): serNo = d+1 data = str(serNo).encode() auditPaths.append([hexlify(h) for h in m.append(data)]) - print(m.hashStore.leafCount, m.hashStore.nodeCount) return TXN_COUNT, auditPaths diff --git a/plenum/persistence/leveldb_hash_store.py b/plenum/persistence/leveldb_hash_store.py index ec3ca68f9a..bb45463c0f 100644 --- a/plenum/persistence/leveldb_hash_store.py +++ b/plenum/persistence/leveldb_hash_store.py @@ -17,6 +17,10 @@ def __init__(self, dataDir): self.leavesDb = None self.open() + @property + def is_persistent(self) -> bool: + return True + def writeLeaf(self, leafHash): self.leavesDb.put(str(self.leafCount + 1), leafHash) self.leafCount += 1