Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

pallet-mmr: handle forks without collisions in offchain storage #11594

Merged
merged 35 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
4ded369
pallet-mmr: fix some typos
acatangiu Jun 1, 2022
eb9eea8
pallet-mmr: make the MMR resilient to chain forks
acatangiu Jun 1, 2022
7c8aac0
pallet-mmr: get hash for block that added node
acatangiu Jun 2, 2022
c82ac27
beefy-mmr: add debug logging
acatangiu Jun 21, 2022
98abe4a
add explanatory comment
acatangiu Jun 22, 2022
db41f81
account for block offset of pallet activation
acatangiu Jun 22, 2022
bfd4142
add support for finding all nodes added by leaf
acatangiu Jun 22, 2022
417de11
minor improvements
acatangiu Jun 23, 2022
25468f9
add helper to return all nodes added to mmr with a leaf append
Lederstrumpf Jun 23, 2022
19b35ee
simplify leaf_node_index_to_leaf_index
Lederstrumpf Jun 23, 2022
f9f202a
dead fish: this also doesn't work
acatangiu Jun 23, 2022
05c5f26
simplify rightmost_leaf_node_index_from_pos
Lederstrumpf Jun 23, 2022
e15810d
minor fix
acatangiu Jun 24, 2022
f803ce9
move leaf canonicalization to offchain worker
acatangiu Jun 24, 2022
94e3b61
move storage related code to storage.rs
acatangiu Jun 24, 2022
ecb77a7
on offchain reads use canonic key for old leaves
acatangiu Jun 24, 2022
a17cb65
fix offchain worker write using canon key
acatangiu Jun 24, 2022
e9ab363
fix pallet-mmr tests
acatangiu Jun 24, 2022
b75b0ae
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jun 27, 2022
8856db1
add documentation and fix logging
acatangiu Jun 27, 2022
dfc5b45
add offchain mmr canonicalization test
acatangiu Jun 28, 2022
e9edd28
test canon + generate + verify
acatangiu Jun 28, 2022
f472e9b
fix pallet-beefy-mmr tests
acatangiu Jun 28, 2022
5ed95c7
implement review suggestions
acatangiu Jun 28, 2022
2c8b101
improve test
acatangiu Jun 29, 2022
56f2eaa
pallet-mmr: add offchain pruning of forks
acatangiu Jun 30, 2022
e22a936
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jun 30, 2022
2629734
pallet-mmr: improve offchain pruning
acatangiu Jul 1, 2022
58d116f
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jul 1, 2022
4e6de82
pallet-mmr: improve MMRStore<OffchainStorage>::get()
acatangiu Jul 1, 2022
e72e344
Merge branch 'master' of github.com:paritytech/substrate into mmr-han…
acatangiu Jul 5, 2022
6335512
pallet-mmr: storage: improve logs
acatangiu Jul 5, 2022
eab718c
fix tests: correctly persist overlay
acatangiu Jul 5, 2022
8a8f3d2
pallet-mmr: fix numeric typo in test
Lederstrumpf Jul 6, 2022
ba9fd71
add comment around LeafData requirements
acatangiu Jul 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions frame/merkle-mountain-range/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
#![cfg_attr(not(feature = "std"), no_std)]

use codec::Encode;
use frame_support::weights::Weight;
use frame_support::{log::info, weights::Weight};
use sp_runtime::traits::{self, One, Saturating};

#[cfg(any(feature = "runtime-benchmarks", test))]
Expand Down Expand Up @@ -116,12 +116,12 @@ pub mod pallet {
/// Prefix for elements stored in the Off-chain DB via Indexing API.
///
/// Each node of the MMR is inserted both on-chain and off-chain via Indexing API.
/// The former does not store full leaf content, just it's compact version (hash),
/// The former does not store full leaf content, just its compact version (hash),
/// and some of the inner mmr nodes might be pruned from on-chain storage.
/// The latter will contain all the entries in their full form.
///
/// Each node is stored in the Off-chain DB under key derived from the
/// [`Self::INDEXING_PREFIX`] and it's in-tree index (MMR position).
/// [`Self::INDEXING_PREFIX`] and its in-tree index (MMR position).
const INDEXING_PREFIX: &'static [u8];

/// A hasher type for MMR.
Expand Down Expand Up @@ -199,6 +199,7 @@ pub mod pallet {
#[pallet::hooks]
impl<T: Config<I>, I: 'static> Hooks<BlockNumberFor<T>> for Pallet<T, I> {
fn on_initialize(_n: T::BlockNumber) -> Weight {
info!(target: "runtime::mmr", "🥩: initialize block {:?}", _n);
use primitives::LeafDataProvider;
let leaves = Self::mmr_leaves();
let peaks_before = mmr::utils::NodesUtils::new(leaves).number_of_peaks();
Expand All @@ -207,6 +208,8 @@ pub mod pallet {
let mut mmr: ModuleMmr<mmr::storage::RuntimeStorage, T, I> = mmr::Mmr::new(leaves);
mmr.push(data).expect("MMR push never fails.");

info!(target: "runtime::mmr", "🥩: on_initialize(block-num: {:?}): leaves_before {}, peaks_before {}", _n, leaves, peaks_before);

// update the size
let (leaves, root) = mmr.finalize().expect("MMR finalize never fails.");
<T::OnNewRoot as primitives::OnNewRoot<_>>::on_new_root(&root);
Expand All @@ -215,6 +218,9 @@ pub mod pallet {
<RootHash<T, I>>::put(root);

let peaks_after = mmr::utils::NodesUtils::new(leaves).number_of_peaks();

info!(target: "runtime::mmr", "🥩: on_initialize(block-num: {:?}): leaves {}, peaks {}", _n, leaves, peaks_after);

T::WeightInfo::on_initialize(peaks_before.max(peaks_after))
}
}
Expand Down Expand Up @@ -254,17 +260,24 @@ where
}

impl<T: Config<I>, I: 'static> Pallet<T, I> {
fn offchain_key(pos: NodeIndex) -> sp_std::prelude::Vec<u8> {
(T::INDEXING_PREFIX, pos).encode()
/// Derive the offchain DB key under which MMR node `pos` is stored.
///
/// The key is the SCALE encoding of the tuple
/// `(T::INDEXING_PREFIX, parent_hash, pos)`, where `parent_hash` is the parent
/// hash of the block that originally added node `pos` to the MMR. Including the
/// hash keeps entries written on different chain forks from sharing a key.
fn offchain_key(
    parent_hash: <T as frame_system::Config>::Hash,
    pos: NodeIndex,
) -> sp_std::prelude::Vec<u8> {
    let key_parts = (T::INDEXING_PREFIX, parent_hash, pos);
    key_parts.encode()
}

/// Generate a MMR proof for the given `leaf_indices`.
///
/// Note this method can only be used from an off-chain context
/// (Offchain Worker or Runtime API call), since it requires
/// all the leaves to be present.
/// It may return an error or panic if used incorrectly.
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
pub fn generate_batch_proof(
leaf_indices: Vec<NodeIndex>,
leaf_indices: Vec<LeafIndex>,
) -> Result<
(Vec<LeafOf<T, I>>, primitives::BatchProof<<T as Config<I>>::Hash>),
primitives::Error,
Expand Down
92 changes: 87 additions & 5 deletions frame/merkle-mountain-range/src/mmr/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@
//! MMR storage implementations.

use codec::Encode;
use frame_support::{log::info, traits::Get};
use mmr_lib::helper;
use sp_io::offchain_index;
use sp_mmr_primitives::LeafIndex;
use sp_runtime::{
traits::{One, Saturating},
SaturatedConversion,
};
use sp_std::iter::Peekable;
#[cfg(not(feature = "std"))]
use sp_std::prelude::*;
Expand Down Expand Up @@ -58,14 +64,76 @@ impl<StorageType, T, I, L> Default for Storage<StorageType, T, I, L> {
}
}

impl<StorageType, T, I, L> Storage<StorageType, T, I, L>
where
    T: Config<I>,
    I: 'static,
    L: primitives::FullLeaf + codec::Decode,
{
    /// Return the parent hash of the block whose leaf append added node `pos`.
    ///
    /// The block is located arithmetically from the current block number, the
    /// current number of leaves and the index of the leaf that added `pos`;
    /// its hash is then read via `frame_system::Pallet::block_hash`.
    fn parent_hash_of_ancestor_that_added_node(
        pos: NodeIndex,
    ) -> <T as frame_system::Config>::Hash {
        let leaves_count: <T as frame_system::Config>::BlockNumber =
            NumberOfLeaves::<T, I>::get().saturated_into();
        let ancestor_leaf_idx: <T as frame_system::Config>::BlockNumber =
            NodesUtils::leaf_index_that_added_node(pos).saturated_into();

        // Leaves are zero-indexed and one leaf is added per block since pallet
        // activation, while block numbers are one-indexed. Hence:
        //   block_offset     = current_block_num - leaves_count
        //   block_num        = block_offset + leaf_idx + 1
        //   parent_block_num = block_offset + leaf_idx
        let block_offset = <frame_system::Pallet<T>>::block_number().saturating_sub(leaves_count);
        let parent_block_num = block_offset.saturating_add(ancestor_leaf_idx);

        // NOTE(review): `block_hash` presumably only keeps a window of recent
        // block hashes, so the lookup may not succeed for old blocks — confirm.
        let parent_hash = <frame_system::Pallet<T>>::block_hash(parent_block_num);
        info!(
            target: "runtime::mmr",
            "🥩: parent of ancestor that added {}: leaf idx {:?}, block-num {:?} (block offset {:?}) hash {:?}",
            pos, ancestor_leaf_idx, parent_block_num, block_offset, parent_hash
        );
        parent_hash
    }

    /// List the positions of every MMR node added when leaf `leaf_index` was appended.
    ///
    /// The result starts with the leaf's own position and continues with the
    /// consecutive positions right after it, for as long as they are below the
    /// current MMR size and have a height greater than zero.
    fn nodes_added_by_leaf(leaf_index: LeafIndex) -> Vec<NodeIndex> {
        let mmr_size = NodesUtils::new(NumberOfLeaves::<T, I>::get()).size();
        let pos = helper::leaf_index_to_pos(leaf_index);

        // The leaf itself, then every following non-leaf position inside the MMR.
        let mut added = vec![pos];
        added.extend(
            (pos + 1..mmr_size).take_while(|&next| helper::pos_height_in_tree(next) > 0),
        );
        info!(
            target: "runtime::mmr",
            "🥩: nodes_added_by_leaf(idx {}, pos {}): {:?}",
            leaf_index, pos, added
        );
        added
    }
}

impl<T, I, L> mmr_lib::MMRStore<NodeOf<T, I, L>> for Storage<OffchainStorage, T, I, L>
where
T: Config<I>,
I: 'static,
L: primitives::FullLeaf + codec::Decode,
{
fn get_elem(&self, pos: NodeIndex) -> mmr_lib::Result<Option<NodeOf<T, I, L>>> {
let key = Pallet::<T, I>::offchain_key(pos);
// Get the parent hash of the ancestor block that added node at index `pos`.
// Use the hash as extra identifier to differentiate between various `pos` entries
// in offchain DB coming from various chain forks.
let parent_hash_of_ancestor = Self::parent_hash_of_ancestor_that_added_node(pos);
let key = Pallet::<T, I>::offchain_key(parent_hash_of_ancestor, pos);
info!(
target: "runtime::mmr",
"🥩: get elem {}: key {:?}",
pos, key
);
// Retrieve the element from Off-chain DB.
Ok(sp_io::offchain::local_storage_get(sp_core::offchain::StorageKind::PERSISTENT, &key)
.and_then(|v| codec::Decode::decode(&mut &*v).ok()))
Expand All @@ -92,12 +160,18 @@ where
}

sp_std::if_std! {
frame_support::log::trace!("elems: {:?}", elems.iter().map(|elem| elem.hash()).collect::<Vec<_>>());
frame_support::log::info!("elems: {:?}", elems.iter().map(|elem| elem.hash()).collect::<Vec<_>>());
}

let leaves = NumberOfLeaves::<T, I>::get();
let size = NodesUtils::new(leaves).size();

info!(
target: "runtime::mmr",
"🥩: append elem {}: leaves {} size {}",
pos, leaves, size
);

if pos != size {
return Err(mmr_lib::Error::InconsistentStore)
}
Expand All @@ -112,11 +186,19 @@ where
let mut leaf_index = leaves;
let mut node_index = size;

// Use parent hash of block adding new nodes (this block) as extra identifier
// in offchain DB to avoid DB collisions and overwrites in case of forks.
let parent_hash = <frame_system::Pallet<T>>::parent_hash();
let block_number = <frame_system::Pallet<T>>::block_number();
for elem in elems {
let key = Pallet::<T, I>::offchain_key(parent_hash, node_index);
info!(
target: "runtime::mmr",
"🥩: offchain set: block-num {:?}, parent_hash {:?} node-idx {} key {:?}",
block_number, parent_hash, node_index, key
);
// Indexing API is used to store the full node content (both leaf and inner).
elem.using_encoded(|elem| {
offchain_index::set(&Pallet::<T, I>::offchain_key(node_index), elem)
});
elem.using_encoded(|elem| offchain_index::set(&key, elem));

// On-chain we are going to only store new peaks.
if peaks_to_store.next_if_eq(&node_index).is_some() {
Expand Down
48 changes: 48 additions & 0 deletions frame/merkle-mountain-range/src/mmr/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,60 @@ impl NodesUtils {

64 - self.no_of_leaves.next_power_of_two().leading_zeros()
}

/// Return the `LeafIndex` of the leaf whose append added `node_index` to the MMR.
///
/// Resolves `node_index` to the position of its rightmost leaf, then converts
/// that leaf position into a leaf index.
pub fn leaf_index_that_added_node(node_index: NodeIndex) -> LeafIndex {
    Self::leaf_node_index_to_leaf_index(Self::rightmost_leaf_node_index_from_pos(node_index))
}

// Translate a _leaf_ `NodeIndex` to its `LeafIndex`.
fn leaf_node_index_to_leaf_index(pos: NodeIndex) -> LeafIndex {
Lederstrumpf marked this conversation as resolved.
Show resolved Hide resolved
if pos == 0 {
return 0
}
let (leaf_count, _) =
mmr_lib::helper::get_peaks(pos)
.iter()
.fold((0, 0), |(mut acc, last_peak), peak| {
let leaves = (peak - last_peak) >> 1;
acc += leaves + 1;
// last_peak, leaves, acc);
(acc, peak.clone())
});
leaf_count
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
}

// Walk from node `pos` down to its rightmost leaf and return that leaf's position;
// this is the leaf responsible for the addition of node `pos`.
//
// The walk steps to `pos - 1` for as long as that position has a strictly smaller
// height than the current one. Position 0 is returned unchanged.
fn rightmost_leaf_node_index_from_pos(mut pos: NodeIndex) -> NodeIndex {
    use mmr_lib::helper::pos_height_in_tree;
    if pos == 0 {
        return pos
    }
    let mut height = pos_height_in_tree(pos);
    loop {
        let height_below = pos_height_in_tree(pos - 1);
        // Stop once the previous position is no longer lower in the tree.
        if height_below >= height {
            break
        }
        pos -= 1;
        height = height_below;
    }
    pos
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_leaf_node_index_to_leaf_index() {
    use mmr_lib::helper::leaf_index_to_pos;
    // Round trip: leaf index -> node position -> leaf index must be the identity.
    (0..100000).for_each(|leaf_index| {
        let node_index = leaf_index_to_pos(leaf_index);
        assert_eq!(NodesUtils::leaf_node_index_to_leaf_index(node_index), leaf_index);
    });
}

#[test]
fn should_calculate_number_of_leaves_correctly() {
assert_eq!(
Expand Down
12 changes: 6 additions & 6 deletions primitives/merkle-mountain-range/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ pub struct Proof<Hash> {

/// A full leaf content stored in the offchain-db.
pub trait FullLeaf: Clone + PartialEq + fmt::Debug {
/// Encode the leaf either in it's full or compact form.
/// Encode the leaf either in its full or compact form.
///
/// NOTE the encoding returned here MUST be `Decode`able into `FullLeaf`.
fn using_encoded<R, F: FnOnce(&[u8]) -> R>(&self, f: F, compact: bool) -> R;
Expand Down Expand Up @@ -167,18 +167,18 @@ impl EncodableOpaqueLeaf {
}
}

/// An element representing either full data or it's hash.
/// An element representing either full data or its hash.
///
/// See [Compact] to see how it may be used in practice to reduce the size
/// of proofs in case multiple [LeafDataProvider]s are composed together.
/// This is also used internally by the MMR to differentiate leaf nodes (data)
/// and inner nodes (hashes).
///
/// [DataOrHash::hash] method calculates the hash of this element in it's compact form,
/// [DataOrHash::hash] method calculates the hash of this element in its compact form,
/// so should be used instead of hashing the encoded form (which will always be non-compact).
#[derive(RuntimeDebug, Clone, PartialEq)]
pub enum DataOrHash<H: traits::Hash, L> {
/// Arbitrary data in it's full form.
/// Arbitrary data in its full form.
Data(L),
/// A hash of some data.
Hash(H::Output),
Expand Down Expand Up @@ -339,7 +339,7 @@ where
A: FullLeaf,
B: FullLeaf,
{
/// Retrieve a hash of this item in it's compact form.
/// Retrieve a hash of this item in its compact form.
pub fn hash(&self) -> H::Output {
self.using_encoded(<H as traits::Hash>::hash, true)
}
Expand Down Expand Up @@ -447,7 +447,7 @@ sp_api::decl_runtime_apis! {
/// Note this function does not require any on-chain storage - the
/// proof is verified against given MMR root hash.
///
/// The leaf data is expected to be encoded in it's compact form.
/// The leaf data is expected to be encoded in its compact form.
fn verify_proof_stateless(root: Hash, leaf: EncodableOpaqueLeaf, proof: Proof<Hash>)
-> Result<(), Error>;

Expand Down