diff --git a/Cargo.lock b/Cargo.lock index f319a8db8e71..c2480647ccef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8257,6 +8257,7 @@ dependencies = [ "polkadot-node-core-dispute-coordinator", "polkadot-node-core-pvf-execute-worker", "polkadot-node-core-pvf-prepare-worker", + "polkadot-node-network-protocol", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", diff --git a/node/malus/Cargo.toml b/node/malus/Cargo.toml index 8e23e623174f..0606fa6cd36e 100644 --- a/node/malus/Cargo.toml +++ b/node/malus/Cargo.toml @@ -24,6 +24,8 @@ polkadot-node-core-pvf-execute-worker = { path = "../core/pvf/execute-worker" } polkadot-node-core-pvf-prepare-worker = { path = "../core/pvf/prepare-worker" } polkadot-node-primitives = { path = "../primitives" } polkadot-primitives = { path = "../../primitives" } +polkadot-node-network-protocol = { path = "../network/protocol" } + color-eyre = { version = "0.6.1", default-features = false } assert_matches = "1.5" async-trait = "0.1.57" diff --git a/node/malus/src/interceptor.rs b/node/malus/src/interceptor.rs index cbf39bccd160..854100dbc3ae 100644 --- a/node/malus/src/interceptor.rs +++ b/node/malus/src/interceptor.rs @@ -40,7 +40,7 @@ where /// For non-trivial cases, the `sender` can be used to send /// multiple messages after doing some additional processing. fn intercept_incoming( - &self, + &mut self, _sender: &mut Sender, msg: FromOrchestra, ) -> Option> { diff --git a/node/malus/src/malus.rs b/node/malus/src/malus.rs index d09f8be990a4..0be3ae9e921c 100644 --- a/node/malus/src/malus.rs +++ b/node/malus/src/malus.rs @@ -36,6 +36,8 @@ enum NemesisVariant { BackGarbageCandidate(BackGarbageCandidateOptions), /// Delayed disputing of ancestors that are perfectly fine. DisputeAncestor(DisputeAncestorOptions), + /// Do not distribute approvals to all nodes + WitholdApprovalsDistribution(WitholdApprovalsDistributionOptions), #[allow(missing_docs)] #[command(name = "prepare-worker", hide = true)] @@ -59,6 +61,7 @@ impl MalusCli { /// Launch a malus node. fn launch(self) -> eyre::Result<()> { let finality_delay = self.finality_delay; + match self.variant { NemesisVariant::BackGarbageCandidate(opts) => { let BackGarbageCandidateOptions { percentage, cli } = opts; @@ -88,6 +91,15 @@ impl MalusCli { finality_delay, )? }, + NemesisVariant::WitholdApprovalsDistribution(WitholdApprovalsDistributionOptions { + num_network_groups, + assigned_network_group, + cli, + }) => polkadot_cli::run_node( + cli, + WitholdApprovalsDistribution { num_network_groups, assigned_network_group }, + finality_delay, + )?, NemesisVariant::PvfPrepareWorker(cmd) => { #[cfg(target_os = "android")] { diff --git a/node/malus/src/variants/common.rs b/node/malus/src/variants/common.rs index 4ea8b88b56a5..f07043e56025 100644 --- a/node/malus/src/variants/common.rs +++ b/node/malus/src/variants/common.rs @@ -205,7 +205,7 @@ where // Capture all (approval and backing) candidate validation requests and depending on configuration fail them. fn intercept_incoming( - &self, + &mut self, subsystem_sender: &mut Sender, msg: FromOrchestra, ) -> Option> { diff --git a/node/malus/src/variants/mod.rs b/node/malus/src/variants/mod.rs index 3789f33ac98b..8144665e9f66 100644 --- a/node/malus/src/variants/mod.rs +++ b/node/malus/src/variants/mod.rs @@ -20,10 +20,14 @@ mod back_garbage_candidate; mod common; mod dispute_valid_candidates; mod suggest_garbage_candidate; +mod withold_approvals_distribution; pub(crate) use self::{ back_garbage_candidate::{BackGarbageCandidateOptions, BackGarbageCandidates}, dispute_valid_candidates::{DisputeAncestorOptions, DisputeValidCandidates}, suggest_garbage_candidate::{SuggestGarbageCandidateOptions, SuggestGarbageCandidates}, + withold_approvals_distribution::{ + WitholdApprovalsDistribution, WitholdApprovalsDistributionOptions, + }, }; pub(crate) use common::*; diff --git a/node/malus/src/variants/suggest_garbage_candidate.rs b/node/malus/src/variants/suggest_garbage_candidate.rs index 049cfc2b153d..4326c0ce62bc 100644 --- a/node/malus/src/variants/suggest_garbage_candidate.rs +++ b/node/malus/src/variants/suggest_garbage_candidate.rs @@ -72,7 +72,7 @@ where /// Intercept incoming `Second` requests from the `collator-protocol` subsystem. fn intercept_incoming( - &self, + &mut self, subsystem_sender: &mut Sender, msg: FromOrchestra, ) -> Option> { diff --git a/node/malus/src/variants/withold_approvals_distribution.rs b/node/malus/src/variants/withold_approvals_distribution.rs new file mode 100644 index 000000000000..20e81ebe1800 --- /dev/null +++ b/node/malus/src/variants/withold_approvals_distribution.rs @@ -0,0 +1,311 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! This variant of Malus withold sending of approvals coming from approvals +//! distribution subsystem to just a subset of nodes. It is meant to be used for testing +//! finality is reached even nodes can not reach directly each other. +//! It enforces that peers are grouped in groups of fixed size, then it arranges +//! the groups in a ring topology. +//! +//! +//! Peers can send their messages only to peers in their group and to peers from next +//! group in the ring topology. +//! E.g If we 16 nodes split in 4 groups then: +//! (1, 2, 3, 4) -> (5, 6, 7, 8) -> (9, 10, 11, 12) -> (13, 14, 15, 16 ) +//! ^ | +//! |<------------------------------------------------------| +//! +//! Then node 5 will be able to send messages only to nodes in it's group (6, 7, 8) and to nodes +//! in the next group 9, 10, 11, 12 + +use polkadot_cli::{ + prepared_overseer_builder, + service::{ + AuthorityDiscoveryApi, AuxStore, BabeApi, Block, Error, HeaderBackend, Overseer, + OverseerConnector, OverseerGen, OverseerGenArgs, OverseerHandle, ParachainHost, + ProvideRuntimeApi, + }, + Cli, +}; +use polkadot_node_subsystem::SpawnGlue; +use polkadot_node_subsystem_types::messages::network_bridge_event::PeerId; +use sp_core::traits::SpawnNamed; + +use crate::interceptor::*; +use polkadot_node_network_protocol::{v1 as protocol_v1, Versioned}; + +use std::sync::Arc; +const LOG_TARGET: &str = "parachain::withold-approvals"; + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct WitholdApprovalsDistributionOptions { + /// Determines how many groups to create, the groups are connected in a ring shape, so + /// a node from group N can send messages only to groups from N+1 and will receive messages from N-1 + /// This should helps us test that approval distribution works correctly even in situations where + /// all nodes can't reach each other. + #[clap(short, long, ignore_case = true, default_value_t = 4, value_parser = clap::value_parser!(u8).range(0..=100))] + pub num_network_groups: u8, + + /// The group to which this node is assigned + #[clap(short, long, ignore_case = true, default_value_t = 0, value_parser = clap::value_parser!(u8).range(0..=100))] + pub assigned_network_group: u8, + + #[clap(flatten)] + pub cli: Cli, +} + +pub(crate) struct WitholdApprovalsDistribution { + pub num_network_groups: u8, + pub assigned_network_group: u8, +} + +impl OverseerGen for WitholdApprovalsDistribution { + fn generate<'a, Spawner, RuntimeClient>( + &self, + connector: OverseerConnector, + args: OverseerGenArgs<'a, Spawner, RuntimeClient>, + ) -> Result<(Overseer, Arc>, OverseerHandle), Error> + where + RuntimeClient: 'static + ProvideRuntimeApi + HeaderBackend + AuxStore, + RuntimeClient::Api: ParachainHost + BabeApi + AuthorityDiscoveryApi, + Spawner: 'static + SpawnNamed + Clone + Unpin, + { + let spawner = args.spawner.clone(); + let validation_filter = ApprovalsDistributionInterceptor::new( + SpawnGlue(spawner), + self.num_network_groups, + self.assigned_network_group, + ); + + prepared_overseer_builder(args)? + .replace_network_bridge_tx(move |cv_subsystem| { + InterceptedSubsystem::new(cv_subsystem, validation_filter) + }) + .build_with_connector(connector) + .map_err(|e| e.into()) + } +} + +#[derive(Clone, Debug)] +/// Replaces `NetworkBridgeTx`. +pub struct ApprovalsDistributionInterceptor { + spawner: Spawner, + known_peer_ids: Vec, + num_network_groups: u8, + assigned_network_group: u8, +} + +impl ApprovalsDistributionInterceptor +where + Spawner: overseer::gen::Spawner, +{ + pub fn new(spawner: Spawner, num_network_groups: u8, assigned_network_group: u8) -> Self { + Self { spawner, known_peer_ids: vec![], num_network_groups, assigned_network_group } + } + + fn can_send(&self, peer_id: PeerId) -> bool { + let group_size = self.known_peer_ids.len() / (self.num_network_groups as usize) + + if self.known_peer_ids.len() % self.num_network_groups as usize != 0 { 1 } else { 0 }; + + let my_group_in_ring = self + .known_peer_ids + .chunks(group_size) + .skip(self.assigned_network_group as usize) + .next(); + + let next_group_in_ring = self + .known_peer_ids + .chunks(group_size) + .skip((self.assigned_network_group as usize + 1) % self.num_network_groups as usize) + .next(); + + my_group_in_ring + .map(|my_group_peers| my_group_peers.contains(&peer_id)) + .unwrap_or(false) || + next_group_in_ring + .map(|next_group_peers| next_group_peers.contains(&peer_id)) + .unwrap_or(false) + } +} + +impl MessageInterceptor for ApprovalsDistributionInterceptor +where + Sender: overseer::NetworkBridgeTxSenderTrait + Clone + Send + 'static, + Spawner: overseer::gen::Spawner + Clone + 'static, +{ + type Message = NetworkBridgeTxMessage; + + // Capture all (approval and backing) candidate validation requests and depending on configuration fail them. + fn intercept_incoming( + &mut self, + _subsystem_sender: &mut Sender, + msg: FromOrchestra, + ) -> Option> { + match msg { + // Message sent by the approval voting subsystem + FromOrchestra::Communication { + msg: NetworkBridgeTxMessage::SendValidationMessage(peers, message), + } => { + let new_peers: Vec<&PeerId> = + peers.iter().filter(|peer_id| !self.known_peer_ids.contains(peer_id)).collect(); + if !new_peers.is_empty() { + self.known_peer_ids.extend(new_peers); + self.known_peer_ids.sort(); + } + + match &message { + Versioned::V1(protocol_v1::ValidationProtocol::ApprovalDistribution( + protocol_v1::ApprovalDistributionMessage::Approvals(approvals), + )) => { + let num_peers_we_wanted = peers.len(); + let peers_we_can_send: Vec = + peers.into_iter().filter(|peer_id| self.can_send(*peer_id)).collect(); + gum::info!( + target: LOG_TARGET, + "Malus message intercepted num_peers_we_can_send {:} num_peers_we_wanted_to_send {:} known_peers {:} peers {:} approval {:?}", + peers_we_can_send.len(), + num_peers_we_wanted, + self.known_peer_ids.len(), + peers_we_can_send + .clone() + .into_iter() + .fold(String::new(), |accumulator, peer| { + format!("{}:{}", accumulator, peer) + }), + approvals + ); + Some(FromOrchestra::Communication { + msg: NetworkBridgeTxMessage::SendValidationMessage( + peers_we_can_send, + message, + ), + }) + }, + _ => Some(FromOrchestra::Communication { + msg: NetworkBridgeTxMessage::SendValidationMessage(peers, message), + }), + } + }, + msg => Some(msg), + } + } + + fn intercept_outgoing( + &self, + msg: overseer::NetworkBridgeTxOutgoingMessages, + ) -> Option { + Some(msg) + } +} + +#[cfg(test)] +mod tests { + use polkadot_node_network_protocol::PeerId; + use polkadot_node_subsystem::Spawner; + + use super::ApprovalsDistributionInterceptor; + + #[derive(Debug, Clone)] + struct DummySpanner; + impl Spawner for DummySpanner { + fn spawn_blocking( + &self, + _name: &'static str, + _group: Option<&'static str>, + _future: futures::future::BoxFuture<'static, ()>, + ) { + todo!() + } + + fn spawn( + &self, + _name: &'static str, + _group: Option<&'static str>, + _future: futures::future::BoxFuture<'static, ()>, + ) { + todo!() + } + } + + #[test] + fn test_can_send() { + let test_topologies: Vec> = vec![ + (1..20).map(|_| PeerId::random()).collect(), + (1..21).map(|_| PeerId::random()).collect(), + ]; + + for peer_ids in test_topologies { + let withold_approval_distribution = ApprovalsDistributionInterceptor { + spawner: DummySpanner {}, + known_peer_ids: peer_ids.clone(), + assigned_network_group: 1, + num_network_groups: 4, + }; + + assert!(!withold_approval_distribution.can_send(peer_ids[0])); + assert!(!withold_approval_distribution.can_send(peer_ids[4])); + + assert!(withold_approval_distribution.can_send(peer_ids[5])); + assert!(withold_approval_distribution.can_send(peer_ids[9])); + + assert!(withold_approval_distribution.can_send(peer_ids[10])); + assert!(withold_approval_distribution.can_send(peer_ids[14])); + + assert!(!withold_approval_distribution.can_send(peer_ids[15])); + assert!(!withold_approval_distribution.can_send(peer_ids[18])); + + let withold_approval_distribution = ApprovalsDistributionInterceptor { + spawner: DummySpanner {}, + known_peer_ids: peer_ids.clone(), + assigned_network_group: 3, + num_network_groups: 4, + }; + + assert!(withold_approval_distribution.can_send(peer_ids[0])); + assert!(withold_approval_distribution.can_send(peer_ids[4])); + + assert!(!withold_approval_distribution.can_send(peer_ids[5])); + assert!(!withold_approval_distribution.can_send(peer_ids[9])); + + assert!(!withold_approval_distribution.can_send(peer_ids[10])); + assert!(!withold_approval_distribution.can_send(peer_ids[14])); + + assert!(withold_approval_distribution.can_send(peer_ids[15])); + assert!(withold_approval_distribution.can_send(peer_ids[18])); + + let withold_approval_distribution = ApprovalsDistributionInterceptor { + spawner: DummySpanner {}, + known_peer_ids: peer_ids.clone(), + assigned_network_group: 0, + num_network_groups: 4, + }; + + assert!(withold_approval_distribution.can_send(peer_ids[0])); + assert!(withold_approval_distribution.can_send(peer_ids[4])); + + assert!(withold_approval_distribution.can_send(peer_ids[5])); + assert!(withold_approval_distribution.can_send(peer_ids[9])); + + assert!(!withold_approval_distribution.can_send(peer_ids[10])); + assert!(!withold_approval_distribution.can_send(peer_ids[14])); + + assert!(!withold_approval_distribution.can_send(peer_ids[15])); + assert!(!withold_approval_distribution.can_send(peer_ids[18])); + } + } +} diff --git a/node/service/src/relay_chain_selection.rs b/node/service/src/relay_chain_selection.rs index afc0ce320610..4d5f4d2c91b0 100644 --- a/node/service/src/relay_chain_selection.rs +++ b/node/service/src/relay_chain_selection.rs @@ -471,6 +471,7 @@ where let lag = initial_leaf_number.saturating_sub(subchain_number); self.metrics.note_approval_checking_finality_lag(lag); + gum::debug!(target: LOG_TARGET, ?subchain_head, "Approval checking lag {:}", lag); // Messages sent to `approval-distrbution` are known to have high `ToF`, we need to spawn a task for sending // the message to not block here and delay finality. diff --git a/zombienet_tests/functional/0005-parachains-aggression-enabled.toml b/zombienet_tests/functional/0005-parachains-aggression-enabled.toml new file mode 100644 index 000000000000..1d94465d00d3 --- /dev/null +++ b/zombienet_tests/functional/0005-parachains-aggression-enabled.toml @@ -0,0 +1,130 @@ +[settings] +timeout = 1000 + +[relaychain.genesis.runtime.runtime_genesis_config.configuration.config] + needed_approvals = 8 + +[relaychain] +default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}" +chain = "rococo-local" +chain_spec_command = "polkadot build-spec --chain rococo-local --disable-default-bootnode" + +[relaychain.default_resources] +limits = { memory = "4G", cpu = "2" } +requests = { memory = "2G", cpu = "1" } + + [[relaychain.nodes]] + name = "alice" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=0" + args = [ "--alice", "-lparachain=debug,runtime=debug" ] + + [[relaychain.nodes]] + name = "bob" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=0" + args = [ "--bob", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "charlie" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=0" + args = [ "--charlie", "-lparachain=debug,runtime=debug" ] + + [[relaychain.nodes]] + name = "dave" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=0" + args = [ "--dave", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "ferdie" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=1" + args = [ "--ferdie", "-lparachain=debug,runtime=debug" ] + + [[relaychain.nodes]] + name = "eve" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=1" + args = [ "--eve", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "one" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=1" + args = [ "--one", "-lparachain=debug,runtime=debug" ] + + [[relaychain.nodes]] + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=1" + name = "two" + args = [ "--two", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode8" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=2" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode9" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=2" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode10" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=2" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode11" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=2" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode12" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=3" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode13" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=3" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode14" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=3" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode15" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=3" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode16" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=4" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode17" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=4" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode18" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=4" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + + [[relaychain.nodes]] + name = "newnode19" + command = "malus withold-approvals-distribution --num-network-groups=5 --assigned-network-group=4" + args = [ "--validator", "-lparachain=debug,runtime=debug"] + +[[parachains]] +id = 2000 +addToGenesis = true +genesis_state_generator = "undying-collator export-genesis-state --pov-size=100000 --pvf-complexity=1" + + [parachains.collator] + name = "collator01" + image = "{{COL_IMAGE}}" + command = "undying-collator" + args = ["-lparachain=debug", "--pov-size=100000", "--pvf-complexity=1", "--parachain-id=2000"] + +[types.Header] +number = "u64" +parent_hash = "Hash" +post_state = "Hash" \ No newline at end of file diff --git a/zombienet_tests/functional/0005-parachains-aggression-enabled.zndsl b/zombienet_tests/functional/0005-parachains-aggression-enabled.zndsl new file mode 100644 index 000000000000..c8c7956d69c6 --- /dev/null +++ b/zombienet_tests/functional/0005-parachains-aggression-enabled.zndsl @@ -0,0 +1,21 @@ +Description: PVF preparation & execution time +Network: ./0005-parachains-aggression-enabled.toml +Creds: config + +# Check authority status. +alice: reports node_roles is 4 +bob: reports node_roles is 4 +charlie: reports node_roles is 4 +dave: reports node_roles is 4 +eve: reports node_roles is 4 +ferdie: reports node_roles is 4 +one: reports node_roles is 4 +two: reports node_roles is 4 + +# Ensure parachains are registered. +alice: parachain 2000 is registered within 60 seconds + + +# Ensure parachains made progress. +# alice: parachain 2000 block height is at least 30 within 600 seconds +alice: reports substrate_block_height{status="finalized"} is at least 40 within 400 seconds \ No newline at end of file