Skip to content

Commit

Permalink
Validator key hot swap (#11536)
Browse files Browse the repository at this point in the history
Issue: #11264

This PR build on:
* #11372
* #11400

and contains actual changes and test for validator key hot swap.

### Summary
- Extend `UpdateableConfig` with validator key.
- Update client's mutable validator key when we detect it changed in the
updateable config.
- Advertise our new validator key through `advertise_tier1_proxies()`.
- Add integration test for the new behaviour:
- We start with 2 validating nodes (`node0`, `node1`) and 1
non-validating node (`node2`). It is important that the non-validating
node tracks all shards, because we do not know which shard it will track
when we switch validator keys.
  - We copy validator key from `node0` to `node2`.
  - We stop `node0`, then we trigger validator key reload for `node2`.
- Now `node2` is a validator, but it figures out as `node0` because it
copied validator key from `node0`.
- We wait for a couple of epochs and we require that both remaining
nodes progress the chain. Both nodes should be synchronised after a few
epochs.


Test with:
```
cargo build -pneard --features test_features,rosetta_rpc &&
cargo build -pgenesis-populate -prestaked -pnear-test-contracts &&
python3 pytest/tests/sanity/validator_switch_key_quick.py
```

#### Extra changes:
- Use `MutableValidatorSigner` alias instead of
`MutableConfigValue<Option<Arc<ValidatorSigner>>>`
- Return `ConfigUpdaterResult` from config updater.
- Remove (de)serialization derives for `UpdateableConfigs`.
-

---------

Co-authored-by: Your Name <[email protected]>
  • Loading branch information
staffik and Your Name committed Jun 25, 2024
1 parent de83cf0 commit 1159181
Show file tree
Hide file tree
Showing 19 changed files with 250 additions and 48 deletions.
7 changes: 4 additions & 3 deletions chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ use itertools::Itertools;
use lru::LruCache;
use near_async::futures::{AsyncComputationSpawner, AsyncComputationSpawnerExt};
use near_async::time::{Clock, Duration, Instant};
use near_chain_configs::{MutableConfigValue, ReshardingConfig, ReshardingHandle};
use near_chain_configs::{
MutableConfigValue, MutableValidatorSigner, ReshardingConfig, ReshardingHandle,
};
#[cfg(feature = "new_epoch_sync")]
use near_chain_primitives::error::epoch_sync::EpochSyncInfoError;
use near_chain_primitives::error::{BlockKnownError, Error, LogTransientStorageError};
Expand Down Expand Up @@ -87,7 +89,6 @@ use near_primitives::unwrap_or_return;
#[cfg(feature = "new_epoch_sync")]
use near_primitives::utils::index_to_bytes;
use near_primitives::utils::MaybeValidated;
use near_primitives::validator_signer::ValidatorSigner;
use near_primitives::version::{ProtocolFeature, ProtocolVersion, PROTOCOL_VERSION};
use near_primitives::views::{
BlockStatusView, DroppedReason, ExecutionOutcomeWithIdView, ExecutionStatusView,
Expand Down Expand Up @@ -404,7 +405,7 @@ impl Chain {
chain_config: ChainConfig,
snapshot_callbacks: Option<SnapshotCallbacks>,
apply_chunks_spawner: Arc<dyn AsyncComputationSpawner>,
validator: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator: MutableValidatorSigner,
) -> Result<Chain, Error> {
// Get runtime initial state and create genesis block out of it.
let state_roots = get_genesis_state_roots(runtime_adapter.store())?
Expand Down
13 changes: 6 additions & 7 deletions chain/chunks/src/shards_manager_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ use near_chain::byzantine_assert;
use near_chain::chunks_store::ReadOnlyChunksStore;
use near_chain::near_chain_primitives::error::Error::DBNotFoundErr;
use near_chain::types::EpochManagerAdapter;
use near_chain_configs::MutableConfigValue;
use near_chain_configs::MutableValidatorSigner;
pub use near_chunks_primitives::Error;
use near_epoch_manager::shard_tracker::ShardTracker;
use near_network::shards_manager::ShardsManagerRequestFromNetwork;
Expand Down Expand Up @@ -246,7 +246,7 @@ pub struct ShardsManagerActor {
/// Contains validator info about this node. This field is mutable and optional. Use with caution!
/// Lock the value of mutable validator signer for the duration of a request to ensure consistency.
/// Please note that the locked value should not be stored anywhere or passed through the thread boundary.
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
store: ReadOnlyChunksStore,

epoch_manager: Arc<dyn EpochManagerAdapter>,
Expand Down Expand Up @@ -296,7 +296,7 @@ pub fn start_shards_manager(
shard_tracker: ShardTracker,
network_adapter: Sender<PeerManagerMessageRequest>,
client_adapter_for_shards_manager: Sender<ShardsManagerResponse>,
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
store: Store,
chunk_request_retry_period: Duration,
) -> (actix::Addr<ActixWrapper<ShardsManagerActor>>, actix::ArbiterHandle) {
Expand Down Expand Up @@ -334,7 +334,7 @@ pub fn start_shards_manager(
impl ShardsManagerActor {
pub fn new(
clock: time::Clock,
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
epoch_manager: Arc<dyn EpochManagerAdapter>,
shard_tracker: ShardTracker,
network_adapter: Sender<PeerManagerMessageRequest>,
Expand Down Expand Up @@ -2242,6 +2242,7 @@ mod test {
use assert_matches::assert_matches;
use near_async::messaging::IntoSender;
use near_async::time::FakeClock;
use near_chain_configs::MutableConfigValue;
use near_epoch_manager::shard_tracker::TrackedConfig;
use near_epoch_manager::test_utils::setup_epoch_manager_with_block_and_chunk_producers;
use near_network::test_utils::MockPeerManagerAdapter;
Expand All @@ -2257,9 +2258,7 @@ mod test {
use crate::logic::persist_chunk;
use crate::test_utils::*;

fn mutable_validator_signer(
account_id: &AccountId,
) -> MutableConfigValue<Option<Arc<ValidatorSigner>>> {
fn mutable_validator_signer(account_id: &AccountId) -> MutableValidatorSigner {
MutableConfigValue::new(
Some(Arc::new(EmptyValidatorSigner::new(account_id.clone()))),
"validator_signer",
Expand Down
26 changes: 19 additions & 7 deletions chain/client/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use near_chain::{
DoomslugThresholdMode, Provenance,
};
use near_chain_configs::{
ClientConfig, LogSummaryStyle, MutableConfigValue, UpdateableClientConfig,
ClientConfig, LogSummaryStyle, MutableValidatorSigner, UpdateableClientConfig,
};
use near_chunks::adapter::ShardsManagerRequestFromClient;
use near_chunks::client::ShardedTransactionPool;
Expand Down Expand Up @@ -150,7 +150,7 @@ pub struct Client {
/// Signer for block producer (if present). This field is mutable and optional. Use with caution!
/// Lock the value of mutable validator signer for the duration of a request to ensure consistency.
/// Please note that the locked value should not be stored anywhere or passed through the thread boundary.
pub validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
pub validator_signer: MutableValidatorSigner,
/// Approvals for which we do not have the block yet
pub pending_approvals:
lru::LruCache<ApprovalInner, HashMap<AccountId, (Approval, ApprovalType)>>,
Expand Down Expand Up @@ -202,12 +202,24 @@ pub struct Client {
}

impl Client {
pub(crate) fn update_client_config(&self, update_client_config: UpdateableClientConfig) {
self.config.expected_shutdown.update(update_client_config.expected_shutdown);
self.config.resharding_config.update(update_client_config.resharding_config);
self.config
pub(crate) fn update_client_config(
&self,
update_client_config: UpdateableClientConfig,
) -> bool {
let mut is_updated = false;
is_updated |= self.config.expected_shutdown.update(update_client_config.expected_shutdown);
is_updated |= self.config.resharding_config.update(update_client_config.resharding_config);
is_updated |= self
.config
.produce_chunk_add_transactions_time_limit
.update(update_client_config.produce_chunk_add_transactions_time_limit);
is_updated
}

/// Updates client's mutable validator signer.
/// It will update all validator signers that synchronize with it.
pub(crate) fn update_validator_signer(&self, signer: Arc<ValidatorSigner>) -> bool {
self.validator_signer.update(Some(signer))
}
}

Expand Down Expand Up @@ -253,7 +265,7 @@ impl Client {
runtime_adapter: Arc<dyn RuntimeAdapter>,
network_adapter: PeerManagerAdapter,
shards_manager_adapter: Sender<ShardsManagerRequestFromClient>,
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
enable_doomslug: bool,
rng_seed: RngSeed,
snapshot_callbacks: Option<SnapshotCallbacks>,
Expand Down
18 changes: 13 additions & 5 deletions chain/client/src/client_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use near_chain::{
byzantine_assert, near_chain_primitives, Block, BlockHeader, BlockProcessingArtifact,
ChainGenesis, Provenance,
};
use near_chain_configs::{ClientConfig, LogSummaryStyle, MutableConfigValue, ReshardingHandle};
use near_chain_configs::{ClientConfig, LogSummaryStyle, MutableValidatorSigner, ReshardingHandle};
use near_chain_primitives::error::EpochErrorResultToChainError;
use near_chunks::adapter::ShardsManagerRequestFromClient;
use near_chunks::client::ShardsManagerResponse;
Expand Down Expand Up @@ -134,7 +134,7 @@ pub fn start_client(
state_sync_adapter: Arc<RwLock<SyncAdapter>>,
network_adapter: PeerManagerAdapter,
shards_manager_adapter: Sender<ShardsManagerRequestFromClient>,
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
telemetry_sender: Sender<TelemetryEvent>,
snapshot_callbacks: Option<SnapshotCallbacks>,
sender: Option<broadcast::Sender<()>>,
Expand Down Expand Up @@ -1166,9 +1166,17 @@ impl ClientActorInner {
fn check_triggers(&mut self, ctx: &mut dyn DelayedActionRunner<Self>) -> Duration {
let _span = tracing::debug_span!(target: "client", "check_triggers").entered();
if let Some(config_updater) = &mut self.config_updater {
config_updater.try_update(&|updateable_client_config| {
self.client.update_client_config(updateable_client_config)
});
let update_result = config_updater.try_update(
&|updateable_client_config| {
self.client.update_client_config(updateable_client_config)
},
&|validator_signer| self.client.update_validator_signer(validator_signer),
);
if update_result.validator_signer_updated {
// Request PeerManager to advertise tier1 proxies.
// It is needed to advertise that our validator key changed.
self.network_adapter.send(PeerManagerMessageRequest::AdvertiseTier1Proxies);
}
}

// Check block height to trigger expected shutdown
Expand Down
25 changes: 23 additions & 2 deletions chain/client/src/config_updater.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use near_chain_configs::UpdateableClientConfig;
use near_dyn_configs::{UpdateableConfigLoaderError, UpdateableConfigs};
use near_primitives::validator_signer::ValidatorSigner;
use std::sync::Arc;
use tokio::sync::broadcast::Receiver;

Expand All @@ -12,6 +13,14 @@ pub struct ConfigUpdater {
updateable_configs_error: Option<Arc<UpdateableConfigLoaderError>>,
}

/// Return type of `ConfigUpdater::try_update()`.
/// Represents which values have been updated.
#[derive(Default)]
pub struct ConfigUpdaterResult {
pub client_config_updated: bool,
pub validator_signer_updated: bool,
}

impl ConfigUpdater {
pub fn new(
rx_config_update: Receiver<Result<UpdateableConfigs, Arc<UpdateableConfigLoaderError>>>,
Expand All @@ -21,21 +30,33 @@ impl ConfigUpdater {

/// Check if any of the configs were updated.
/// If they did, the receiver (rx_config_update) will contain a clone of the new configs.
pub fn try_update(&mut self, update_client_config_fn: &dyn Fn(UpdateableClientConfig)) {
pub fn try_update(
&mut self,
update_client_config_fn: &dyn Fn(UpdateableClientConfig) -> bool,
update_validator_signer_fn: &dyn Fn(Arc<ValidatorSigner>) -> bool,
) -> ConfigUpdaterResult {
let mut update_result = ConfigUpdaterResult::default();
while let Ok(maybe_updateable_configs) = self.rx_config_update.try_recv() {
match maybe_updateable_configs {
Ok(updateable_configs) => {
if let Some(client_config) = updateable_configs.client_config {
update_client_config_fn(client_config);
update_result.client_config_updated |=
update_client_config_fn(client_config);
tracing::info!(target: "config", "Updated ClientConfig");
}
if let Some(validator_signer) = updateable_configs.validator_signer {
update_result.validator_signer_updated |=
update_validator_signer_fn(validator_signer);
tracing::info!(target: "config", "Updated validator key");
}
self.updateable_configs_error = None;
}
Err(err) => {
self.updateable_configs_error = Some(err.clone());
}
}
}
update_result
}

/// Prints an error if it's present.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use near_async::messaging::{Actor, CanSend, Handler, Sender};
use near_async::time::Clock;
use near_async::{MultiSend, MultiSendMessage, MultiSenderFrom};
use near_chain::Error;
use near_chain_configs::MutableConfigValue;
use near_chain_configs::MutableValidatorSigner;
use near_epoch_manager::EpochManagerAdapter;
use near_network::state_witness::{
ChunkStateWitnessAckMessage, PartialEncodedStateWitnessForwardMessage,
Expand Down Expand Up @@ -36,7 +36,7 @@ pub struct PartialWitnessActor {
/// Validator signer to sign the state witness. This field is mutable and optional. Use with caution!
/// Lock the value of mutable validator signer for the duration of a request to ensure consistency.
/// Please note that the locked value should not be stored anywhere or passed through the thread boundary.
my_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
my_signer: MutableValidatorSigner,
/// Epoch manager to get the set of chunk validators
epoch_manager: Arc<dyn EpochManagerAdapter>,
/// Tracks the parts of the state witness sent from chunk producers to chunk validators.
Expand Down Expand Up @@ -107,7 +107,7 @@ impl PartialWitnessActor {
clock: Clock,
network_adapter: PeerManagerAdapter,
client_sender: ClientSenderForPartialWitness,
my_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
my_signer: MutableValidatorSigner,
epoch_manager: Arc<dyn EpochManagerAdapter>,
store: Store,
) -> Self {
Expand Down
6 changes: 3 additions & 3 deletions chain/client/src/view_client_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use near_chain::types::{RuntimeAdapter, Tip};
use near_chain::{
get_epoch_block_producers_view, Chain, ChainGenesis, ChainStoreAccess, DoomslugThresholdMode,
};
use near_chain_configs::{ClientConfig, MutableConfigValue, ProtocolConfigView};
use near_chain_configs::{ClientConfig, MutableValidatorSigner, ProtocolConfigView};
use near_chain_primitives::error::EpochErrorResultToChainError;
use near_client_primitives::types::{
Error, GetBlock, GetBlockError, GetBlockProof, GetBlockProofError, GetBlockProofResponse,
Expand Down Expand Up @@ -99,7 +99,7 @@ pub struct ViewClientActorInner {
/// Validator account (if present). This field is mutable and optional. Use with caution!
/// Lock the value of mutable validator signer for the duration of a request to ensure consistency.
/// Please note that the locked value should not be stored anywhere or passed through the thread boundary.
validator: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator: MutableValidatorSigner,
chain: Chain,
epoch_manager: Arc<dyn EpochManagerAdapter>,
shard_tracker: ShardTracker,
Expand Down Expand Up @@ -128,7 +128,7 @@ impl ViewClientActorInner {

pub fn spawn_actix_actor(
clock: Clock,
validator: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator: MutableValidatorSigner,
chain_genesis: ChainGenesis,
epoch_manager: Arc<dyn EpochManagerAdapter>,
shard_tracker: ShardTracker,
Expand Down
5 changes: 3 additions & 2 deletions chain/network/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::types::ROUTED_MESSAGE_TTL;
use anyhow::Context;
use near_async::time;
use near_chain_configs::MutableConfigValue;
use near_chain_configs::MutableValidatorSigner;
use near_crypto::{KeyType, SecretKey};
use near_primitives::network::PeerId;
use near_primitives::test_utils::create_test_signer;
Expand Down Expand Up @@ -60,7 +61,7 @@ pub struct ValidatorConfig {
/// Contains signer key for this node. This field is mutable and optional. Use with caution!
/// Lock the value of mutable validator signer for the duration of a request to ensure consistency.
/// Please note that the locked value should not be stored anywhere or passed through the thread boundary.
pub signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
pub signer: MutableValidatorSigner,
pub proxies: ValidatorProxies,
}

Expand Down Expand Up @@ -241,7 +242,7 @@ impl NetworkConfig {
pub fn new(
cfg: crate::config_json::Config,
node_key: SecretKey,
validator_signer: MutableConfigValue<Option<Arc<ValidatorSigner>>>,
validator_signer: MutableValidatorSigner,
archive: bool,
) -> anyhow::Result<Self> {
if cfg.public_addrs.len() > MAX_PEER_ADDRS {
Expand Down
8 changes: 8 additions & 0 deletions chain/network/src/peer_manager/peer_manager_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,14 @@ impl PeerManagerActor {
self.handle_msg_network_requests(msg, ctx),
)
}
PeerManagerMessageRequest::AdvertiseTier1Proxies => {
let state = self.state.clone();
let clock = self.clock.clone();
ctx.spawn(wrap_future(async move {
state.tier1_advertise_proxies(&clock).await;
}));
PeerManagerMessageResponse::AdvertiseTier1Proxies
}
PeerManagerMessageRequest::OutboundTcpConnect(stream) => {
let peer_addr = stream.peer_addr;
if let Err(err) =
Expand Down
5 changes: 5 additions & 0 deletions chain/network/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ pub struct SetChainInfo(pub ChainInfo);
#[rtype(result = "PeerManagerMessageResponse")]
pub enum PeerManagerMessageRequest {
NetworkRequests(NetworkRequests),
/// Request PeerManager to call `tier1_advertise_proxies()`. Used internally.
/// The effect would be accounts data known by this node broadcasted to other tier1 nodes.
/// That includes info about validator signer of this node.
AdvertiseTier1Proxies,
/// Request PeerManager to connect to the given peer.
/// Used in tests and internally by PeerManager.
/// TODO: replace it with AsyncContext::spawn/run_later for internal use.
Expand Down Expand Up @@ -193,6 +197,7 @@ impl PeerManagerMessageRequest {
#[derive(actix::MessageResponse, Debug)]
pub enum PeerManagerMessageResponse {
NetworkResponses(NetworkResponses),
AdvertiseTier1Proxies,
/// TEST-ONLY
OutboundTcpConnect,
FetchRoutingTable(RoutingTableInfo),
Expand Down
2 changes: 1 addition & 1 deletion core/chain-configs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub use genesis_config::{
};
use near_primitives::types::{Balance, BlockHeightDelta, Gas, NumBlocks, NumSeats};
use num_rational::Rational32;
pub use updateable_config::{MutableConfigValue, UpdateableClientConfig};
pub use updateable_config::{MutableConfigValue, MutableValidatorSigner, UpdateableClientConfig};

pub const GENESIS_CONFIG_FILENAME: &str = "genesis.json";

Expand Down
3 changes: 3 additions & 0 deletions core/chain-configs/src/updateable_config.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#[cfg(feature = "metrics")]
use near_async::time::Clock;
use near_primitives::types::BlockHeight;
use near_primitives::validator_signer::ValidatorSigner;
use serde::{Deserialize, Serialize, Serializer};
use std::fmt::Debug;
use std::sync::{Arc, Mutex};
Expand Down Expand Up @@ -108,3 +109,5 @@ pub struct UpdateableClientConfig {
#[serde(with = "near_async::time::serde_opt_duration_as_std")]
pub produce_chunk_add_transactions_time_limit: Option<Duration>,
}

pub type MutableValidatorSigner = MutableConfigValue<Option<Arc<ValidatorSigner>>>;
Loading

0 comments on commit 1159181

Please sign in to comment.