-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[consensus] Update proposer metrics #19655
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,10 +12,7 @@ use mysten_metrics::monitored_mpsc::{unbounded_channel, UnboundedReceiver}; | |
use mysten_metrics::monitored_scope; | ||
use parking_lot::RwLock; | ||
use sui_macros::fail_point; | ||
use tokio::{ | ||
sync::{broadcast, watch}, | ||
time::Instant, | ||
}; | ||
use tokio::sync::{broadcast, watch}; | ||
use tracing::{debug, info, warn}; | ||
|
||
use crate::{ | ||
|
@@ -275,11 +272,34 @@ impl Core { | |
/// Adds/processes all the newly `accepted_blocks`. We basically try to move the threshold clock and add them to the | ||
/// pending ancestors list. | ||
fn add_accepted_blocks(&mut self, accepted_blocks: Vec<VerifiedBlock>) { | ||
// Advance the threshold clock. If advanced to a new round then send a signal that a new quorum has been received. | ||
if let Some(new_round) = self | ||
.threshold_clock | ||
.add_blocks(accepted_blocks.iter().map(|b| b.reference()).collect()) | ||
// Get max round of accepted blocks. This will be equal to the threshold | ||
// clock round, either by advancing the threshold clock round by being | ||
// greater than current clock round or by equaling the current clock round. | ||
let max_accepted_round = accepted_blocks | ||
.iter() | ||
.map(|b| b.round()) | ||
.max() | ||
.unwrap_or(GENESIS_ROUND); | ||
// Therefore the leader round for which proposals will wait will be max accepted round - 1 | ||
// or saturate to GENESIS_ROUND. | ||
let accepted_proposal_leader_round = max_accepted_round.saturating_sub(1); | ||
|
||
// Ignore checking for leader blocks with rounds less than the current | ||
// threshold clock round - 1. | ||
let proposal_leaders_exist = if accepted_proposal_leader_round | ||
>= self.threshold_clock.get_round().saturating_sub(1) | ||
{ | ||
self.leaders_exist(accepted_proposal_leader_round) | ||
} else { | ||
false | ||
}; | ||
|
||
// Advance the threshold clock. If advanced to a new round then send a | ||
// signal that a new quorum has been received. | ||
if let Some(new_round) = self.threshold_clock.add_blocks( | ||
accepted_blocks.iter().map(|b| b.reference()).collect(), | ||
proposal_leaders_exist, | ||
) { | ||
// notify that threshold clock advanced to new round | ||
self.signals.new_round(new_round); | ||
} | ||
|
@@ -351,10 +371,22 @@ impl Core { | |
// There must be a quorum of blocks from the previous round. | ||
let quorum_round = self.threshold_clock.get_round().saturating_sub(1); | ||
|
||
let leader_authority = &self | ||
.context | ||
.committee | ||
.authority(self.first_leader(quorum_round)) | ||
.hostname; | ||
|
||
// Create a new block either because we want to "forcefully" propose a block due to a leader timeout, | ||
// or because we are actually ready to produce the block (leader exists and min delay has passed). | ||
if !force { | ||
if !self.leaders_exist(quorum_round) { | ||
self.context | ||
.metrics | ||
.node_metrics | ||
.block_proposal_leader_wait_count | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should use a separate metric for counting the number of times the leader is not found. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the confusion for me with these metrics is that they don't just include the leader wait time; they also include the quorum receive wait time, which can make this metric a little misleading. Separating them brings more clarity. Though I guess we could always subtract this metric from the quorum receive latency. |
||
.with_label_values(&[leader_authority]) | ||
.inc(); | ||
return None; | ||
} | ||
|
||
|
@@ -369,27 +401,18 @@ impl Core { | |
} | ||
} | ||
|
||
let leader_authority = &self | ||
.context | ||
.committee | ||
.authority(self.first_leader(quorum_round)) | ||
.hostname; | ||
self.context | ||
.metrics | ||
.node_metrics | ||
.block_proposal_leader_wait_ms | ||
.with_label_values(&[leader_authority]) | ||
.inc_by( | ||
Instant::now() | ||
.saturating_duration_since(self.threshold_clock.get_quorum_ts()) | ||
.as_millis() as u64, | ||
); | ||
self.context | ||
.metrics | ||
.node_metrics | ||
.block_proposal_leader_wait_count | ||
.with_label_values(&[leader_authority]) | ||
.inc(); | ||
if let Some(leader_ts) = self.threshold_clock.get_proposal_leaders_ts() { | ||
self.context | ||
.metrics | ||
.node_metrics | ||
.block_proposal_leader_wait_ms | ||
.with_label_values(&[leader_authority]) | ||
.inc_by( | ||
leader_ts | ||
.saturating_duration_since(self.threshold_clock.get_quorum_ts()) | ||
.as_millis() as u64, | ||
); | ||
} | ||
|
||
// TODO: produce the block for the clock_round. As the threshold clock can advance many rounds at once (ex | ||
// because we synchronized a bulk of blocks) we can decide here whether we want to produce blocks per round | ||
|
@@ -477,6 +500,19 @@ impl Core { | |
// Ensure the new block and its ancestors are persisted, before broadcasting it. | ||
self.dag_state.write().flush(); | ||
|
||
let current_proposal_duration = Duration::from_millis(verified_block.timestamp_ms()); | ||
let previous_proposal_duration = Duration::from_millis(self.last_proposed_timestamp_ms()); | ||
self.context | ||
.metrics | ||
.node_metrics | ||
.block_proposal_interval | ||
.observe( | ||
current_proposal_duration | ||
.checked_sub(previous_proposal_duration) | ||
.unwrap_or_else(|| Duration::from_millis(0)) | ||
.as_secs_f64(), | ||
); | ||
|
||
// Update internal state. | ||
self.last_proposed_block = verified_block.clone(); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this the case? Blocks older than current threshold clock round can get accepted as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I only added the cases for greater than and equal to in the comment, but blocks with rounds less than the clock round are essentially ignored by the threshold clock.