Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Logging & metrics improvements - Ladi's attempt to replicate Rob's PR #4337

Merged
merged 1 commit into from
Nov 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions node/core/backing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,13 @@ impl CandidateBackingJob {

// Sanity check that candidate is from our assignment.
if Some(candidate.descriptor().para_id) != self.assignment {
tracing::debug!(
target: LOG_TARGET,
our_assignment = ?self.assignment,
collation = ?candidate.descriptor().para_id,
"Subsystem asked to second for para outside of our assignment",
);

return Ok(())
}

Expand Down
8 changes: 6 additions & 2 deletions node/core/dispute-coordinator/src/real/initialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,9 @@ impl Initialized {
},
};
let candidate_receipt = votes.candidate_receipt.clone();
let was_concluded_valid = votes.valid.len() >= supermajority_threshold;
let was_concluded_invalid = votes.invalid.len() >= supermajority_threshold;

let mut recent_disputes = overlay_db.load_recent_disputes()?.unwrap_or_default();
let controlled_indices = find_controlled_validator_indices(&self.keystore, &validators);

Expand Down Expand Up @@ -813,10 +816,11 @@ impl Initialized {
self.metrics.on_open();
}

if concluded_valid {
if !was_concluded_valid && concluded_valid {
self.metrics.on_concluded_valid();
}
if concluded_invalid {

if !was_concluded_invalid && concluded_invalid {
self.metrics.on_concluded_invalid();
}

Expand Down
42 changes: 3 additions & 39 deletions node/network/dispute-distribution/src/sender/send_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,6 @@ pub struct SendTask {
/// Whether we have any tasks failed since the last refresh.
has_failed_sends: bool,

/// Total count of failed transmissions.
///
/// Used for issuing a warning, if that number gets above a certain threshold.
failed_count: usize,

/// Total number of initiated requests.
///
/// Used together with `failed_count` for issuing a warning on too many failed attempts.
send_count: usize,

/// Sender to be cloned for tasks.
tx: mpsc::Sender<TaskFinish>,
}
Expand Down Expand Up @@ -120,14 +110,8 @@ impl SendTask {
request: DisputeRequest,
metrics: &Metrics,
) -> Result<Self> {
let mut send_task = Self {
request,
deliveries: HashMap::new(),
has_failed_sends: false,
tx,
failed_count: 0,
send_count: 0,
};
let mut send_task =
Self { request, deliveries: HashMap::new(), has_failed_sends: false, tx };
send_task.refresh_sends(ctx, runtime, active_sessions, metrics).await?;
Ok(send_task)
}
Expand Down Expand Up @@ -160,7 +144,6 @@ impl SendTask {
.await?;

self.has_failed_sends = false;
self.send_count += new_statuses.len();
self.deliveries.extend(new_statuses.into_iter());
Ok(())
}
Expand All @@ -176,33 +159,14 @@ impl SendTask {
pub fn on_finished_send(&mut self, authority: &AuthorityDiscoveryId, result: TaskResult) {
match result {
TaskResult::Failed(err) => {
tracing::debug!(
tracing::trace!(
target: LOG_TARGET,
?authority,
candidate_hash = %self.request.0.candidate_receipt.hash(),
%err,
"Error sending dispute statements to node."
);

self.failed_count += 1;
let error_rate = (100 * self.failed_count).checked_div(self.send_count).expect(
"We cannot receive a failed request, without having sent one first. qed.",
);
// 10% seems to be a sensible threshold to become alert - note that
// self.send_count gets increased in batches of the full validator set, so we don't
// need to account for a low send_count.
if error_rate > 10 {
tracing::warn!(
target: LOG_TARGET,
candidate_hash = %self.request.0.candidate_receipt.hash(),
last_authority = ?authority,
last_error = %err,
failed_count = ?self.failed_count,
total_attempts = ?self.send_count,
"Sending our dispute vote failed for more than 10% of total attempts!"
);
}

self.has_failed_sends = true;
// Remove state, so we know what to try again:
self.deliveries.remove(authority);
Expand Down