diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index 9f7b17f61299..d371637a5dc1 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -651,21 +651,22 @@ async fn validate_candidate_exhaustive( ); Err(ValidationFailed(e.to_string())) }, - Err(ValidationError::Invalid(WasmInvalidCandidate::HardTimeout)) => - Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)), Err(ValidationError::Invalid(WasmInvalidCandidate::WorkerReportedInvalid(e))) => Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(e))), + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousWorkerDeath)) => Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError( "ambiguous worker death".to_string(), ))), - Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))) => - Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))), - Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err))) => Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(format!( "ambiguous job death: {err}" )))), + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)) => + Ok(ValidationResult::Invalid(InvalidCandidate::Timeout)), + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))) => + Ok(ValidationResult::Invalid(InvalidCandidate::ExecutionError(err))), + Err(ValidationError::Preparation(e)) => { gum::warn!( target: LOG_TARGET, @@ -766,21 +767,20 @@ trait ValidationBackend { } else { break; }, - - Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(_))) => + Err(ValidationError::PossiblyInvalid( + PossiblyInvalidError::JobError(_) | PossiblyInvalidError::HardTimeout, + )) => if num_job_error_retries_left > 0 { num_job_error_retries_left -= 1; } else { break; }, - Err(ValidationError::Internal(_)) => if num_internal_retries_left > 0 { num_internal_retries_left -= 1; } else { break; }, - Ok(_) | Err(ValidationError::Invalid(_) | ValidationError::Preparation(_)) => break, } diff --git a/polkadot/node/core/candidate-validation/src/tests.rs b/polkadot/node/core/candidate-validation/src/tests.rs index 110785804652..14e354a467da 100644 --- a/polkadot/node/core/candidate-validation/src/tests.rs +++ b/polkadot/node/core/candidate-validation/src/tests.rs @@ -480,8 +480,8 @@ fn candidate_validation_bad_return_is_invalid() { let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() }; let v = executor::block_on(validate_candidate_exhaustive( - MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid( - WasmInvalidCandidate::HardTimeout, + MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::PossiblyInvalid( + PossiblyInvalidError::HardTimeout, ))), validation_data, validation_code, @@ -758,8 +758,8 @@ fn candidate_validation_timeout_is_internal_error() { let candidate_receipt = CandidateReceipt { descriptor, commitments_hash: Hash::zero() }; let v = executor::block_on(validate_candidate_exhaustive( - MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid( - WasmInvalidCandidate::HardTimeout, + MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::PossiblyInvalid( + PossiblyInvalidError::HardTimeout, ))), validation_data, validation_code, @@ -852,8 +852,8 @@ fn candidate_validation_code_mismatch_is_invalid() { let (_ctx, _ctx_handle) = test_helpers::make_subsystem_context::(pool.clone()); let v = executor::block_on(validate_candidate_exhaustive( - MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::Invalid( - WasmInvalidCandidate::HardTimeout, + MockValidateCandidateBackend::with_hardcoded_result(Err(ValidationError::PossiblyInvalid( + PossiblyInvalidError::HardTimeout, ))), validation_data, validation_code, diff --git a/polkadot/node/core/pvf/src/error.rs b/polkadot/node/core/pvf/src/error.rs index 442443f326e9..f5ea5df8207c 100644 --- a/polkadot/node/core/pvf/src/error.rs +++ b/polkadot/node/core/pvf/src/error.rs @@ -44,8 +44,6 @@ pub enum InvalidCandidate { /// The candidate is reported to be invalid by the execution worker. The string contains the /// error message. WorkerReportedInvalid(String), - /// PVF execution (compilation is not included) took more time than was allotted. - HardTimeout, } /// Possibly transient issue that may resolve after retries. @@ -70,6 +68,8 @@ pub enum PossiblyInvalidError { /// /// We cannot treat this as an internal error because malicious code may have caused this. AmbiguousJobDeath(String), + /// PVF execution (compilation is not included) took more time than was allotted. + HardTimeout, /// An unexpected error occurred in the job process and we can't be sure whether the candidate /// is really invalid or some internal glitch occurred. Whenever we are unsure, we can never /// treat an error as internal as we would abstain from voting. This is bad because if the diff --git a/polkadot/node/core/pvf/src/execute/queue.rs b/polkadot/node/core/pvf/src/execute/queue.rs index a0c24fd44323..91dfa064d106 100644 --- a/polkadot/node/core/pvf/src/execute/queue.rs +++ b/polkadot/node/core/pvf/src/execute/queue.rs @@ -346,10 +346,6 @@ fn handle_job_finish( None, ), Outcome::InternalError { err } => (None, Err(ValidationError::Internal(err)), None), - // Either the worker or the job timed out. Kill the worker in either case. Treated as - // definitely-invalid, because if we timed out, there's no time left for a retry. - Outcome::HardTimeout => - (None, Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)), None), // "Maybe invalid" errors (will retry). Outcome::WorkerIntfErr => ( None, @@ -361,6 +357,10 @@ fn handle_job_finish( Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::AmbiguousJobDeath(err))), None, ), + // Either the worker or the job timed out. Kill the worker in either case. Treated as + // definitely-invalid, because if we timed out, there's no time left for a retry. + Outcome::HardTimeout => + (None, Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)), None), Outcome::JobError { err } => (None, Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::JobError(err))), None), }; diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 4c81ac502dd4..22e828d135f0 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -19,8 +19,9 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ - start, testing::build_workers_and_get_paths, Config, InvalidCandidate, Metrics, PrepareError, - PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, + start, testing::build_workers_and_get_paths, Config, Metrics, PossiblyInvalidError, + PrepareError, PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, + JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult}; use polkadot_primitives::{ExecutorParam, ExecutorParams}; @@ -162,7 +163,7 @@ async fn execute_job_terminates_on_timeout() { .await; match result { - Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) => {}, + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)) => {}, r => panic!("{:?}", r), } @@ -202,8 +203,8 @@ async fn ensure_parallel_execution() { assert_matches!( (res1, res2), ( - Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)), - Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)), + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)) ) ); @@ -344,7 +345,7 @@ async fn deleting_prepared_artifact_does_not_dispute() { .await; match result { - Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) => {}, + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)) => {}, r => panic!("{:?}", r), } } diff --git a/polkadot/node/core/pvf/tests/it/process.rs b/polkadot/node/core/pvf/tests/it/process.rs index b742acb15d02..08279297a47a 100644 --- a/polkadot/node/core/pvf/tests/it/process.rs +++ b/polkadot/node/core/pvf/tests/it/process.rs @@ -19,9 +19,7 @@ use super::TestHost; use assert_matches::assert_matches; -use polkadot_node_core_pvf::{ - InvalidCandidate, PossiblyInvalidError, PrepareError, ValidationError, -}; +use polkadot_node_core_pvf::{PossiblyInvalidError, PrepareError, ValidationError}; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams}; use procfs::process; use rusty_fork::rusty_fork_test; @@ -153,7 +151,7 @@ rusty_fork_test! { assert_matches!( result, - Err(ValidationError::Invalid(InvalidCandidate::HardTimeout)) + Err(ValidationError::PossiblyInvalid(PossiblyInvalidError::HardTimeout)) ); }) }