paritytech · mrcnski · Oct 31, 2023 · Oct 24, 2023 · Oct 24, 2023 · Oct 25, 2023
diff --git a/polkadot/node/core/pvf/src/execute/worker_intf.rs b/polkadot/node/core/pvf/src/execute/worker_intf.rs
@@ -18,6 +18,7 @@
 
 use crate::{
 	artifacts::ArtifactPathId,
+	security,
 	worker_intf::{
 		clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
 		SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
@@ -106,7 +107,7 @@ pub enum Outcome {
 /// returns the outcome.
 ///
 /// NOTE: Not returning the idle worker token in `Outcome` will trigger the child process being
-/// killed.
+/// killed, if it's still alive.
 pub async fn start_work(
 	worker: IdleWorker,
 	artifact: ArtifactPathId,
@@ -124,7 +125,10 @@ pub async fn start_work(
 		artifact.path.display(),
 	);
 
+	let artifact_path = artifact.path.clone();
 	with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
+		let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;
+
 		if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
 			gum::warn!(
 				target: LOG_TARGET,
@@ -153,6 +157,21 @@ pub async fn start_work(
 							?error,
 							"failed to recv an execute response",
 						);
+						// The worker died. Check if it was due to a seccomp violation.
+						if let Some(syscall) = security::check_seccomp_violation_for_worker(audit_log_file, pid).await {
+							// NOTE: Log, but don't change the outcome. Not all validators may have
+							// auditing enabled, so we don't want attackers to abuse a
+							// non-deterministic outcome.
+							gum::error!(
+								target: LOG_TARGET,
+								worker_pid = %pid,
+								%syscall,
+								validation_code_hash = ?artifact.id.code_hash,
+								?artifact_path,
+								"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
+							);
+						}
+
 						return Outcome::IoErr
 					},
 					Ok(response) => {

diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs
@@ -18,6 +18,7 @@
 
 use crate::{
 	metrics::Metrics,
+	security,
 	worker_intf::{
 		clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
 		SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
@@ -126,7 +127,9 @@ pub async fn start_work(
 		pid,
 		|tmp_artifact_file, mut stream, worker_dir| async move {
 			let preparation_timeout = pvf.prep_timeout();
-			if let Err(err) = send_request(&mut stream, pvf).await {
+			let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;
+
+			if let Err(err) = send_request(&mut stream, pvf.clone()).await {
 				gum::warn!(
 					target: LOG_TARGET,
 					worker_pid = %pid,
@@ -169,6 +172,21 @@ pub async fn start_work(
 						"failed to recv a prepare response: {:?}",
 						err,
 					);
+
+					// The worker died. Check if it was due to a seccomp violation.
+					if let Some(syscall) = security::check_seccomp_violation_for_worker(audit_log_file, pid).await {
+						// NOTE: Log, but don't change the outcome. Not all validators may have
+						// auditing enabled, so we don't want attackers to abuse a non-deterministic
+						// outcome.
+						gum::error!(
+							target: LOG_TARGET,
+							worker_pid = %pid,
+							%syscall,
+							?pvf,
+							"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
+						);
+					}
+
 					Outcome::IoErr(err.to_string())
 				},
 				Err(_) => {

diff --git a/polkadot/node/core/pvf/src/security.rs b/polkadot/node/core/pvf/src/security.rs
@@ -16,6 +16,10 @@
 
 use crate::LOG_TARGET;
 use std::path::Path;
+use tokio::{
+	fs::{File, OpenOptions},
+	io::{AsyncReadExt, AsyncSeekExt, SeekFrom},
+};
 
 /// Check if we can sandbox the root and emit a warning if not.
 ///
@@ -162,3 +166,147 @@ pub async fn check_seccomp(
 		}
 	}
 }
+
+const AUDIT_LOG_PATH: &'static str = "/var/log/audit/audit.log";
+const SYSLOG_PATH: &'static str = "/var/log/syslog";
+
+/// System audit log.
+pub struct AuditLogFile {
+	file: File,
+	path: &'static str,
+}
+
+impl AuditLogFile {
+	/// Looks for an audit log file on the system and opens it, seeking to the end to skip any
+	/// events from before this was called.
+	///
+	/// A bit of a verbose name, but it should clue future refactorers not to move calls closer to
+	/// where the `AuditLogFile` is used.
+	pub async fn try_open_and_seek_to_end() -> Option<Self> {
+		let mut path = AUDIT_LOG_PATH;
+		let mut file = match OpenOptions::new().read(true).open(AUDIT_LOG_PATH).await {
+			Ok(file) => Ok(file),
+			Err(_) => {
+				path = SYSLOG_PATH;
+				OpenOptions::new().read(true).open(SYSLOG_PATH).await
+			},
+		}
+		.ok()?;
+
+		let _pos = file.seek(SeekFrom::End(0)).await;
+
+		Some(Self { file, path })
+	}
+
+	async fn read_new_since_open(mut self) -> String {
+		let mut buf = String::new();
+		let _len = self.file.read_to_string(&mut buf).await;
+		buf
+	}
+}
+
+/// Check if a seccomp violation occurred for the given worker. As the syslog may be in a different
+/// location, or seccomp auditing may be disabled, this function provides a best-effort attempt
+/// only.
+///
+/// The `audit_log_file` must have been obtained before the job started. It only allows reading
+/// entries that were written since it was obtained, so that we do not consider events from previous
+/// processes with the same pid. This can still be racy, but it's unlikely and fine for a
+/// best-effort attempt.
+pub async fn check_seccomp_violation_for_worker(
+	audit_log_file: Option<AuditLogFile>,
+	worker_pid: u32,
+) -> Option<u32> {
+	let audit_event_pid_field = format!("pid={worker_pid}");
+
+	let audit_log_file = match audit_log_file {
+		Some(file) => {
+			gum::debug!(
+				target: LOG_TARGET,
+				%worker_pid,
+				audit_log_path = ?file.path,
+				"checking audit log for seccomp violations",
+			);
+			file
+		},
+		None => {
+			gum::warn!(
+				target: LOG_TARGET,
+				%worker_pid,
+				"could not open either {AUDIT_LOG_PATH} or {SYSLOG_PATH} for reading audit logs"
+			);
+			return None
+		},
+	};
+	let events = audit_log_file.read_new_since_open().await;
+
+	for event in events.lines() {
+		let res = parse_audit_log_for_seccomp_event(event, &audit_event_pid_field);
+		if res.is_some() {
+			return res
+		}
+	}
+
+	None
+}
+
+fn parse_audit_log_for_seccomp_event(event: &str, audit_event_pid_field: &str) -> Option<u32> {
+	const SECCOMP_AUDIT_EVENT_TYPE: &'static str = "type=1326";
+
+	// Do a series of simple .contains instead of a regex, because I'm not sure if the fields are
+	// guaranteed to always be in the same order.
+	if !event.contains(SECCOMP_AUDIT_EVENT_TYPE) || !event.contains(&audit_event_pid_field) {
+		return None
+	}
+
+	// Get the syscall. Let's avoid a dependency on regex just for this.
+	for field in event.split(" ") {
+		if let Some(syscall) = field.strip_prefix("syscall=") {
+			return syscall.parse::<u32>().ok()
+		}
+	}
+
+	None
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	#[test]
+	fn test_parse_audit_log_for_seccomp_event() {
+		let audit_event_pid_field = "pid=2559058";
+
+		assert_eq!(
+			parse_audit_log_for_seccomp_event(
+				r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559058 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
+				audit_event_pid_field
+			),
+			Some(53)
+		);
+		// pid is wrong
+		assert_eq!(
+			parse_audit_log_for_seccomp_event(
+				r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
+				audit_event_pid_field
+			),
+			None
+		);
+		// type is wrong
+		assert_eq!(
+			parse_audit_log_for_seccomp_event(
+				r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
+				audit_event_pid_field
+			),
+			None
+		);
+		// no syscall field
+		assert_eq!(
+			parse_audit_log_for_seccomp_event(
+				r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
+				audit_event_pid_field
+			),
+			None
+		);
+	}
+}
diff --git a/polkadot/node/core/pvf/src/worker_intf.rs b/polkadot/node/core/pvf/src/worker_intf.rs
@@ -245,7 +245,7 @@ pub enum SpawnErr {
 /// has been terminated. Since the worker is running in another process it is obviously not
 /// necessary to poll this future to make the worker run, it's only for termination detection.
 ///
-/// This future relies on the fact that a child process's stdout `fd` is closed upon it's
+/// This future relies on the fact that a child process's stdout `fd` is closed upon its
 /// termination.
 #[pin_project]
 pub struct WorkerHandle {

diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs
@@ -14,7 +14,6 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
 
-#[cfg(feature = "ci-only-tests")]
 use assert_matches::assert_matches;
 use parity_scale_codec::Encode as _;
 use polkadot_node_core_pvf::{
@@ -28,7 +27,7 @@ use polkadot_primitives::ExecutorParams;
 #[cfg(feature = "ci-only-tests")]
 use polkadot_primitives::ExecutorParam;
 
-use std::time::Duration;
+use std::{process::Command, time::Duration};
 use tokio::sync::Mutex;
 
 mod adder;
@@ -153,6 +152,68 @@ async fn terminates_on_timeout() {
 	assert!(duration < TEST_EXECUTION_TIMEOUT * JOB_TIMEOUT_WALL_CLOCK_FACTOR);
 }
 
+// What happens when the prepare worker dies in the middle of a job?
+//
+// To avoid interfering with other running tests or processes, this test is ignored and must be run
+// by itself:
+// $  cargo test prepare_worker_killed_during_job -- --include-ignored
+#[ignore]
+#[tokio::test]
+async fn prepare_worker_killed_during_job() {
+	let host = TestHost::new().await;
+
+	let (result, _) = futures::join!(
+		// Choose a job that would normally take the entire timeout.
+		host.precheck_pvf(rococo_runtime::WASM_BINARY.unwrap(), Default::default()),
+		// Run a future that kills the job in the middle of the timeout.
+		async {
+			tokio::time::sleep(TEST_PREPARATION_TIMEOUT / 2).await;
+			Command::new("killall").args(["-9", "polkadot-prepare-worker"]).spawn().unwrap();
+		}
+	);
+
+	assert_matches!(result, Err(PrepareError::IoErr(_)));
+}
+
+// What happens when the execute worker dies in the middle of a job?
+//
+// To avoid interfering with other running tests or processes, this test is ignored and must be run
+// by itself:
+// $  cargo test execute_worker_killed_during_job -- --include-ignored
+#[ignore]
+#[tokio::test]
+async fn execute_worker_killed_during_job() {
+	let host = TestHost::new().await;
+
+	// Prepare the artifact ahead of time.
+	let binary = halt::wasm_binary_unwrap();
+	host.precheck_pvf(binary, Default::default()).await.unwrap();
+
+	let (result, _) = futures::join!(
+		// Choose an job that would normally take the entire timeout.
+		host.validate_candidate(
+			binary,
+			ValidationParams {
+				block_data: BlockData(Vec::new()),
+				parent_head: Default::default(),
+				relay_parent_number: 1,
+				relay_parent_storage_root: Default::default(),
+			},
+			Default::default(),
+		),
+		// Run a future that kills the job in the middle of the timeout.
+		async {
+			tokio::time::sleep(TEST_EXECUTION_TIMEOUT / 2).await;
+			Command::new("killall").args(["-9", "polkadot-execute-worker"]).spawn().unwrap();
+		}
+	);
+
+	assert_matches!(
+		result,
+		Err(ValidationError::InvalidCandidate(InvalidCandidate::AmbiguousWorkerDeath))
+	);
+}
+
 #[cfg(feature = "ci-only-tests")]
 #[tokio::test]
 async fn ensure_parallel_execution() {