Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PVF worker: Add seccomp restrictions (restrict networking) #2009

Merged
merged 13 commits into from
Oct 31, 2023
14 changes: 11 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion polkadot/node/core/candidate-validation/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ async fn run<Context>(
exec_worker_path,
),
pvf_metrics,
);
)
.await;
ctx.spawn_blocking("pvf-validation-host", task.boxed())?;

loop {
Expand Down
3 changes: 2 additions & 1 deletion polkadot/node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ cpu-time = "1.0.0"
futures = "0.3.21"
gum = { package = "tracing-gum", path = "../../../gum" }
libc = "0.2.139"
tokio = { version = "1.24.2", features = ["fs", "process", "io-util"] }

parity-scale-codec = { version = "3.6.1", default-features = false, features = ["derive"] }

Expand All @@ -30,6 +29,8 @@ sp-tracing = { path = "../../../../../substrate/primitives/tracing" }

[target.'cfg(target_os = "linux")'.dependencies]
landlock = "0.3.0"
seccompiler = "0.3.0"
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
thiserror = "1.0.31"

[dev-dependencies]
assert_matches = "1.4.0"
Expand Down
5 changes: 3 additions & 2 deletions polkadot/node/core/pvf/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ pub use sp_tracing;
const LOG_TARGET: &str = "parachain::pvf-common";

use std::{
io::{Read, Write},
io::{self, Read, Write},
mem,
};
use tokio::io;

#[cfg(feature = "test-utils")]
pub mod tests {
Expand All @@ -50,6 +49,8 @@ pub mod tests {
pub struct SecurityStatus {
/// Whether the landlock features we use are fully available on this system.
pub can_enable_landlock: bool,
/// Whether the seccomp features we use are fully available on this system.
pub can_enable_seccomp: bool,
// Whether we are able to unshare the user namespace and change the filesystem root.
pub can_unshare_user_namespace_and_change_root: bool,
}
Expand Down
55 changes: 36 additions & 19 deletions polkadot/node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ use cpu_time::ProcessTime;
use futures::never::Never;
use std::{
any::Any,
fmt,
fmt, io,
os::unix::net::UnixStream,
path::PathBuf,
sync::mpsc::{Receiver, RecvTimeoutError},
time::Duration,
};
use tokio::{io, runtime::Runtime};

/// Use this macro to declare a `fn main() {}` that will create an executable that can be used for
/// spawning the desired worker.
Expand Down Expand Up @@ -75,6 +74,13 @@ macro_rules! decl_worker_main {
let status = -1;
std::process::exit(status)
},
"--check-can-enable-seccomp" => {
#[cfg(target_os = "linux")]
let status = if security::seccomp::check_is_fully_enabled() { 0 } else { -1 };
#[cfg(not(target_os = "linux"))]
let status = -1;
std::process::exit(status)
},
"--check-can-unshare-user-namespace-and-change-root" => {
#[cfg(target_os = "linux")]
let status = if let Err(err) = security::unshare_user_namespace_and_change_root(
Expand Down Expand Up @@ -119,6 +125,7 @@ macro_rules! decl_worker_main {
let mut worker_dir_path = None;
let mut node_version = None;
let mut can_enable_landlock = false;
let mut can_enable_seccomp = false;
let mut can_unshare_user_namespace_and_change_root = false;

let mut i = 2;
Expand All @@ -137,6 +144,7 @@ macro_rules! decl_worker_main {
i += 1
},
"--can-enable-landlock" => can_enable_landlock = true,
"--can-enable-seccomp" => can_enable_seccomp = true,
"--can-unshare-user-namespace-and-change-root" =>
can_unshare_user_namespace_and_change_root = true,
arg => panic!("Unexpected argument found: {}", arg),
Expand All @@ -151,6 +159,7 @@ macro_rules! decl_worker_main {
let worker_dir_path = std::path::Path::new(worker_dir_path).to_owned();
let security_status = $crate::SecurityStatus {
can_enable_landlock,
can_enable_seccomp,
can_unshare_user_namespace_and_change_root,
};

Expand Down Expand Up @@ -188,7 +197,7 @@ impl fmt::Display for WorkerKind {

// The worker version must be passed in so that we accurately get the version of the worker, and not
// the version that this crate was compiled with.
pub fn worker_event_loop<F, Fut>(
pub fn worker_event_loop<F>(
worker_kind: WorkerKind,
socket_path: PathBuf,
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
Expand All @@ -197,8 +206,7 @@ pub fn worker_event_loop<F, Fut>(
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
mut event_loop: F,
) where
F: FnMut(UnixStream, PathBuf) -> Fut,
Fut: futures::Future<Output = io::Result<Never>>,
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
{
let worker_pid = std::process::id();
gum::debug!(
Expand Down Expand Up @@ -252,7 +260,7 @@ pub fn worker_event_loop<F, Fut>(
}

// Connect to the socket.
let stream = || -> std::io::Result<UnixStream> {
let stream = || -> io::Result<UnixStream> {
let stream = UnixStream::connect(&socket_path)?;
let _ = std::fs::remove_file(&socket_path);
Ok(stream)
Expand Down Expand Up @@ -307,6 +315,22 @@ pub fn worker_event_loop<F, Fut>(
let landlock_status =
security::landlock::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
if !matches!(landlock_status, Ok(landlock::RulesetStatus::FullyEnforced)) {
// We previously were able to enable, so this should never happen.
gum::error!(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"could not fully enable landlock: {:?}. This should not happen, please report to the Polkadot devs",
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
landlock_status
);
}
}

#[cfg(target_os = "linux")]
if security_status.can_enable_seccomp {
let seccomp_status =
security::seccomp::enable_for_worker(worker_kind, worker_pid, &worker_dir_path);
if !matches!(seccomp_status, Ok(())) {
// We previously were able to enable, so this should never happen.
//
// TODO: Make this a real error in secure-mode. See:
Expand All @@ -315,8 +339,8 @@ pub fn worker_event_loop<F, Fut>(
target: LOG_TARGET,
%worker_kind,
%worker_pid,
"could not fully enable landlock: {:?}. This should not happen, please report to the Polkadot devs",
landlock_status
"could not fully enable seccomp: {:?}. This should not happen, please report to the Polkadot devs",
seccomp_status
);
}
}
Expand All @@ -336,18 +360,11 @@ pub fn worker_event_loop<F, Fut>(
}

// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
let err = rt
.block_on(event_loop(stream, worker_dir_path))
let err = event_loop(stream, worker_dir_path)
// It's never `Ok` because it's `Ok(Never)`.
.unwrap_err();

worker_shutdown_message(worker_kind, worker_pid, &err.to_string());

// We don't want tokio to wait for the tasks to finish. We want to bring down the worker as fast
// as possible and not wait for stalled validation to finish. This isn't strictly necessary now,
// but may be in the future.
rt.shutdown_background();
}

/// Provide a consistent message on worker shutdown.
Expand Down Expand Up @@ -428,7 +445,7 @@ fn kill_parent_node_in_emergency() {
/// The motivation for this module is to coordinate worker threads without using async Rust.
pub mod thread {
use std::{
panic,
io, panic,
sync::{Arc, Condvar, Mutex},
thread,
time::Duration,
Expand Down Expand Up @@ -469,7 +486,7 @@ pub mod thread {
f: F,
cond: Cond,
outcome: WaitOutcome,
) -> std::io::Result<thread::JoinHandle<R>>
) -> io::Result<thread::JoinHandle<R>>
where
F: FnOnce() -> R,
F: Send + 'static + panic::UnwindSafe,
Expand All @@ -487,7 +504,7 @@ pub mod thread {
cond: Cond,
outcome: WaitOutcome,
stack_size: usize,
) -> std::io::Result<thread::JoinHandle<R>>
) -> io::Result<thread::JoinHandle<R>>
where
F: FnOnce() -> R,
F: Send + 'static + panic::UnwindSafe,
Expand Down
Loading