diff --git a/Cargo.lock b/Cargo.lock index b7fe040244f..34b3ece0852 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,6 +113,15 @@ version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +[[package]] +name = "arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.6.0" @@ -1136,6 +1145,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.23", +] + [[package]] name = "derive_destructure2" version = "0.1.1" @@ -2088,6 +2108,17 @@ version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +[[package]] +name = "libfuzzer-sys" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "beb09950ae85a0a94b27676cccf37da5ff13f27076aa1adbc6545dd0d0e1bd4e" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + [[package]] name = "libloading" version = "0.7.4" @@ -2903,6 +2934,21 @@ dependencies = [ "opendal", ] +[[package]] +name = "opendal-fuzz" +version = "0.0.0" +dependencies = [ + "anyhow", + "arbitrary", + "bytes", + "dotenvy", + "libfuzzer-sys", + "opendal", + "sha2", + "tokio", + "uuid", +] + [[package]] name = "opendal-hs" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c1f6ca38537..08e19e5a547 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,8 @@ members = [ "bin/oli", "bin/oay", + + "core/fuzz", ] resolver = "2" diff --git a/core/fuzz/.gitignore 
b/core/fuzz/.gitignore new file mode 100644 index 00000000000..8245cf2ebb4 --- /dev/null +++ b/core/fuzz/.gitignore @@ -0,0 +1,5 @@ +target +corpus +artifacts +coverage +tmp \ No newline at end of file diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml new file mode 100644 index 00000000000..03fa4c5cf64 --- /dev/null +++ b/core/fuzz/Cargo.toml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +edition = "2021" +name = "opendal-fuzz" +publish = false +version = "0.0.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +anyhow = "1.0.71" +arbitrary = { version = "1.3.0", features = ["derive"] } +bytes = "1.2" +dotenvy = "0.15.6" +libfuzzer-sys = "0.4" +opendal = { path = ".." } +sha2 = { version = "0.10.6" } +tokio = { version = "1", features = ["full"] } +uuid = { version = "1.3.0", features = ["v4"] } + +[profile.release] +debug = 1 + +[[bin]] +doc = false +name = "fuzz_reader" +path = "fuzz_reader.rs" +test = false diff --git a/core/fuzz/README.md b/core/fuzz/README.md new file mode 100644 index 00000000000..e08bb125db9 --- /dev/null +++ b/core/fuzz/README.md @@ -0,0 +1,40 @@ +# Fuzz Test for OpenDAL + +Fuzz tests are used to test the robustness of the code.
+
+## Setup
+
+
+
+To run the fuzz tests, copy the `.env.example` file at the project root to `.env` and adjust the values as needed.
+
+Take `fs` for example: to enable the fuzz test on `fs` with `/tmp` as its root, change
+
+```dotenv
+OPENDAL_FS_TEST=false
+OPENDAL_FS_ROOT=/path/to/dir
+```
+
+into
+
+```dotenv
+OPENDAL_FS_TEST=on
+OPENDAL_FS_ROOT=/tmp
+```
+
+
+## Run
+
+List all fuzz targets.
+
+```bash
+cargo +nightly fuzz list
+```
+
+Run a fuzz target (such as `fuzz_reader`).
+
+```bash
+cargo +nightly fuzz run fuzz_reader
+```
+
+For more details, please visit [cargo fuzz](https://rust-fuzz.github.io/book/cargo-fuzz/tutorial.html) or run the command `cargo fuzz --help`.
\ No newline at end of file
diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs
new file mode 100644
index 00000000000..7ebb0ff2528
--- /dev/null
+++ b/core/fuzz/fuzz_reader.rs
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#![no_main]
+
+mod utils;
+
+use bytes::Bytes;
+use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
+use libfuzzer_sys::fuzz_target;
+use opendal::raw::oio::ReadExt;
+use opendal::Operator;
+use sha2::{Digest, Sha256};
+use std::io::SeekFrom;
+
+/// Upper bound, in bytes, on the object size generated for one fuzz input.
+const MAX_DATA_SIZE: usize = 16 * 1024 * 1024;
+
+/// A single operation replayed against the reader under test.
+#[derive(Debug, Clone)]
+enum ReaderAction {
+    /// Read into a freshly allocated buffer of `size` bytes.
+    Read { size: usize },
+    /// Seek the reader to the given position.
+    Seek(SeekFrom),
+    /// Pull the next chunk of bytes from the reader.
+    Next,
+}
+
+/// One fuzz case: the object content and the actions to replay on a reader over it.
+#[derive(Debug, Clone)]
+struct FuzzInput {
+    actions: Vec<ReaderAction>,
+    data: Vec<u8>,
+}
+
+impl Arbitrary<'_> for FuzzInput {
+    /// Decodes a fuzz case from raw fuzzer bytes: 1..=`MAX_DATA_SIZE` bytes of object
+    /// content followed by 128..=1024 actions whose sizes and offsets are drawn relative
+    /// to the content length.
+    fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
+        // NOTE(review): `data_len` is drawn without looking at how many bytes the fuzzer
+        // supplied; presumably `u.bytes` rejects the input when it asks for more than is
+        // left, so large draws are wasted. Consider bounding the range by `u.len()`.
+        let data_len = u.int_in_range(1..=MAX_DATA_SIZE)?;
+        let data: Vec<u8> = u.bytes(data_len)?.to_vec();
+
+        let action_count = u.int_in_range(128..=1024)?;
+        let mut actions = Vec::new();
+
+        for _ in 0..action_count {
+            let action = match u.int_in_range(0..=2)? {
+                0 => {
+                    // Sizes go up to twice the object length, so some reads ask for more.
+                    let size = u.int_in_range(0..=data_len * 2)?;
+                    ReaderAction::Read { size }
+                }
+                1 => {
+                    let offset: i64 = u.int_in_range(-(data_len as i64)..=(data_len as i64))?;
+                    let seek_from = match u.int_in_range(0..=2)? {
+                        // `Start` takes an unsigned offset, so the drawn sign is dropped.
+                        0 => SeekFrom::Start(offset.unsigned_abs()),
+                        1 => SeekFrom::End(offset),
+                        _ => SeekFrom::Current(offset),
+                    };
+                    ReaderAction::Seek(seek_from)
+                }
+                _ => ReaderAction::Next,
+            };
+            actions.push(action);
+        }
+
+        Ok(FuzzInput { actions, data })
+    }
+}
+
+/// Tracks the position a correct reader must be at and validates every result against
+/// the bytes that were originally written.
+struct ReaderFuzzerChecker {
+    data: Vec<u8>,
+    size: usize,
+    cur: usize,
+}
+
+impl ReaderFuzzerChecker {
+    /// Creates a checker positioned at the start of `data`.
+    fn new(data: Vec<u8>) -> Self {
+        Self {
+            size: data.len(),
+            data,
+            cur: 0,
+        }
+    }
+
+    /// Validates a `read` that returned `n` bytes in `output` and advances the position.
+    fn check_read(&mut self, n: usize, output: &[u8]) {
+        if n == 0 {
+            return;
+        }
+
+        // Fail with a descriptive message instead of a slice-index panic when the reader
+        // hands back more bytes than remain after the current position.
+        assert!(
+            self.cur + n <= self.size,
+            "check read failed: output bs is larger than remaining bs",
+        );
+        let expected = &self.data[self.cur..self.cur + n];
+
+        // Check the read result
+        assert_eq!(
+            format!("{:x}", Sha256::digest(output)),
+            format!("{:x}", Sha256::digest(expected)),
+            "check read failed: output bs is different with expected bs",
+        );
+
+        // Update the current position
+        self.cur += n;
+    }
+
+    /// Validates the result of seeking to `seek_from`.
+    ///
+    /// A target before the start of the object must fail with `ErrorKind::InvalidInput`;
+    /// any other target must succeed and report the absolute position, which then becomes
+    /// the tracked position.
+    fn check_seek(&mut self, seek_from: SeekFrom, output: opendal::Result<u64>) {
+        let expected = match seek_from {
+            SeekFrom::Start(offset) => offset as i64,
+            SeekFrom::End(offset) => self.size as i64 + offset,
+            SeekFrom::Current(offset) => self.cur as i64 + offset,
+        };
+
+        if expected < 0 {
+            assert!(output.is_err(), "check seek failed: seek should fail");
+            assert_eq!(
+                output.unwrap_err().kind(),
+                opendal::ErrorKind::InvalidInput,
+                "check seek failed: seek result is different with expected result"
+            );
+        } else {
+            assert_eq!(
+                output.unwrap(),
+                expected as u64,
+                "check seek failed: seek result is different with expected result",
+            );
+
+            // Only update the current position when the seek succeeds.
+            self.cur = expected as usize;
+        }
+    }
+
+    /// Validates the chunk returned by `next` and advances the position past it.
+    ///
+    /// `None` is only acceptable once the position has reached the end of the data.
+    fn check_next(&mut self, output: Option<Bytes>) {
+        if let Some(output) = output {
+            assert!(
+                self.cur + output.len() <= self.size,
+                "check next failed: output bs is larger than remaining bs",
+            );
+
+            assert_eq!(
+                format!("{:x}", Sha256::digest(&output)),
+                format!(
+                    "{:x}",
+                    Sha256::digest(&self.data[self.cur..self.cur + output.len()])
+                ),
+                "check next failed: output bs is different with expected bs",
+            );
+
+            // Update the current position
+            self.cur += output.len();
+        } else {
+            assert!(
+                self.cur >= self.size,
+                "check next failed: output bs is None, we still have bytes to read",
+            )
+        }
+    }
+}
+
+/// Writes `input.data` to a fresh, randomly named path on `op`, replays every action
+/// against a range reader over the whole object while checking each result, and finally
+/// deletes the object.
+///
+/// Any backend failure or mismatch panics so that the fuzzer records it as a crash; the
+/// panic message carries `name` and the underlying error.
+async fn fuzz_reader_process(input: FuzzInput, op: &Operator, name: &str) -> Result<()> {
+    let len = input.data.len();
+    let path = uuid::Uuid::new_v4().to_string();
+
+    let mut checker = ReaderFuzzerChecker::new(input.data.clone());
+    op.write(&path, input.data)
+        .await
+        .unwrap_or_else(|e| panic!("{} write must succeed: {:?}", name, e));
+
+    let mut o = op
+        .range_reader(&path, 0..len as u64)
+        .await
+        .unwrap_or_else(|e| panic!("{} init range_reader must succeed: {:?}", name, e));
+
+    for action in input.actions {
+        match action {
+            ReaderAction::Read { size } => {
+                let mut buf = vec![0; size];
+                let n = o
+                    .read(&mut buf)
+                    .await
+                    .unwrap_or_else(|e| panic!("{} read must succeed: {:?}", name, e));
+                checker.check_read(n, &buf[..n]);
+            }
+
+            ReaderAction::Seek(seek_from) => {
+                let res = o.seek(seek_from).await;
+                checker.check_seek(seek_from, res);
+            }
+
+            ReaderAction::Next => {
+                let res = o.next().await.map(|v| {
+                    v.unwrap_or_else(|e| {
+                        panic!("{} next should not return error: {:?}", name, e)
+                    })
+                });
+                checker.check_next(res);
+            }
+        }
+    }
+
+    op.delete(&path)
+        .await
+        .unwrap_or_else(|e| panic!("{} delete must succeed: {:?}", name, e));
+    Ok(())
+}
+
+/// Runs one fuzz case against `op` to completion on a freshly built tokio runtime.
+fn fuzz_reader(name: &str, op: &Operator, input: FuzzInput) {
+    let runtime = tokio::runtime::Runtime::new().expect("tokio runtime must build");
+
+    runtime.block_on(async {
+        fuzz_reader_process(input, op, name)
+            .await
+            .unwrap_or_else(|e| panic!("{} fuzz_reader must succeed: {:?}", name, e));
+    });
+}
+
+fuzz_target!(|input: FuzzInput| {
+    let _ = dotenvy::dotenv();
+
+    for (name, op) in utils::init_services() {
+        // Entries without an operator are skipped; the same input is replayed on the rest.
+        if let Some(op) = op {
+            fuzz_reader(name, &op, input.clone());
+        }
+    }
+});
diff --git a/core/fuzz/utils.rs b/core/fuzz/utils.rs
new file mode 100644
index 00000000000..184ed33aa37
--- /dev/null
+++ 
b/core/fuzz/utils.rs
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use opendal::{services, Builder, Operator};
+use std::env;
+
+/// Builds an [`Operator`] for service `B` from `OPENDAL_{SCHEME}_*` environment variables.
+///
+/// Returns `None` unless `OPENDAL_{SCHEME}_TEST` is set to `on`. Otherwise every variable
+/// whose lower-cased name starts with `opendal_{scheme}_` is handed to the builder under
+/// its name with that prefix removed (`{scheme}` is `B::SCHEME` as formatted by `Display`).
+///
+/// # Panics
+///
+/// Panics if the operator cannot be built from the collected settings.
+fn service<B: Builder>() -> Option<Operator> {
+    let test_key = format!("opendal_{}_test", B::SCHEME).to_uppercase();
+    if env::var(test_key).unwrap_or_default() != "on" {
+        return None;
+    }
+
+    let prefix = format!("opendal_{}_", B::SCHEME);
+    let envs = env::vars()
+        .filter_map(move |(k, v)| {
+            k.to_lowercase()
+                .strip_prefix(&prefix)
+                .map(|k| (k.to_string(), v))
+        })
+        .collect();
+
+    Some(
+        Operator::from_map::<B>(envs)
+            .unwrap_or_else(|e| panic!("init {} must succeed: {:?}", B::SCHEME, e))
+            .finish(),
+    )
+}
+
+/// Lists every service the fuzz targets know about, each paired with its operator when
+/// the service is enabled in the environment and `None` otherwise.
+pub fn init_services() -> Vec<(&'static str, Option<Operator>)> {
+    vec![
+        ("fs", service::<services::Fs>()),
+        ("memory", service::<services::Memory>()),
+    ]
+}