diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 13ed8da..9abcc55 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -56,6 +56,12 @@ path = "fuzz_targets/inflate_bytewise2.rs"
 test = false
 doc = false
 
+[[bin]]
+name = "inflate_bytewise3"
+path = "fuzz_targets/inflate_bytewise3.rs"
+test = false
+doc = false
+
 [[bin]]
 name = "inflate_split"
 path = "fuzz_targets/inflate_split.rs"
diff --git a/fuzz/fuzz_targets/inflate_bytewise3.rs b/fuzz/fuzz_targets/inflate_bytewise3.rs
new file mode 100644
index 0000000..a5a28c0
--- /dev/null
+++ b/fuzz/fuzz_targets/inflate_bytewise3.rs
@@ -0,0 +1,22 @@
+//! This fuzz target tests that feeding bytes into the decompressor one at a time gives the same
+//! result (equal output, or an error in both cases) as decoding the whole input at once.
+
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate miniz_oxide;
+
+#[path = "../../src/decompress/tests/test_utils.rs"]
+mod test_utils;
+use test_utils::decompress_by_chunks;
+
+fuzz_target!(|input: &[u8]| {
+    let r_whole = decompress_by_chunks(input, std::iter::repeat(input.len()), false);
+    let r_bytewise = decompress_by_chunks(input, std::iter::repeat(1), false);
+    match (r_whole, r_bytewise) {
+        (Ok(output_whole), Ok(output_bytewise)) => assert_eq!(output_whole, output_bytewise),
+        (Err(_e1), Err(_e2)) => (),
+        (Ok(_), Err(e)) => panic!("Only byte-by-byte returned an error: {:?}", e),
+        (Err(e), Ok(_)) => panic!("Only consume-whole returned an error: {:?}", e),
+    }
+});
diff --git a/src/decompress.rs b/src/decompress.rs
index 3fad322..f89747e 100644
--- a/src/decompress.rs
+++ b/src/decompress.rs
@@ -1273,4 +1273,62 @@ mod tests {
             assert_eq!(output_written, 0);
         }
     }
+
+    mod test_utils;
+    use test_utils::{decompress_by_chunks, TestDecompressionError};
+
+    fn verify_no_sensitivity_to_input_chunking(
+        input: &[u8],
+    ) -> Result<Vec<u8>, TestDecompressionError> {
+        let r_whole = decompress_by_chunks(input, vec![input.len()], false);
+        let r_bytewise = decompress_by_chunks(input, std::iter::repeat(1), false);
+        assert_eq!(r_whole, r_bytewise);
+        r_whole // Returning an arbitrary result, since this is equal to `r_bytewise`.
+    }
+
+    /// This is a regression test found by the `buf_independent` fuzzer from the `png` crate. When
+    /// this test case was found, the results were unexpectedly different when 1) decompressing the
+    /// whole input (successful result) vs 2) decompressing byte-by-byte
+    /// (`Err(InvalidDistanceCode)`).
+    #[test]
+    fn test_input_chunking_sensitivity_when_handling_distance_codes() {
+        let result = verify_no_sensitivity_to_input_chunking(include_bytes!(
+            "../tests/input-chunking-sensitivity-example1.zz"
+        ))
+        .unwrap();
+        assert_eq!(result.len(), 281);
+        assert_eq!(simd_adler32::adler32(&result.as_slice()), 751299);
+    }
+
+    /// This is a regression test found by the `inflate_bytewise3` fuzzer from the `fdeflate`
+    /// crate. When this test case was found, the results were unexpectedly different when 1)
+    /// decompressing the whole input (`Err(DistanceTooFarBack)`) vs 2) decompressing byte-by-byte
+    /// (successful result).
+    #[test]
+    fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example1() {
+        let err = verify_no_sensitivity_to_input_chunking(include_bytes!(
+            "../tests/input-chunking-sensitivity-example2.zz"
+        ))
+        .unwrap_err();
+        assert_eq!(
+            err,
+            TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree)
+        );
+    }
+
+    /// This is a regression test found by the `inflate_bytewise3` fuzzer from the `fdeflate`
+    /// crate. When this test case was found, the results were unexpectedly different when 1)
+    /// decompressing the whole input (`Err(InvalidDistanceCode)`) vs 2) decompressing byte-by-byte
+    /// (successful result).
+    #[test]
+    fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example2() {
+        let err = verify_no_sensitivity_to_input_chunking(include_bytes!(
+            "../tests/input-chunking-sensitivity-example3.zz"
+        ))
+        .unwrap_err();
+        assert_eq!(
+            err,
+            TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree)
+        );
+    }
 }
diff --git a/src/decompress/tests/test_utils.rs b/src/decompress/tests/test_utils.rs
new file mode 100644
index 0000000..bcec96b
--- /dev/null
+++ b/src/decompress/tests/test_utils.rs
@@ -0,0 +1,101 @@
+//! Testing utilities for testing `fdeflate::Decompressor`.
+//!
+//! These utilities are used by:
+//!
+//! * Unit tests (e.g. `#[test]` tests in `src/decompress.rs`)
+//! * Fuzzers (e.g. `fuzz/fuzz_targets/inflate_bytewise3.rs`)
+
+#[cfg(test)]
+use crate as fdeflate;
+
+use fdeflate::{DecompressionError, Decompressor};
+
+#[derive(Debug, PartialEq)]
+pub enum TestDecompressionError {
+    ProdError(DecompressionError),
+    TestError(TestErrorKind),
+}
+
+#[derive(Debug, Eq, PartialEq)]
+pub enum TestErrorKind {
+    OutputTooLarge,
+    TooManyIterations,
+}
+
+impl From<DecompressionError> for TestDecompressionError {
+    fn from(e: DecompressionError) -> Self {
+        Self::ProdError(e)
+    }
+}
+
+impl From<TestErrorKind> for TestDecompressionError {
+    fn from(kind: TestErrorKind) -> Self {
+        Self::TestError(kind)
+    }
+}
+
+/// Decompresses `input` when feeding it into a `Decompressor::read` in `chunks`.
+///
+/// `chunks` typically can be used to decode the whole input at once (setting `chunks` to
+/// `vec![input.len()]`) or byte-by-byte (setting `chunks` to `std::iter::repeat(1)`).
+/// But `chunks` can also be used to replicate arbitrary chunking patterns (such as may be
+/// used by some fuzzing-based repros from the `png` crate).
+///
+/// `early_eof` controls the last `end_of_input` argument of `Decompressor::read` calls.
+/// When `early_eof` is `false`, then `end_of_input` is `false` until the whole input is
+/// consumed (and then, if `Decompressor::is_done` is still false, `Decompressor::read`
+/// is called one or more times with empty input slice and `end_of_input` set to true).
+/// When `early_eof` is `true` then `end_of_input` is set to `true` as soon as the slice
+/// fed to `Decompressor::read` "reaches" the end of the whole input.
+///
+/// Unlike the `png` crate, this testing helper uses a big, fixed-size output buffer.
+/// (i.e. there is no simulation of `ZlibStream.compact_out_buffer_if_needed` from the `png`
+/// crate).
+pub fn decompress_by_chunks(
+    input: &[u8],
+    chunks: impl IntoIterator<Item = usize>,
+    early_eof: bool,
+) -> Result<Vec<u8>, TestDecompressionError> {
+    let mut chunks = chunks.into_iter();
+
+    // `iteration_counter` helps to prevent infinite loops (which may happen with `chunks` such
+    // as `std::iter::repeat(0)`).
+    let mut iteration_counter = 0;
+
+    // Ignoring checksums so that we can work with inputs generated by fuzzing. (Fuzzing
+    // typically ignores checksums to make it easier to explore the space of possible inputs.)
+    let mut d = Decompressor::new();
+    d.ignore_adler32();
+
+    let mut out_buf = vec![0; 1_000_000];
+    let mut in_pos = 0;
+    let mut out_pos = 0;
+    while !d.is_done() {
+        iteration_counter += 1;
+        if iteration_counter > 5000 {
+            return Err(TestErrorKind::TooManyIterations.into());
+        }
+
+        let chunk_size = chunks.next().unwrap_or(0);
+        let start = in_pos;
+        let end = std::cmp::min(start + chunk_size, input.len());
+
+        let eof = if early_eof {
+            end == input.len()
+        } else {
+            start == input.len()
+        };
+
+        let (in_consumed, out_written) =
+            d.read(&input[start..end], out_buf.as_mut_slice(), out_pos, eof)?;
+
+        in_pos += in_consumed;
+        out_pos += out_written;
+        if out_pos == out_buf.len() && in_consumed == 0 && !d.is_done() {
+            return Err(TestErrorKind::OutputTooLarge.into());
+        }
+    }
+
+    out_buf.resize(out_pos, 0xFF);
+    Ok(out_buf)
+}
diff --git a/tests/input-chunking-sensitivity-example1.zz b/tests/input-chunking-sensitivity-example1.zz
new file mode 100644
index 0000000..84ca515
Binary files /dev/null and b/tests/input-chunking-sensitivity-example1.zz differ
diff --git a/tests/input-chunking-sensitivity-example2.zz b/tests/input-chunking-sensitivity-example2.zz
new file mode 100644
index 0000000..5a5934e
Binary files /dev/null and b/tests/input-chunking-sensitivity-example2.zz differ
diff --git a/tests/input-chunking-sensitivity-example3.zz b/tests/input-chunking-sensitivity-example3.zz
new file mode 100644
index 0000000..637783b
Binary files /dev/null and b/tests/input-chunking-sensitivity-example3.zz differ