Skip to content

Commit

Permalink
Merge pull request #28 from anforowicz/explicitly-reject-missing-eof-…
Browse files Browse the repository at this point in the history
…of-block-symbol

Regression tests for input chunking sensitivity.
  • Loading branch information
HeroicKatora authored Sep 19, 2024
2 parents 05a3118 + 3b0c518 commit 1b911e1
Show file tree
Hide file tree
Showing 7 changed files with 187 additions and 0 deletions.
6 changes: 6 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ path = "fuzz_targets/inflate_bytewise2.rs"
test = false
doc = false

[[bin]]
name = "inflate_bytewise3"
path = "fuzz_targets/inflate_bytewise3.rs"
test = false
doc = false

[[bin]]
name = "inflate_split"
path = "fuzz_targets/inflate_split.rs"
Expand Down
22 changes: 22 additions & 0 deletions fuzz/fuzz_targets/inflate_bytewise3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//! This fuzz target tests that feeding bytes into the decompressor one at a time always produces
//! valid output.

#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate miniz_oxide;

#[path = "../../src/decompress/tests/test_utils.rs"]
mod test_utils;
use test_utils::decompress_by_chunks;

fuzz_target!(|input: &[u8]| {
    // Decode the same bytes twice, differing only in how the input is sliced up: once with
    // chunks as large as the whole input, and once with one-byte chunks.
    let decode = |chunk_size: usize| {
        decompress_by_chunks(input, std::iter::repeat(chunk_size), false)
    };
    let whole = decode(input.len());
    let bytewise = decode(1);

    // Both runs must agree: identical output on success, or failure in both cases.
    match (whole, bytewise) {
        (Ok(from_whole), Ok(from_bytewise)) => assert_eq!(from_whole, from_bytewise),
        (Err(_), Err(_)) => {}
        (Ok(_), Err(e)) => panic!("Only byte-by-byte returned an error: {:?}", e),
        (Err(e), Ok(_)) => panic!("Only consume-whole returned an error: {:?}", e),
    }
});
58 changes: 58 additions & 0 deletions src/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1273,4 +1273,62 @@ mod tests {
assert_eq!(output_written, 0);
}
}

mod test_utils;
use test_utils::{decompress_by_chunks, TestDecompressionError};

/// Decompresses `input` twice (as a single chunk, and one byte at a time) and asserts that
/// both runs produce the same `Result`.
///
/// Returns that shared result so callers can make further assertions about it.
fn verify_no_sensitivity_to_input_chunking(
    input: &[u8],
) -> Result<Vec<u8>, TestDecompressionError> {
    let whole = decompress_by_chunks(input, std::iter::once(input.len()), false);
    let bytewise = decompress_by_chunks(input, std::iter::repeat(1), false);
    assert_eq!(whole, bytewise);

    // Either result could be returned here, since the assertion above proved them equal.
    whole
}

/// Regression test for an input found by the `buf_independent` fuzzer from the `png` crate.
///
/// When this test case was found, the results unexpectedly depended on input chunking:
///
/// 1. decompressing the whole input at once succeeded, but
/// 2. decompressing byte-by-byte failed with `Err(InvalidDistanceCode)`.
#[test]
fn test_input_chunking_sensitivity_when_handling_distance_codes() {
    let input = include_bytes!("../tests/input-chunking-sensitivity-example1.zz");
    let output = verify_no_sensitivity_to_input_chunking(input).unwrap();

    // Pin the decompressed payload by its length and Adler-32 checksum.
    assert_eq!(output.len(), 281);
    assert_eq!(simd_adler32::adler32(&output.as_slice()), 751299);
}

/// Regression test for an input found by the `inflate_bytewise3` fuzzer from the `fdeflate`
/// crate.
///
/// When this test case was found, the results unexpectedly depended on input chunking:
///
/// 1. decompressing the whole input at once failed with `Err(DistanceTooFarBack)`, but
/// 2. decompressing byte-by-byte succeeded.
///
/// Both chunkings are now expected to fail with `BadLiteralLengthHuffmanTree`.
#[test]
fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example1() {
    let input = include_bytes!("../tests/input-chunking-sensitivity-example2.zz");
    let expected =
        TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree);

    let err = verify_no_sensitivity_to_input_chunking(input).unwrap_err();
    assert_eq!(err, expected);
}

/// Regression test for an input found by the `inflate_bytewise3` fuzzer from the `fdeflate`
/// crate.
///
/// When this test case was found, the results unexpectedly depended on input chunking:
///
/// 1. decompressing the whole input at once failed with `Err(InvalidDistanceCode)`, but
/// 2. decompressing byte-by-byte succeeded.
///
/// Both chunkings are now expected to fail with `BadLiteralLengthHuffmanTree`.
#[test]
fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example2() {
    let input = include_bytes!("../tests/input-chunking-sensitivity-example3.zz");
    let expected =
        TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree);

    let err = verify_no_sensitivity_to_input_chunking(input).unwrap_err();
    assert_eq!(err, expected);
}
}
101 changes: 101 additions & 0 deletions src/decompress/tests/test_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//! Testing utilities for testing `fdeflate::Decompressor`.
//!
//! These utilities are used by:
//!
//! * Unit tests (e.g. `#[test]` tests in `src/decompress.rs`)
//! * Fuzzers (e.g. `fuzz/fuzz_targets/inflate_bytewise3.rs`)

#[cfg(test)]
use crate as fdeflate;

use fdeflate::{DecompressionError, Decompressor};

/// Error type returned by `decompress_by_chunks`.
///
/// Distinguishes errors reported by the decompressor under test from failures raised by the
/// test helper itself.
#[derive(Debug, PartialEq)]
pub enum TestDecompressionError {
/// An error returned by `Decompressor::read`.
ProdError(DecompressionError),
/// A limit enforced by the test helper was hit (see `TestErrorKind`).
TestError(TestErrorKind),
}

/// Failures detected by the test helper itself rather than by the decompressor.
#[derive(Debug, Eq, PartialEq)]
pub enum TestErrorKind {
/// The fixed-size output buffer was completely filled, and a `Decompressor::read` call
/// consumed no input without the decompressor reporting that it is done.
OutputTooLarge,
/// `decompress_by_chunks` exceeded its cap on loop iterations. The cap guards against
/// infinite loops (which may happen with `chunks` such as `std::iter::repeat(0)`).
TooManyIterations,
}

impl From<DecompressionError> for TestDecompressionError {
fn from(e: DecompressionError) -> Self {
Self::ProdError(e)
}
}

impl From<TestErrorKind> for TestDecompressionError {
fn from(kind: TestErrorKind) -> Self {
Self::TestError(kind)
}
}

/// Decompresses `input` when feeding it into a `Decompressor::read` in `chunks`.
///
/// `chunks` typically can be used to decode the whole input at once (setting `chunks` to
/// `vec![input.len]`) or byte-by-byte (setting `chunks` to `std::iter::repeat(1)`).
/// But `chunks` can also be used to replicate arbitrary chunking patterns (such as may be
/// used by some fuzzing-based repros from the `png` crate).
///
/// `early_eof` is used to the last `end_of_input` argument of `Decompressor::read` calls.
/// When `early_eof` is `false`, then `end_of_input` is `false` until the whole input is
/// consumed (and then is `Decompressor::is_done` is still false, then `Decompressor::read`
/// is called one or more times with empty input slice and `end_of_input` set to true).
/// When `early_eof` is `true` then `end_of_input` is set to `true` as soon as the slice
/// fed to `Decompressor::read` "reaches" the end of the whole input.
///
/// Unlike the `png` crate, this testing helper uses a big, fixed-size output buffer.
/// (i.e. there is no simulation of `ZlibStream.compact_out_buffer_if_needed` from the `png`
/// crate).
pub fn decompress_by_chunks(
input: &[u8],
chunks: impl IntoIterator<Item = usize>,
early_eof: bool,
) -> Result<Vec<u8>, TestDecompressionError> {
let mut chunks = chunks.into_iter();

// `iteration_counter` helps to prevent infinite loops (which may happen with `chunks` such
// as `std::iter::repeat(0)`).
let mut iteration_counter = 0;

// Ignoring checksums so that we can work with inputs generated by fuzzing. (Fuzzing
// typically ignores checksums to make it easier to explore the space of possible inputs.)
let mut d = Decompressor::new();
d.ignore_adler32();

let mut out_buf = vec![0; 1_000_000];
let mut in_pos = 0;
let mut out_pos = 0;
while !d.is_done() {
iteration_counter += 1;
if iteration_counter > 5000 {
return Err(TestErrorKind::TooManyIterations.into());
}

let chunk_size = chunks.next().unwrap_or(0);
let start = in_pos;
let end = std::cmp::min(start + chunk_size, input.len());

let eof = if early_eof {
end == input.len()
} else {
start == input.len()
};

let (in_consumed, out_written) =
d.read(&input[start..end], out_buf.as_mut_slice(), out_pos, eof)?;

in_pos += in_consumed;
out_pos += out_written;
if out_pos == out_buf.len() && in_consumed == 0 && !d.is_done() {
return Err(TestErrorKind::OutputTooLarge.into());
}
}

out_buf.resize(out_pos, 0xFF);
Ok(out_buf)
}
Binary file added tests/input-chunking-sensitivity-example1.zz
Binary file not shown.
Binary file added tests/input-chunking-sensitivity-example2.zz
Binary file not shown.
Binary file added tests/input-chunking-sensitivity-example3.zz
Binary file not shown.

0 comments on commit 1b911e1

Please sign in to comment.