From 27560f182b5f0feb8dbd70791cbadd6fbd622411 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 11 Oct 2023 09:59:18 -0700 Subject: [PATCH] Fix `Unstructured::arbitrary_take_rest_iter` for collections of collections This simply makes it match the behavior of `arbitrary_iter`. I experimented with more approaches, but I couldn't get anything that was better in terms of balancing simplicity of implementation, ease of understanding the algorithm, and generating all expected values. This additionally adds a testing helper function for exhaustively generating certain byte buffers and asserting that we generate the expected arbitrary values from those byte buffers. Fixes #158 --- Cargo.toml | 5 +- src/lib.rs | 132 +++++++++++++++++++++++++++++++++++++++++++- src/unstructured.rs | 25 ++------- tests/derive.rs | 2 +- 4 files changed, 140 insertions(+), 24 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a7c9a37..0588ce8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ authors = [ "Corey Farwell ", ] categories = ["development-tools::testing"] -edition = "2018" +edition = "2021" keywords = ["arbitrary", "testing"] readme = "README.md" description = "The trait for generating structured data from unstructured data" @@ -37,3 +37,6 @@ required-features = ["derive"] [workspace] members = ["./fuzz"] + +[dev-dependencies] +exhaustigen = "0.1.0" diff --git a/src/lib.rs b/src/lib.rs index ba2165c..87e6124 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1161,6 +1161,56 @@ impl<'a> Arbitrary<'a> for IpAddr { mod test { use super::*; + /// Assert that the given expected values are all generated. + /// + /// Exhaustively enumerates all buffers up to length 10 containing the + /// following bytes: `0x00`, `0x01`, `0x61` (aka ASCII 'a'), and `0xff` + fn assert_generates(expected_values: impl IntoIterator) + where + T: Clone + std::fmt::Debug + std::hash::Hash + Eq + for<'a> Arbitrary<'a>, + { + let expected_values: HashSet<_> = expected_values.into_iter().collect(); + let mut arbitrary_expected = expected_values.clone(); + let mut arbitrary_take_rest_expected = expected_values; + + let bytes = [0, 1, b'a', 0xff]; + let max_len = 10; + + let mut buf = Vec::with_capacity(max_len); + + let mut g = exhaustigen::Gen::new(); + while !g.done() { + let len = g.gen(max_len); + + buf.clear(); + buf.extend( + std::iter::repeat_with(|| { + let index = g.gen(bytes.len() - 1); + bytes[index] + }) + .take(len), + ); + + let mut u = Unstructured::new(&buf); + let val = T::arbitrary(&mut u).unwrap(); + arbitrary_expected.remove(&val); + + let u = Unstructured::new(&buf); + let val = T::arbitrary_take_rest(u).unwrap(); + arbitrary_take_rest_expected.remove(&val); + + if arbitrary_expected.is_empty() && arbitrary_take_rest_expected.is_empty() { + return; + } + } + + panic!( + "failed to generate all expected values!\n\n\ + T::arbitrary did not generate: {arbitrary_expected:#?}\n\n\ + T::arbitrary_take_rest did not generate {arbitrary_take_rest_expected:#?}" + ) + } + /// Generates an arbitrary `T`, and checks that the result is consistent with the /// `size_hint()` reported by `T`. fn checked_arbitrary<'a, T: Arbitrary<'a>>(u: &mut Unstructured<'a>) -> Result { @@ -1231,6 +1281,16 @@ mod test { let expected = 1 | (2 << 8) | (3 << 16) | (4 << 24); let actual = checked_arbitrary::(&mut buf).unwrap(); assert_eq!(expected, actual); + + assert_generates([ + i32::from_ne_bytes([0, 0, 0, 0]), + i32::from_ne_bytes([0, 0, 0, 1]), + i32::from_ne_bytes([0, 0, 1, 0]), + i32::from_ne_bytes([0, 1, 0, 0]), + i32::from_ne_bytes([1, 0, 0, 0]), + i32::from_ne_bytes([1, 1, 1, 1]), + i32::from_ne_bytes([0xff, 0xff, 0xff, 0xff]), + ]); } #[test] @@ -1251,6 +1311,74 @@ mod test { assert_eq!(expected, actual); } + #[test] + fn arbitrary_for_vec_u8() { + assert_generates::>([ + vec![], + vec![0], + vec![1], + vec![0, 0], + vec![0, 1], + vec![1, 0], + vec![1, 1], + vec![0, 0, 0], + vec![0, 0, 1], + vec![0, 1, 0], + vec![0, 1, 1], + vec![1, 0, 0], + vec![1, 0, 1], + vec![1, 1, 0], + vec![1, 1, 1], + ]); + } + + #[test] + fn arbitrary_for_vec_vec_u8() { + assert_generates::>>([ + vec![], + vec![vec![]], + vec![vec![0]], + vec![vec![1]], + vec![vec![0, 1]], + vec![vec![], vec![]], + vec![vec![0], vec![]], + vec![vec![], vec![1]], + vec![vec![0], vec![1]], + vec![vec![0, 1], vec![]], + vec![vec![], vec![1, 0]], + vec![vec![], vec![], vec![]], + ]); + } + + #[test] + fn arbitrary_for_vec_vec_vec_u8() { + assert_generates::>>>([ + vec![], + vec![vec![]], + vec![vec![vec![0]]], + vec![vec![vec![1]]], + vec![vec![vec![0, 1]]], + vec![vec![], vec![]], + vec![vec![], vec![vec![]]], + vec![vec![vec![]], vec![]], + vec![vec![vec![]], vec![vec![]]], + vec![vec![vec![0]], vec![]], + vec![vec![], vec![vec![1]]], + vec![vec![vec![0]], vec![vec![1]]], + vec![vec![vec![0, 1]], vec![]], + vec![vec![], vec![vec![0, 1]]], + vec![vec![], vec![], vec![]], + vec![vec![vec![]], vec![], vec![]], + vec![vec![], vec![vec![]], vec![]], + vec![vec![], vec![], vec![vec![]]], + ]); + } + + #[test] + fn arbitrary_for_string() { + assert_generates::(["".into(), "a".into(), "aa".into(), "aaa".into()]); + } + #[test] fn arbitrary_collection() { let x = [ @@ -1284,11 +1412,11 @@ mod test { ); assert_eq!( checked_arbitrary_take_rest::>(Unstructured::new(&x)).unwrap(), - &[1, 2, 3, 4] + &[2, 4] ); assert_eq!( checked_arbitrary_take_rest::>(Unstructured::new(&x)).unwrap(), - &[0x4030201] + &[0x040302] ); assert_eq!( checked_arbitrary_take_rest::(Unstructured::new(&x)).unwrap(), diff --git a/src/unstructured.rs b/src/unstructured.rs index 0bfdff2..6467659 100644 --- a/src/unstructured.rs +++ b/src/unstructured.rs @@ -620,14 +620,8 @@ impl<'a> Unstructured<'a> { pub fn arbitrary_take_rest_iter>( self, ) -> Result> { - let (lower, upper) = ElementType::size_hint(0); - - let elem_size = upper.unwrap_or(lower * 2); - let elem_size = std::cmp::max(1, elem_size); - let size = self.len() / elem_size; Ok(ArbitraryTakeRestIter { - size, - u: Some(self), + u: self, _marker: PhantomData, }) } @@ -735,25 +729,16 @@ impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, Elem /// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`] pub struct ArbitraryTakeRestIter<'a, ElementType> { - u: Option>, - size: usize, + u: Unstructured<'a>, _marker: PhantomData, } impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> { type Item = Result; fn next(&mut self) -> Option> { - if let Some(mut u) = self.u.take() { - if self.size == 1 { - Some(Arbitrary::arbitrary_take_rest(u)) - } else if self.size == 0 { - None - } else { - self.size -= 1; - let ret = Arbitrary::arbitrary(&mut u); - self.u = Some(u); - Some(ret) - } + let keep_going = self.u.arbitrary().unwrap_or(false); + if keep_going { + Some(Arbitrary::arbitrary(&mut self.u)) } else { None } diff --git a/tests/derive.rs b/tests/derive.rs index f29d227..a84e4dc 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -59,7 +59,7 @@ fn test_take_rest() { assert_eq!(s2.1, true); assert_eq!(s1.2, 0x4030201); assert_eq!(s2.2, 0x4030201); - assert_eq!(s1.3, vec![0x605, 0x807]); + assert_eq!(s1.3, vec![0x0706]); assert_eq!(s2.3, "\x05\x06\x07\x08"); }