From 5cd1dc97c0c5f2b611ec9ef7a21d11407c2c9e96 Mon Sep 17 00:00:00 2001 From: Jacob Finkelman Date: Wed, 22 Nov 2023 20:40:07 +0000 Subject: [PATCH 1/5] feat: add a `simplify` for error messages --- src/range.rs | 157 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 145 insertions(+), 12 deletions(-) diff --git a/src/range.rs b/src/range.rs index be7ff250..60bacd61 100644 --- a/src/range.rs +++ b/src/range.rs @@ -51,6 +51,7 @@ //! If we do not see practical bugs, or we get a formal proof that the code cannot lead to error states, then we may remove this warning. use crate::{internal::small_vec::SmallVec, version_set::VersionSet}; +use std::cmp::Ordering; use std::ops::RangeBounds; use std::{ fmt::{Debug, Display, Formatter}, @@ -202,23 +203,46 @@ impl Range { /// Returns true if the this Range contains the specified value. pub fn contains(&self, v: &V) -> bool { for segment in self.segments.iter() { - if match segment { - (Unbounded, Unbounded) => true, - (Unbounded, Included(end)) => v <= end, - (Unbounded, Excluded(end)) => v < end, - (Included(start), Unbounded) => v >= start, - (Included(start), Included(end)) => v >= start && v <= end, - (Included(start), Excluded(end)) => v >= start && v < end, - (Excluded(start), Unbounded) => v > start, - (Excluded(start), Included(end)) => v > start && v <= end, - (Excluded(start), Excluded(end)) => v > start && v < end, - } { - return true; + match within_bounds(v, segment) { + Ordering::Less => return false, + Ordering::Equal => return true, + Ordering::Greater => (), } } false } + /// Returns true if the this Range contains the specified values. + /// + /// The `versions` iterator must be sorted. + /// Functionally equivalent to `versions.map(|v| self.contains(v))`. + /// Except it runs in `O(size_of_range + len_of_versions)` not `O(size_of_range * len_of_versions)` + pub fn contains_many<'s, I>(&'s self, versions: I) -> impl Iterator + 's + where + I: Iterator + 's, + V: 's, + { + self.locate_versions(versions).map(|m| m.is_some()) + } + + /// Return the segment index in the range for each version in the range, None otherwise + fn locate_versions<'s, I>(&'s self, versions: I) -> impl Iterator> + 's + where + I: Iterator + 's, + V: 's, + { + versions.scan(0, |i, v| { + while let Some(segment) = self.segments.get(*i) { + match within_bounds(v, segment) { + Ordering::Less => return Some(None), + Ordering::Equal => return Some(Some(*i)), + Ordering::Greater => *i += 1, + } + } + Some(None) + }) + } + /// Construct a simple range from anything that impls [RangeBounds] like `v1..v2`. pub fn from_range_bounds(bounds: R) -> Self where @@ -264,6 +288,26 @@ impl Range { } } +fn within_bounds(v: &V, segment: &Interval) -> Ordering { + let below_lower_bound = match segment { + (Excluded(start), _) => v <= start, + (Included(start), _) => v < start, + (Unbounded, _) => false, + }; + if below_lower_bound { + return Ordering::Less; + } + let below_upper_bound = match segment { + (_, Unbounded) => true, + (_, Included(end)) => v <= end, + (_, Excluded(end)) => v < end, + }; + if below_upper_bound { + return Ordering::Equal; + } + Ordering::Greater +} + fn valid_segment(start: &Bound, end: &Bound) -> bool { match (start, end) { (Included(s), Included(e)) => s <= e, @@ -274,6 +318,33 @@ fn valid_segment(start: &Bound, end: &Bound) -> bool { } } +/// group adjacent versions locations +/// [None, 3, 6, 7, None] -> [(3, 7)] +/// [3, 6, 7, None] -> [(None, 7)] +/// [3, 6, 7] -> [(None, None)] +/// [None, 1, 4, 7, None, None, None, 8, None, 9] -> [(1, 7), (8, 8), (9, None)] +fn group_adjacent_locations( + mut locations: impl Iterator>, +) -> impl Iterator, Option)> { + // If the first version matched, then the lower bound of that segment is not needed + let mut seg = locations.next().flatten().map(|ver| (None, Some(ver))); + std::iter::from_fn(move || { + for ver in locations.by_ref() { + if let Some(ver) = ver { + // As long as were still matching versions, we keep merging into the currently matching segment + seg = Some((seg.map_or(Some(ver), |(s, _)| s), Some(ver))); + } else { + // If we have found a version that doesn't match, then right the merge segment and prepare for a new one. + if seg.is_some() { + return seg.take(); + } + } + } + // If the last version matched, then write out the merged segment but the upper bound is not needed. + seg.take().map(|(s, _)| (s, None)) + }) +} + impl Range { /// Computes the union of this `Range` and another. pub fn union(&self, other: &Self) -> Self { @@ -321,6 +392,41 @@ impl Range { Self { segments }.check_invariants() } + + /// Returns a simpler Range that contains the same versions + /// + /// For every one of the Versions provided in versions the existing range and + /// the simplified range will agree on whether it is contained. + /// The simplified version may include or exclude versions that are not in versions as the implementation wishes. + /// For example: + /// - If all the versions are contained in the original than the range will be simplified to `full`. + /// - If none of the versions are contained in the original than the range will be simplified to `empty`. + /// + /// If versions are not sorted the correctness of this function is not guaranteed. + pub fn simplify<'s, I>(&'s self, versions: I) -> Self + where + I: Iterator + 's, + V: 's, + { + let version_locations = self.locate_versions(versions); + let kept_segments = group_adjacent_locations(version_locations); + self.keep_segments(kept_segments) + } + + /// simplify range with segments at given location bounds. + fn keep_segments( + &self, + kept_segments: impl Iterator, Option)>, + ) -> Range { + let mut segments = SmallVec::Empty; + for (s, e) in kept_segments { + segments.push(( + s.map_or(Unbounded, |s| self.segments[s].0.clone()), + e.map_or(Unbounded, |e| self.segments[e].1.clone()), + )); + } + Self { segments }.check_invariants() + } } impl VersionSet for Range { @@ -600,5 +706,32 @@ pub mod tests { let rv2: Range = rv.bounding_range().map(Range::from_range_bounds::<_, u32>).unwrap_or_else(Range::empty); assert_eq!(rv, rv2); } + + #[test] + fn contains(range in strategy(), versions in proptest::collection::vec(version_strat(), ..30)) { + for v in versions { + assert_eq!(range.contains(&v), range.segments.iter().any(|s| RangeBounds::contains(s, &v))); + } + } + + #[test] + fn contains_many(range in strategy(), mut versions in proptest::collection::vec(version_strat(), ..30)) { + versions.sort(); + assert_eq!(versions.len(), range.contains_many(versions.iter()).count()); + for (a, b) in versions.iter().zip(range.contains_many(versions.iter())) { + assert_eq!(range.contains(a), b); + } + } + + #[test] + fn simplify(range in strategy(), mut versions in proptest::collection::vec(version_strat(), ..30)) { + versions.sort(); + let simp = range.simplify(versions.iter()); + + for v in versions { + assert_eq!(range.contains(&v), simp.contains(&v)); + } + assert!(simp.segments.len() <= range.segments.len()) + } } } From 8f7ef55e617944539270669e39170d26afe5b06d Mon Sep 17 00:00:00 2001 From: Jacob Finkelman Date: Tue, 28 Nov 2023 13:10:40 -0500 Subject: [PATCH 2/5] Fix broken links Co-authored-by: konsti --- src/range.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/range.rs b/src/range.rs index 60bacd61..2a9ddd01 100644 --- a/src/range.rs +++ b/src/range.rs @@ -319,10 +319,12 @@ fn valid_segment(start: &Bound, end: &Bound) -> bool { } /// group adjacent versions locations +/// ```text /// [None, 3, 6, 7, None] -> [(3, 7)] /// [3, 6, 7, None] -> [(None, 7)] /// [3, 6, 7] -> [(None, None)] /// [None, 1, 4, 7, None, None, None, 8, None, 9] -> [(1, 7), (8, 8), (9, None)] +/// ``` fn group_adjacent_locations( mut locations: impl Iterator>, ) -> impl Iterator, Option)> { From db745af0b742cdd849fa67a522d0b8ab880f58d2 Mon Sep 17 00:00:00 2001 From: Jacob Finkelman Date: Tue, 28 Nov 2023 20:02:40 +0000 Subject: [PATCH 3/5] Inline locate_versions, to simplify lifetimes While attempting to use this simplification code I got an odd lifetime error with ``` let c = set.complement(); let s = c.simplify(versions); s.complement() ``` By in lining locate_versions the lifetimes could be simplified so that that code works --- src/range.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/range.rs b/src/range.rs index 2a9ddd01..25ebb549 100644 --- a/src/range.rs +++ b/src/range.rs @@ -222,24 +222,15 @@ impl Range { I: Iterator + 's, V: 's, { - self.locate_versions(versions).map(|m| m.is_some()) - } - - /// Return the segment index in the range for each version in the range, None otherwise - fn locate_versions<'s, I>(&'s self, versions: I) -> impl Iterator> + 's - where - I: Iterator + 's, - V: 's, - { - versions.scan(0, |i, v| { + versions.scan(0, move |i, v| { while let Some(segment) = self.segments.get(*i) { match within_bounds(v, segment) { - Ordering::Less => return Some(None), - Ordering::Equal => return Some(Some(*i)), + Ordering::Less => return Some(false), + Ordering::Equal => return Some(true), Ordering::Greater => *i += 1, } } - Some(None) + Some(false) }) } @@ -405,12 +396,22 @@ impl Range { /// - If none of the versions are contained in the original than the range will be simplified to `empty`. /// /// If versions are not sorted the correctness of this function is not guaranteed. - pub fn simplify<'s, I>(&'s self, versions: I) -> Self + pub fn simplify<'v, I>(&self, versions: I) -> Self where - I: Iterator + 's, - V: 's, + I: Iterator + 'v, + V: 'v, { - let version_locations = self.locate_versions(versions); + // Return the segment index in the range for each version in the range, None otherwise + let version_locations = versions.scan(0, move |i, v| { + while let Some(segment) = self.segments.get(*i) { + match within_bounds(v, segment) { + Ordering::Less => return Some(None), + Ordering::Equal => return Some(Some(*i)), + Ordering::Greater => *i += 1, + } + } + Some(None) + }); let kept_segments = group_adjacent_locations(version_locations); self.keep_segments(kept_segments) } From 581f66d30214f68dfc4dd9828e50b4148d1cc2e4 Mon Sep 17 00:00:00 2001 From: Jacob Finkelman Date: Wed, 29 Nov 2023 11:09:07 -0500 Subject: [PATCH 4/5] correct capitalization Co-authored-by: Zanie Blue --- src/range.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index 25ebb549..c59db5a6 100644 --- a/src/range.rs +++ b/src/range.rs @@ -309,7 +309,8 @@ fn valid_segment(start: &Bound, end: &Bound) -> bool { } } -/// group adjacent versions locations +/// Group adjacent versions locations. +/// /// ```text /// [None, 3, 6, 7, None] -> [(3, 7)] /// [3, 6, 7, None] -> [(None, 7)] From e518819ab23a659de0e9019f08d94146e7734421 Mon Sep 17 00:00:00 2001 From: Jacob Finkelman Date: Wed, 29 Nov 2023 11:10:21 -0500 Subject: [PATCH 5/5] improve comment Co-authored-by: Zanie Blue --- src/range.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/range.rs b/src/range.rs index c59db5a6..91933e61 100644 --- a/src/range.rs +++ b/src/range.rs @@ -417,7 +417,10 @@ impl Range { self.keep_segments(kept_segments) } - /// simplify range with segments at given location bounds. + /// Create a new range with a subset of segments at given location bounds. + /// + /// Each new segment is constructed from a pair of segments, taking the + /// start of the first and the end of the second. fn keep_segments( &self, kept_segments: impl Iterator, Option)>,