From c5a1d8c3db171a4351712c04e6ba6a4e4636a332 Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Sun, 2 Aug 2015 19:03:01 +0200 Subject: [PATCH 1/4] StrSearcher: Add tests for rfind(&str) Add tests for .rfind(&str), using the reverse searcher case for substring search. --- src/libcollectionstest/str.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 4cccb29b41cdd..ac9c7908ab8f9 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -115,6 +115,26 @@ fn test_find_str() { assert_eq!(data[43..86].find("ย中"), Some(67 - 43)); assert_eq!(data[43..86].find("iệt"), Some(77 - 43)); assert_eq!(data[43..86].find("Nam"), Some(83 - 43)); + + // find every substring -- assert that it finds it, or an earlier occurence. + let string = "Việt Namacbaabcaabaaba"; + for (i, ci) in string.char_indices() { + let ip = i + ci.len_utf8(); + for j in string[ip..].char_indices() + .map(|(i, _)| i) + .chain(Some(string.len() - ip)) + { + let pat = &string[i..ip + j]; + assert!(match string.find(pat) { + None => false, + Some(x) => x <= i, + }); + assert!(match string.rfind(pat) { + None => false, + Some(x) => x >= i, + }); + } + } } #[test] From 7ebae85bb8eac495bbc4a463319b23404fdc63a6 Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Sun, 2 Aug 2015 19:18:44 +0200 Subject: [PATCH 2/4] StrSearcher: Implement the full two way algorithm in reverse for rfind Fix quadratic behavior in StrSearcher in reverse search with periodic needles. This commit adds the missing pieces for the "short period" case in reverse search. The short case will show up when the needle is literally periodic, for example "abababab". Two way uses a "critical factorization" of the needle: x = u v. Searching matches v first, if mismatch at character k, skip k forward. Matching u, if mismatch, skip period(x) forward. To avoid O(mn) behavior after mismatch in u, memorize the already matched prefix. 
The short period case requires that |u| < period(x). For the reverse search we need to compute a different critical factorization x = u' v' where |v'| < period(x), because we are searching for the reversed needle. A short v' also benefits the algorithm in general. The reverse critical factorization is computed quickly by using the same maximal suffix algorithm, but terminating as soon as we have a location with local period equal to period(x). This adds extra fields crit_pos_back and memory_back for the reverse case. The new overhead for TwoWaySearcher::new is low, and additionally I think the "short period" case is uncommon in many applications of string search. The maximal_suffix methods were updated in documentation and the algorithms updated to not use !0 and wrapping add, variable left is now 1 larger, offset 1 smaller. Use periodicity when computing byteset: in the periodic case, just iterate over one period instead of the whole needle. Example before (rfind) after (twoway_rfind) benchmark shows the removal of quadratic behavior. needle: "ab" * 100, haystack: ("bb" + "ab" * 100) * 100 ``` test periodic::rfind ... bench: 1,926,595 ns/iter (+/- 11,390) = 10 MB/s test periodic::twoway_rfind ... 
bench: 51,740 ns/iter (+/- 66) = 386 MB/s ``` --- src/libcore/str/pattern.rs | 207 ++++++++++++++++++++++++++++--------- 1 file changed, 158 insertions(+), 49 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 2b3fc39fc8b2a..8bc1ba207bdfb 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -676,8 +676,10 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { if searcher.end == 0 { return SearchStep::Done; } + let is_long = searcher.memory == usize::MAX; match searcher.next_back::(self.haystack.as_bytes(), - self.needle.as_bytes()) + self.needle.as_bytes(), + is_long) { SearchStep::Reject(mut a, b) => { // skip to next char boundary @@ -706,8 +708,16 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { } } StrSearcherImpl::TwoWay(ref mut searcher) => { - searcher.next_back::(self.haystack.as_bytes(), - self.needle.as_bytes()) + let is_long = searcher.memory == usize::MAX; + if is_long { + searcher.next_back::(self.haystack.as_bytes(), + self.needle.as_bytes(), + true) + } else { + searcher.next_back::(self.haystack.as_bytes(), + self.needle.as_bytes(), + false) + } } } } @@ -718,14 +728,21 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { #[derive(Clone, Debug)] struct TwoWaySearcher { // constants + /// critical factorization index crit_pos: usize, + /// critical factorization index for reversed needle + crit_pos_back: usize, period: usize, + /// `byteset` is an extension (not part of the two way algorithm); + /// it's a 64-bit "fingerprint" where each set bit `j` corresponds + /// to a (byte & 63) == j present in the needle. byteset: u64, // variables position: usize, end: usize, - memory: usize + memory: usize, + memory_back: usize, } /* @@ -797,6 +814,9 @@ struct TwoWaySearcher { The purpose of maximal_suffix is to find such a critical factorization. 
+ If the period is short, compute another factorization x = u' v' to use + for reverse search, chosen instead so that |v'| < period(x). + */ impl TwoWaySearcher { fn new(needle: &[u8], end: usize) -> TwoWaySearcher { @@ -810,10 +830,6 @@ impl TwoWaySearcher { (crit_pos_true, period_true) }; - // This isn't in the original algorithm, as far as I'm aware. - let byteset = needle.iter() - .fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a); - // A particularly readable explanation of what's going on here can be found // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically // see the code for "Algorithm CP" on p. 323. @@ -824,27 +840,51 @@ impl TwoWaySearcher { // "Algorithm CP2", which is optimized for when the period of the needle // is large. if &needle[..crit_pos] == &needle[period.. period + crit_pos] { - // short period case + // short period case -- the period is exact + let byteset = needle[..period].iter() + .fold(0, |a, &b| (1 << (b & 0x3f)) | a); + + // compute a separate critical factorization for the reversed needle + // x = u' v' where |v'| < period(x). + // + // This is sped up by the period being known already. + // Note that a case like x = "acba" may be factored exactly forwards + // (crit_pos = 1, period = 3) while being factored with approximate + // period in reverse (crit_pos = 2, period = 2). We use the given + // reverse factorization but keep the exact period. + let crit_pos_back = needle.len() - cmp::max( + TwoWaySearcher::reverse_maximal_suffix(needle, period, false), + TwoWaySearcher::reverse_maximal_suffix(needle, period, true)); + TwoWaySearcher { crit_pos: crit_pos, + crit_pos_back: crit_pos_back, period: period, byteset: byteset, position: 0, end: end, - memory: 0 + memory: 0, + // memory_back after which we have already matched + memory_back: needle.len(), } } else { - // long period case - // we have an approximation to the actual period, and don't use memory. 
+ // long period case -- we have an approximation to the actual period, + // and don't use memorization. + + let byteset = needle.iter() + .fold(0, |a, &b| (1 << (b & 0x3f)) | a); + TwoWaySearcher { crit_pos: crit_pos, + crit_pos_back: crit_pos, period: cmp::max(crit_pos, needle.len() - crit_pos) + 1, byteset: byteset, position: 0, end: end, - memory: usize::MAX // Dummy value to signify that the period is long + memory: usize::MAX, // Dummy value to signify that the period is long + memory_back: usize::MAX, } } } @@ -926,19 +966,18 @@ impl TwoWaySearcher { // Follows the ideas in `next()`. // - // All the definitions are completely symmetrical, with period(x) = period(reverse(x)) + // The definitions are symmetrical, with period(x) = period(reverse(x)) // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v) - // is a critical factorization, so is (reverse(v), reverse(u)). Similarly, - // the "period" stored in self.period is the real period if long_period is - // false, and so is still valid for a reversed needle, and if long_period is - // true, all the algorithm requires is that self.period is less than or - // equal to the real period, which must be true for the forward case anyway. + // is a critical factorization, so is (reverse(v), reverse(u)). + // + // For the short period case, using memorization, we rely on |u| < period(x). + // For this case we have computed a critical factorization x = u' v' + // where |v'| < period(x) instead (field `crit_pos_back`). // // To search in reverse through the haystack, we search forward through - // a reversed haystack with a reversed needle, and the above paragraph shows - // that the precomputed parameters can be left alone. + // a reversed haystack with a reversed needle, matching first u' and then v'. 
#[inline] - fn next_back(&mut self, haystack: &[u8], needle: &[u8]) + fn next_back(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output where S: TwoWayStrategy { @@ -959,21 +998,34 @@ impl TwoWaySearcher { // Quickly skip by large portions unrelated to our substring if !self.byteset_contains(haystack[self.end - needle.len()]) { self.end -= needle.len(); + if !long_period { + self.memory_back = needle.len(); + } continue 'search; } // See if the left part of the needle matches - for i in (0..self.crit_pos).rev() { + let crit = if long_period { self.crit_pos_back } + else { cmp::min(self.crit_pos_back, self.memory_back) }; + for i in (0..crit).rev() { if needle[i] != haystack[self.end - needle.len() + i] { - self.end -= self.crit_pos - i; + self.end -= self.crit_pos_back - i; + if !long_period { + self.memory_back = needle.len(); + } continue 'search; } } // See if the right part of the needle matches - for i in self.crit_pos..needle.len() { + let needle_end = if long_period { needle.len() } + else { self.memory_back }; + for i in self.crit_pos_back..needle_end { if needle[i] != haystack[self.end - needle.len() + i] { self.end -= self.period; + if !long_period { + self.memory_back = self.period; + } continue 'search; } } @@ -982,41 +1034,94 @@ impl TwoWaySearcher { let match_pos = self.end - needle.len(); // Note: sub self.period instead of needle.len() to have overlapping matches self.end -= needle.len(); + if !long_period { + self.memory_back = needle.len(); + } return S::matching(match_pos, match_pos + needle.len()); } } - // Computes a critical factorization (u, v) of `arr`. - // Specifically, returns (i, p), where i is the starting index of v in some - // critical factorization (u, v) and p = period(v) + // Compute the maximal suffix of `arr`. + // + // The maximal suffix is a possible critical factorization (u, v) of `arr`. + // + // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the + // period of v. 
+ // + // `order_greater` determines if lexical order is `<` or `>`. Both + // orders must be computed -- the ordering with the largest `i` gives + // a critical factorization. + // + // For long period cases, the resulting period is not exact (it is too short). #[inline] - fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) { - let mut left: usize = !0; // Corresponds to i in the paper - let mut right = 0; // Corresponds to j in the paper - let mut offset = 1; // Corresponds to k in the paper + fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) { + let mut left = 0; // Corresponds to i in the paper + let mut right = 1; // Corresponds to j in the paper + let mut offset = 0; // Corresponds to k in the paper let mut period = 1; // Corresponds to p in the paper - while right + offset < arr.len() { - let a; - let b; - if reversed { - a = arr[left.wrapping_add(offset)]; - b = arr[right + offset]; + while let Some(&a) = arr.get(right + offset) { + // `left` will be inbounds when `right` is. + let b = arr[left + offset]; + if (a < b && !order_greater) || (a > b && order_greater) { + // Suffix is smaller, period is entire prefix so far. + right += offset + 1; + offset = 0; + period = right - left; + } else if a == b { + // Advance through repetition of the current period. + if offset + 1 == period { + right += offset + 1; + offset = 0; + } else { + offset += 1; + } } else { - a = arr[right + offset]; - b = arr[left.wrapping_add(offset)]; + // Suffix is larger, start over from current location. + left = right; + right += 1; + offset = 0; + period = 1; } - if a < b { + } + (left, period) + } + + // Compute the maximal suffix of the reverse of `arr`. + // + // The maximal suffix is a possible critical factorization (u', v') of `arr`. + // + // Returns `i` where `i` is the starting index of v', from the back; + // returns immedately when a period of `known_period` is reached. + // + // `order_greater` determines if lexical order is `<` or `>`. 
Both + // orders must be computed -- the ordering with the largest `i` gives + // a critical factorization. + // + // For long period cases, the resulting period is not exact (it is too short). + fn reverse_maximal_suffix(arr: &[u8], known_period: usize, + order_greater: bool) -> usize + { + let mut left = 0; // Corresponds to i in the paper + let mut right = 1; // Corresponds to j in the paper + let mut offset = 0; // Corresponds to k in the paper + let mut period = 1; // Corresponds to p in the paper + let n = arr.len(); + + while right + offset < n { + let a = arr[n - (1 + right + offset)]; + let b = arr[n - (1 + left + offset)]; + if (a < b && !order_greater) || (a > b && order_greater) { // Suffix is smaller, period is entire prefix so far. - right += offset; - offset = 1; - period = right.wrapping_sub(left); + right += offset + 1; + offset = 0; + period = right - left; } else if a == b { // Advance through repetition of the current period. - if offset == period { - right += offset; - offset = 1; + if offset + 1 == period { + right += offset + 1; + offset = 0; } else { offset += 1; } @@ -1024,11 +1129,15 @@ impl TwoWaySearcher { // Suffix is larger, start over from current location. left = right; right += 1; - offset = 1; + offset = 0; period = 1; } + if period == known_period { + break; + } } - (left.wrapping_add(1), period) + debug_assert!(period <= known_period); + left } } From 2b82c072c75d64c434a62f185b67fb41028d6f71 Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Fri, 7 Aug 2015 12:06:43 +0200 Subject: [PATCH 3/4] StrSearcher: Improve inner loop in TwoWaySearcher::next, next_back The innermost loop of TwoWaySearcher checks the boundary of the haystack vs position + needle.len(), and it checks the last byte of the needle against the byteset. If these two steps are combined by using the indexing of the last needle byte's position as bounds check, the algorithm improves its throughput. 
We improve the innermost loop by reducing the number of instructions used, and eliminating the panic case for the checked indexing that was previously used. Selected benchmarks from the external/workspace testsuite. Benchmarks improve across the board. ``` before: test bb_in_aa::twoway_find ... bench: 4,229 ns/iter (+/- 1,305) = 23646 MB/s test bb_in_aa::twoway_rfind ... bench: 3,873 ns/iter (+/- 101) = 25819 MB/s test short_1let_long::twoway_find ... bench: 7,075 ns/iter (+/- 29) = 360 MB/s test short_1let_long::twoway_rfind ... bench: 6,640 ns/iter (+/- 79) = 384 MB/s test short_2let_long::twoway_find ... bench: 3,823 ns/iter (+/- 16) = 667 MB/s test short_2let_long::twoway_rfind ... bench: 3,774 ns/iter (+/- 44) = 675 MB/s test short_3let_long::twoway_find ... bench: 3,582 ns/iter (+/- 47) = 712 MB/s test short_3let_long::twoway_rfind ... bench: 3,616 ns/iter (+/- 34) = 705 MB/s with this commit: test bb_in_aa::twoway_find ... bench: 2,952 ns/iter (+/- 20) = 33875 MB/s test bb_in_aa::twoway_rfind ... bench: 2,939 ns/iter (+/- 99) = 34025 MB/s test short_1let_long::twoway_find ... bench: 4,593 ns/iter (+/- 4) = 555 MB/s test short_1let_long::twoway_rfind ... bench: 4,592 ns/iter (+/- 76) = 555 MB/s test short_2let_long::twoway_find ... bench: 2,804 ns/iter (+/- 3) = 909 MB/s test short_2let_long::twoway_rfind ... bench: 2,807 ns/iter (+/- 40) = 908 MB/s test short_3let_long::twoway_find ... bench: 3,105 ns/iter (+/- 120) = 821 MB/s test short_3let_long::twoway_rfind ... bench: 3,019 ns/iter (+/- 50) = 844 MB/s ``` - `bb_in_aa`: fast skip due to byteset filter loop improves. - 1/2/3let: Searches for 1, 2, or 3 ascii bytes improve. 
--- src/libcore/str/pattern.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 8bc1ba207bdfb..0ea3b38a3cf29 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -906,19 +906,25 @@ impl TwoWaySearcher { { // `next()` uses `self.position` as its cursor let old_pos = self.position; + let needle_last = needle.len() - 1; 'search: loop { // Check that we have room to search in - if needle.len() > haystack.len() - self.position { - self.position = haystack.len(); - return S::rejecting(old_pos, self.position); - } + // position + needle_last can not overflow if we assume slices + // are bounded by isize's range. + let tail_byte = match haystack.get(self.position + needle_last) { + Some(&b) => b, + None => { + self.position = haystack.len(); + return S::rejecting(old_pos, self.position); + } + }; if S::use_early_reject() && old_pos != self.position { return S::rejecting(old_pos, self.position); } // Quickly skip by large portions unrelated to our substring - if !self.byteset_contains(haystack[self.position + needle.len() - 1]) { + if !self.byteset_contains(tail_byte) { self.position += needle.len(); if !long_period { self.memory = 0; @@ -986,17 +992,23 @@ impl TwoWaySearcher { let old_end = self.end; 'search: loop { // Check that we have room to search in - if needle.len() > self.end { - self.end = 0; - return S::rejecting(0, old_end); - } + // end - needle.len() will wrap around when there is no more room, + // but due to slice length limits it can never wrap all the way back + // into the length of haystack. 
+ let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) { + Some(&b) => b, + None => { + self.end = 0; + return S::rejecting(0, old_end); + } + }; if S::use_early_reject() && old_end != self.end { return S::rejecting(self.end, old_end); } // Quickly skip by large portions unrelated to our substring - if !self.byteset_contains(haystack[self.end - needle.len()]) { + if !self.byteset_contains(front_byte) { self.end -= needle.len(); if !long_period { self.memory_back = needle.len(); From 01e88124612471f82b3c62efaad141e61842cfbb Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Sun, 16 Aug 2015 22:37:18 +0200 Subject: [PATCH 4/4] StrSearcher: Additional comments and small code moves Break out a separate static method to create the "byteset". --- src/libcore/str/pattern.rs | 46 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 0ea3b38a3cf29..8e22fdc30426c 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -641,6 +641,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { } StrSearcherImpl::TwoWay(ref mut searcher) => { let is_long = searcher.memory == usize::MAX; + // write out `true` and `false` cases to encourage the compiler + // to specialize the two cases separately. 
if is_long { searcher.next::(self.haystack.as_bytes(), self.needle.as_bytes(), @@ -653,8 +655,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { } } } - } + unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { #[inline] fn next_back(&mut self) -> SearchStep { @@ -709,6 +711,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { } StrSearcherImpl::TwoWay(ref mut searcher) => { let is_long = searcher.memory == usize::MAX; + // write out `true` and `false`, like `next_match` if is_long { searcher.next_back::(self.haystack.as_bytes(), self.needle.as_bytes(), @@ -723,8 +726,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { } } -/// The internal state of an iterator that searches for matches of a substring -/// within a larger string using two-way search +/// The internal state of the two-way substring search algorithm. #[derive(Clone, Debug)] struct TwoWaySearcher { // constants @@ -741,7 +743,9 @@ struct TwoWaySearcher { // variables position: usize, end: usize, + /// index into needle before which we have already matched memory: usize, + /// index into needle after which we have already matched memory_back: usize, } @@ -841,9 +845,6 @@ impl TwoWaySearcher { // is large. if &needle[..crit_pos] == &needle[period.. period + crit_pos] { // short period case -- the period is exact - let byteset = needle[..period].iter() - .fold(0, |a, &b| (1 << (b & 0x3f)) | a); - // compute a separate critical factorization for the reversed needle // x = u' v' where |v'| < period(x). // @@ -860,26 +861,26 @@ impl TwoWaySearcher { crit_pos: crit_pos, crit_pos_back: crit_pos_back, period: period, - byteset: byteset, + byteset: Self::byteset_create(&needle[..period]), position: 0, end: end, memory: 0, - // memory_back after which we have already matched memory_back: needle.len(), } } else { // long period case -- we have an approximation to the actual period, // and don't use memorization. 
- - let byteset = needle.iter() - .fold(0, |a, &b| (1 << (b & 0x3f)) | a); + // + // Approximate the period by lower bound max(|u|, |v|) + 1. + // The critical factorization is efficient to use for both forward and + // reverse search. TwoWaySearcher { crit_pos: crit_pos, crit_pos_back: crit_pos, period: cmp::max(crit_pos, needle.len() - crit_pos) + 1, - byteset: byteset, + byteset: Self::byteset_create(needle), position: 0, end: end, @@ -889,6 +890,11 @@ impl TwoWaySearcher { } } + #[inline] + fn byteset_create(bytes: &[u8]) -> u64 { + bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a) + } + #[inline(always)] fn byteset_contains(&self, byte: u8) -> bool { (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0 @@ -976,9 +982,9 @@ impl TwoWaySearcher { // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v) // is a critical factorization, so is (reverse(v), reverse(u)). // - // For the short period case, using memorization, we rely on |u| < period(x). - // For this case we have computed a critical factorization x = u' v' - // where |v'| < period(x) instead (field `crit_pos_back`). + // For the reverse case we have computed a critical factorization x = u' v' + // (field `crit_pos_back`). We need |u| < period(x) for the forward case and + // thus |v'| < period(x) for the reverse. // // To search in reverse through the haystack, we search forward through // a reversed haystack with a reversed needle, matching first u' and then v'. 
@@ -1018,7 +1024,7 @@ impl TwoWaySearcher { // See if the left part of the needle matches let crit = if long_period { self.crit_pos_back } - else { cmp::min(self.crit_pos_back, self.memory_back) }; + else { cmp::min(self.crit_pos_back, self.memory_back) }; for i in (0..crit).rev() { if needle[i] != haystack[self.end - needle.len() + i] { self.end -= self.crit_pos_back - i; @@ -1031,7 +1037,7 @@ impl TwoWaySearcher { // See if the right part of the needle matches let needle_end = if long_period { needle.len() } - else { self.memory_back }; + else { self.memory_back }; for i in self.crit_pos_back..needle_end { if needle[i] != haystack[self.end - needle.len() + i] { self.end -= self.period; @@ -1070,7 +1076,8 @@ impl TwoWaySearcher { fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) { let mut left = 0; // Corresponds to i in the paper let mut right = 1; // Corresponds to j in the paper - let mut offset = 0; // Corresponds to k in the paper + let mut offset = 0; // Corresponds to k in the paper, but starting at 0 + // to match 0-based indexing. let mut period = 1; // Corresponds to p in the paper while let Some(&a) = arr.get(right + offset) { @@ -1117,7 +1124,8 @@ impl TwoWaySearcher { { let mut left = 0; // Corresponds to i in the paper let mut right = 1; // Corresponds to j in the paper - let mut offset = 0; // Corresponds to k in the paper + let mut offset = 0; // Corresponds to k in the paper, but starting at 0 + // to match 0-based indexing. let mut period = 1; // Corresponds to p in the paper let n = arr.len();