diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index caa54e00f319f..cc66da25795dd 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -40,6 +40,7 @@ use crate::cmp; use crate::cmp::Ordering; +use crate::convert::TryInto as _; use crate::fmt; use crate::slice::memchr; @@ -370,11 +371,17 @@ pub struct CharSearcher<'a> { // safety invariant: `utf8_size` must be less than 5 /// The number of bytes `needle` takes up when encoded in utf8. - utf8_size: usize, + utf8_size: u8, /// A utf8 encoded copy of the `needle` utf8_encoded: [u8; 4], } +impl CharSearcher<'_> { + fn utf8_size(&self) -> usize { + self.utf8_size.into() + } +} + unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { #[inline] fn haystack(&self) -> &'a str { @@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?; // the last byte of the utf8 encoded needle // SAFETY: we have an invariant that `utf8_size < 5` - let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) }; + let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) }; if let Some(index) = memchr::memchr(last_byte, bytes) { // The new finger is the index of the byte we found, // plus one, since we memchr'd for the last byte of the character. @@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { // find something. When we find something the `finger` will be set // to a UTF8 boundary. self.finger += index + 1; - if self.finger >= self.utf8_size { - let found_char = self.finger - self.utf8_size; + if self.finger >= self.utf8_size() { + let found_char = self.finger - self.utf8_size(); if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) { - if slice == &self.utf8_encoded[0..self.utf8_size] { + if slice == &self.utf8_encoded[0..self.utf8_size()] { return Some((found_char, self.finger)); } } @@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { let bytes = haystack.get(self.finger..self.finger_back)?; // the last byte of the utf8 encoded needle // SAFETY: we have an invariant that `utf8_size < 5` - let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) }; + let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) }; if let Some(index) = memchr::memrchr(last_byte, bytes) { // we searched a slice that was offset by self.finger, // add self.finger to recoup the original index @@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { // char in the paradigm of reverse iteration). For // multibyte chars we need to skip down by the number of more // bytes they have than ASCII - let shift = self.utf8_size - 1; + let shift = self.utf8_size() - 1; if index >= shift { let found_char = index - shift; - if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) { - if slice == &self.utf8_encoded[0..self.utf8_size] { + if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) { + if slice == &self.utf8_encoded[0..self.utf8_size()] { // move finger to before the character found (i.e., at its start index) self.finger_back = found_char; - return Some((self.finger_back, self.finger_back + self.utf8_size)); + return Some((self.finger_back, self.finger_back + self.utf8_size())); } } } @@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char { #[inline] fn into_searcher(self, haystack: &'a str) -> Self::Searcher { let mut utf8_encoded = [0; 4]; - let utf8_size = self.encode_utf8(&mut utf8_encoded).len(); + let utf8_size = self + .encode_utf8(&mut utf8_encoded) + .len() + .try_into() + .expect("char len should be less than 255"); + CharSearcher { haystack, finger: 0,