From dc5601dbece5312c8cc8f8f66b950d1eb6edd95e Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Thu, 5 Jan 2023 19:20:50 +0100 Subject: [PATCH] Refactor input traits (#1610) * add default implementations for InputTakeAtPosition methods * use const generics for fixed size array implementations * BREAKING CHANGE: simplify InputIter: remove iter_indices it was only used in two combinators for which an alternative implementation can be used --- src/character/complete.rs | 31 ++-- src/character/streaming.rs | 31 ++-- src/number/complete.rs | 6 +- src/number/streaming.rs | 6 +- src/traits.rs | 304 +++++++++++++++---------------------- 5 files changed, 167 insertions(+), 211 deletions(-) diff --git a/src/character/complete.rs b/src/character/complete.rs index eccbb4e3a..cd6824108 100644 --- a/src/character/complete.rs +++ b/src/character/complete.rs @@ -311,13 +311,10 @@ where T: InputIter + InputLength + Slice>, ::Item: AsChar, { - let mut it = input.iter_indices(); + let mut it = input.iter_elements(); match it.next() { None => Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))), - Some((_, c)) => match it.next() { - None => Ok((input.slice(input.input_len()..), c.as_char())), - Some((idx, _)) => Ok((input.slice(idx..), c.as_char())), - }, + Some(c) => Ok((input.slice(c.len()..), c.as_char())), } } @@ -744,7 +741,8 @@ macro_rules! ints { let mut value: $t = 0; if sign { - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -755,12 +753,16 @@ macro_rules! 
ints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } } else { - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -771,7 +773,10 @@ macro_rules! ints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_sub(d as $t)) { None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } @@ -804,7 +809,8 @@ macro_rules! uints { } let mut value: $t = 0; - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -815,7 +821,10 @@ macro_rules! uints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } diff --git a/src/character/streaming.rs b/src/character/streaming.rs index eaa25516d..904cfcbca 100644 --- a/src/character/streaming.rs +++ b/src/character/streaming.rs @@ -291,13 +291,10 @@ where T: InputIter + InputLength + Slice>, ::Item: AsChar, { - let mut it = input.iter_indices(); + let mut it = input.iter_elements(); match it.next() { None => Err(Err::Incomplete(Needed::new(1))), - Some((_, c)) => match it.next() { - None => Ok((input.slice(input.input_len()..), c.as_char())), - Some((idx, _)) => Ok((input.slice(idx..), c.as_char())), - }, + Some(c) => Ok((input.slice(c.len()..), c.as_char())), } } @@ -650,7 +647,8 @@ macro_rules! 
ints { let mut value: $t = 0; if sign { - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -661,12 +659,16 @@ macro_rules! ints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } } else { - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -677,7 +679,10 @@ macro_rules! ints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_sub(d as $t)) { None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } @@ -710,7 +715,8 @@ macro_rules! uints { } let mut value: $t = 0; - for (pos, c) in i.iter_indices() { + let mut pos = 0; + for c in i.iter_elements() { match c.as_char().to_digit(10) { None => { if pos == 0 { @@ -721,7 +727,10 @@ macro_rules! 
uints { }, Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))), - Some(v) => value = v, + Some(v) => { + pos += c.len(); + value = v; + }, } } } diff --git a/src/number/complete.rs b/src/number/complete.rs index f8cbcb740..1ea0ae8fd 100644 --- a/src/number/complete.rs +++ b/src/number/complete.rs @@ -472,7 +472,7 @@ where Err(Err::Error(make_error(input, ErrorKind::Eof))) } else { let mut res = Uint::default(); - for (index, byte) in input.iter_indices().take(bound) { + for (index, byte) in input.iter_elements().take(bound).enumerate() { res = res + (Uint::from(byte) << (8 * index as u8)); } @@ -1506,7 +1506,7 @@ where T: Clone + Offset + ParseTo + Compare<&'static str>, T: InputIter + InputLength + InputTake, ::Item: AsChar + Copy, - ::IterElem: Clone, + ::Iter: Clone, T: InputTakeAtPosition, ::Item: AsChar, T: AsBytes, @@ -1559,7 +1559,7 @@ where T: Clone + Offset + ParseTo + Compare<&'static str>, T: InputIter + InputLength + InputTake, ::Item: AsChar + Copy, - ::IterElem: Clone, + ::Iter: Clone, T: InputTakeAtPosition, ::Item: AsChar, T: AsBytes, diff --git a/src/number/streaming.rs b/src/number/streaming.rs index 0d80e1f98..e15789b36 100644 --- a/src/number/streaming.rs +++ b/src/number/streaming.rs @@ -449,7 +449,7 @@ where Err(Err::Incomplete(Needed::new(bound - input.input_len()))) } else { let mut res = Uint::default(); - for (index, byte) in input.iter_indices().take(bound) { + for (index, byte) in input.iter_elements().take(bound).enumerate() { res = res + (Uint::from(byte) << (8 * index as u8)); } @@ -1476,7 +1476,7 @@ where T: Clone + Offset, T: InputIter + InputLength + InputTake + crate::traits::ParseTo + Compare<&'static str>, ::Item: AsChar, - ::IterElem: Clone, + ::Iter: Clone, T: InputTakeAtPosition, ::Item: AsChar, T: AsBytes, @@ -1530,7 +1530,7 @@ where T: Clone + Offset, T: InputIter + InputLength + InputTake + crate::traits::ParseTo + 
Compare<&'static str>, ::Item: AsChar, - ::IterElem: Clone, + ::Iter: Clone, T: InputTakeAtPosition, ::Item: AsChar, T: AsBytes, diff --git a/src/traits.rs b/src/traits.rs index e61c57a2a..a739fd287 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,13 +1,12 @@ //! Traits input types have to implement to work with nom combinators use crate::error::{ErrorKind, ParseError}; use crate::internal::{Err, IResult, Needed}; -use crate::lib::std::iter::{Copied, Enumerate}; +use crate::lib::std::iter::Copied; use crate::lib::std::ops::{ Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }; use crate::lib::std::slice::Iter; use crate::lib::std::str::from_utf8; -use crate::lib::std::str::CharIndices; use crate::lib::std::str::Chars; use crate::lib::std::str::FromStr; @@ -122,31 +121,18 @@ impl AsBytes for [u8] { } } -macro_rules! as_bytes_array_impls { - ($($N:expr)+) => { - $( - impl<'a> AsBytes for &'a [u8; $N] { - #[inline(always)] - fn as_bytes(&self) -> &[u8] { - *self - } - } - - impl AsBytes for [u8; $N] { - #[inline(always)] - fn as_bytes(&self) -> &[u8] { - self - } - } - )+ - }; +impl<'a, const N: usize> AsBytes for &'a [u8; N] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + *self + } } -as_bytes_array_impls! { - 0 1 2 3 4 5 6 7 8 9 - 10 11 12 13 14 15 16 17 18 19 - 20 21 22 23 24 25 26 27 28 29 - 30 31 32 +impl AsBytes for [u8; N] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self + } } /// Transforms common types to a char for basic token parsing @@ -303,18 +289,12 @@ pub trait InputIter { /// /// Example: `u8` for `&[u8]` or `char` for `&str` type Item; - /// An iterator over the input type, producing the item and its position - /// for use with [Slice]. 
If we're iterating over `&str`, the position - /// corresponds to the byte index of the character - type Iter: Iterator; /// An iterator over the input type, producing the item - type IterElem: Iterator; + type Iter: Iterator; - /// Returns an iterator over the elements and their byte offsets - fn iter_indices(&self) -> Self::Iter; /// Returns an iterator over the elements - fn iter_elements(&self) -> Self::IterElem; + fn iter_elements(&self) -> Self::Iter; /// Finds the byte position of the element fn position

(&self, predicate: P) -> Option where @@ -333,15 +313,10 @@ pub trait InputTake: Sized { impl<'a> InputIter for &'a [u8] { type Item = u8; - type Iter = Enumerate; - type IterElem = Copied>; + type Iter = Copied>; #[inline] - fn iter_indices(&self) -> Self::Iter { - self.iter_elements().enumerate() - } - #[inline] - fn iter_elements(&self) -> Self::IterElem { + fn iter_elements(&self) -> Self::Iter { self.iter().copied() } #[inline] @@ -375,14 +350,10 @@ impl<'a> InputTake for &'a [u8] { impl<'a> InputIter for &'a str { type Item = char; - type Iter = CharIndices<'a>; - type IterElem = Chars<'a>; - #[inline] - fn iter_indices(&self) -> Self::Iter { - self.char_indices() - } + type Iter = Chars<'a>; + #[inline] - fn iter_elements(&self) -> Self::IterElem { + fn iter_elements(&self) -> Self::Iter { self.chars() } fn position

(&self, predicate: P) -> Option @@ -440,65 +411,21 @@ pub trait UnspecializedInput {} /// Methods to take as much input as possible until the provided function returns true for the current element. /// /// A large part of nom's basic parsers are built using this trait. -pub trait InputTakeAtPosition: Sized { +pub trait InputTakeAtPosition: Clone + InputTake + InputLength + Sized { /// The current input type is a sequence of that `Item` type. /// /// Example: `u8` for `&[u8]` or `char` for `&str` type Item; - /// Looks for the first element of the input type for which the condition returns true, - /// and returns the input up to this position. - /// - /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` - fn split_at_position>(&self, predicate: P) -> IResult - where - P: Fn(Self::Item) -> bool; - - /// Looks for the first element of the input type for which the condition returns true - /// and returns the input up to this position. - /// - /// Fails if the produced slice is empty. - /// - /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` - fn split_at_position1>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult + /// Returns the position of the first element satisfying the predicate + fn position

(&self, predicate: P) -> Option where P: Fn(Self::Item) -> bool; /// Looks for the first element of the input type for which the condition returns true, /// and returns the input up to this position. /// - /// *complete version*: If no element is found matching the condition, this will return the whole input - fn split_at_position_complete>( - &self, - predicate: P, - ) -> IResult - where - P: Fn(Self::Item) -> bool; - - /// Looks for the first element of the input type for which the condition returns true - /// and returns the input up to this position. - /// - /// Fails if the produced slice is empty. - /// - /// *complete version*: If no element is found matching the condition, this will return the whole input - fn split_at_position1_complete>( - &self, - predicate: P, - e: ErrorKind, - ) -> IResult - where - P: Fn(Self::Item) -> bool; -} - -impl InputTakeAtPosition - for T -{ - type Item = ::Item; - + /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` fn split_at_position>(&self, predicate: P) -> IResult where P: Fn(Self::Item) -> bool, @@ -509,6 +436,12 @@ impl InputT } } + /// Looks for the first element of the input type for which the condition returns true + /// and returns the input up to this position. + /// + /// Fails if the produced slice is empty. + /// + /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` fn split_at_position1>( &self, predicate: P, @@ -524,6 +457,10 @@ impl InputT } } + /// Looks for the first element of the input type for which the condition returns true, + /// and returns the input up to this position. 
+ /// + /// *complete version*: If no element is found matching the condition, this will return the whole input fn split_at_position_complete>( &self, predicate: P, @@ -537,6 +474,12 @@ impl InputT } } + /// Looks for the first element of the input type for which the condition returns true + /// and returns the input up to this position. + /// + /// Fails if the produced slice is empty. + /// + /// *complete version*: If no element is found matching the condition, this will return the whole input fn split_at_position1_complete>( &self, predicate: P, @@ -558,9 +501,29 @@ impl InputT } } +impl InputTakeAtPosition + for T +{ + type Item = ::Item; + + fn position

(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + InputIter::position(self, predicate) + } +} + impl<'a> InputTakeAtPosition for &'a [u8] { type Item = u8; + fn position

(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.iter().position(|c| predicate(*c)) + } + fn split_at_position>(&self, predicate: P) -> IResult where P: Fn(Self::Item) -> bool, @@ -624,6 +587,13 @@ impl<'a> InputTakeAtPosition for &'a [u8] { impl<'a> InputTakeAtPosition for &'a str { type Item = char; + fn position

(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.find(predicate) + } + fn split_at_position>(&self, predicate: P) -> IResult where P: Fn(Self::Item) -> bool, @@ -748,22 +718,6 @@ impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] { } } } - - /* - let len = self.len(); - let blen = t.len(); - let m = if len < blen { len } else { blen }; - let reduced = &self[..m]; - let b = &t[..m]; - - if reduced != b { - CompareResult::Error - } else if m < blen { - CompareResult::Incomplete - } else { - CompareResult::Ok - } - */ } #[inline(always)] @@ -1046,90 +1000,74 @@ macro_rules! slice_ranges_impl { slice_ranges_impl! {str} slice_ranges_impl! {[T]} -macro_rules! array_impls { - ($($N:expr)+) => { - $( - impl InputLength for [u8; $N] { - #[inline] - fn input_len(&self) -> usize { - self.len() - } - } - - impl<'a> InputLength for &'a [u8; $N] { - #[inline] - fn input_len(&self) -> usize { - self.len() - } - } - - impl<'a> InputIter for &'a [u8; $N] { - type Item = u8; - type Iter = Enumerate; - type IterElem = Copied>; +impl InputLength for [u8; N] { + #[inline] + fn input_len(&self) -> usize { + self.len() + } +} - fn iter_indices(&self) -> Self::Iter { - (&self[..]).iter_indices() - } +impl<'a, const N: usize> InputLength for &'a [u8; N] { + #[inline] + fn input_len(&self) -> usize { + self.len() + } +} - fn iter_elements(&self) -> Self::IterElem { - (&self[..]).iter_elements() - } +impl<'a, const N: usize> InputIter for &'a [u8; N] { + type Item = u8; + type Iter = Copied>; - fn position

(&self, predicate: P) -> Option - where P: Fn(Self::Item) -> bool { - (&self[..]).position(predicate) - } + fn iter_elements(&self) -> Self::Iter { + (&self[..]).iter_elements() + } - fn slice_index(&self, count: usize) -> Result { - (&self[..]).slice_index(count) - } - } + fn position

(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + InputIter::position(&&self[..], predicate) + } - impl<'a> Compare<[u8; $N]> for &'a [u8] { - #[inline(always)] - fn compare(&self, t: [u8; $N]) -> CompareResult { - self.compare(&t[..]) - } + fn slice_index(&self, count: usize) -> Result { + (&self[..]).slice_index(count) + } +} - #[inline(always)] - fn compare_no_case(&self, t: [u8;$N]) -> CompareResult { - self.compare_no_case(&t[..]) - } - } +impl<'a, const N: usize> Compare<[u8; N]> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: [u8; N]) -> CompareResult { + self.compare(&t[..]) + } - impl<'a,'b> Compare<&'b [u8; $N]> for &'a [u8] { - #[inline(always)] - fn compare(&self, t: &'b [u8; $N]) -> CompareResult { - self.compare(&t[..]) - } + #[inline(always)] + fn compare_no_case(&self, t: [u8; N]) -> CompareResult { + self.compare_no_case(&t[..]) + } +} - #[inline(always)] - fn compare_no_case(&self, t: &'b [u8;$N]) -> CompareResult { - self.compare_no_case(&t[..]) - } - } +impl<'a, 'b, const N: usize> Compare<&'b [u8; N]> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: &'b [u8; N]) -> CompareResult { + self.compare(&t[..]) + } - impl FindToken for [u8; $N] { - fn find_token(&self, token: u8) -> bool { - memchr::memchr(token, &self[..]).is_some() - } - } + #[inline(always)] + fn compare_no_case(&self, t: &'b [u8; N]) -> CompareResult { + self.compare_no_case(&t[..]) + } +} - impl<'a> FindToken<&'a u8> for [u8; $N] { - fn find_token(&self, token: &u8) -> bool { - self.find_token(*token) - } - } - )+ - }; +impl FindToken for [u8; N] { + fn find_token(&self, token: u8) -> bool { + memchr::memchr(token, &self[..]).is_some() + } } -array_impls! { - 0 1 2 3 4 5 6 7 8 9 - 10 11 12 13 14 15 16 17 18 19 - 20 21 22 23 24 25 26 27 28 29 - 30 31 32 +impl<'a, const N: usize> FindToken<&'a u8> for [u8; N] { + fn find_token(&self, token: &u8) -> bool { + self.find_token(*token) + } } /// Abstracts something which can extend an `Extend`.