From e73b238ea00dfa6574e0b8611f6ac15c2ca41d00 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Tue, 25 Apr 2023 18:25:27 -0300 Subject: [PATCH] refactor: Bytes inner arithmetic (#138) Cleaner pointer arithmetic avoiding unnecessary intermediate slices. No perf gains in isolation but facilitates some (upcoming PRs) This is conceptually easier to reason about, end is fixed, "start" and "cursor" advance monotonically --- src/iter.rs | 123 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 85 insertions(+), 38 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index 0d86f9e..e6e5133 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,108 +1,155 @@ -use core::slice; use core::convert::TryInto; use core::convert::TryFrom; #[allow(missing_docs)] pub struct Bytes<'a> { - slice: &'a [u8], - pos: usize + start: *const u8, + end: *const u8, + cursor: *const u8, + phantom: core::marker::PhantomData<&'a ()>, } #[allow(missing_docs)] impl<'a> Bytes<'a> { #[inline] pub fn new(slice: &'a [u8]) -> Bytes<'a> { + let start = slice.as_ptr(); + let end = unsafe { start.add(slice.len()) }; + let cursor = start; Bytes { - slice, - pos: 0 + start, + end, + cursor, + phantom: core::marker::PhantomData, } } #[inline] pub fn pos(&self) -> usize { - self.pos + self.cursor as usize - self.start as usize } #[inline] pub fn peek(&self) -> Option { - self.peek_ahead(0) + if self.cursor < self.end { + // SAFETY: bounds checked + Some(unsafe { *self.cursor }) + } else { + None + } } #[inline] pub fn peek_ahead(&self, n: usize) -> Option { - self.slice.get(self.pos + n).copied() + let ptr = unsafe { self.cursor.add(n) }; + if ptr < self.end { + // SAFETY: bounds checked + Some(unsafe { *ptr }) + } else { + None + } } - + #[inline] - pub fn peek_n>(&self, n: usize) -> Option { - self.slice.get(self.pos..self.pos + n)?.try_into().ok() + pub fn peek_n<'b: 'a, U: TryFrom<&'a [u8]>>(&'b self, n: usize) -> Option { + // TODO: once we bump MSRC, use const generics to allow only [u8; N] reads + // TODO: drop `n` arg in favour of const + // let n = core::mem::size_of::(); + self.as_ref().get(..n)?.try_into().ok() } #[inline] pub unsafe fn bump(&mut self) { - debug_assert!(self.pos < self.slice.len(), "overflow"); - self.pos += 1; + self.advance(1) } - #[allow(unused)] #[inline] pub unsafe fn advance(&mut self, n: usize) { - debug_assert!(self.pos + n <= self.slice.len(), "overflow"); - self.pos += n; + self.cursor = self.cursor.add(n); + debug_assert!(self.cursor <= self.end, "overflow"); } #[inline] pub fn len(&self) -> usize { - self.slice.len() + self.end as usize - self.cursor as usize } #[inline] pub fn slice(&mut self) -> &'a [u8] { // not moving position at all, so it's safe - unsafe { - self.slice_skip(0) - } + let slice = unsafe { slice_from_ptr_range(self.start, self.cursor) }; + self.commit(); + slice } + // TODO: this is an anti-pattern, should be removed #[inline] pub unsafe fn slice_skip(&mut self, skip: usize) -> &'a [u8] { - debug_assert!(self.pos >= skip); - let head_pos = self.pos - skip; - let ptr = self.slice.as_ptr(); - let head = slice::from_raw_parts(ptr, head_pos); - let tail = slice::from_raw_parts(ptr.add(self.pos), self.slice.len() - self.pos); - self.pos = 0; - self.slice = tail; + debug_assert!(self.cursor.sub(skip) >= self.start); + let head = slice_from_ptr_range(self.start, self.cursor.sub(skip)); + self.commit(); head } + + #[inline] + pub fn commit(&mut self) { + self.start = self.cursor + } #[inline] pub unsafe fn advance_and_commit(&mut self, n: usize) { - debug_assert!(self.pos + n <= self.slice.len(), "overflow"); - self.pos += n; - let ptr = self.slice.as_ptr(); - let tail = slice::from_raw_parts(ptr.add(n), self.slice.len() - n); - self.pos = 0; - self.slice = tail; + self.advance(n); + self.commit(); + } + + #[inline] + pub fn as_ptr(&self) -> *const u8 { + self.cursor + } + + #[inline] + pub fn start(&self) -> *const u8 { + self.start + } + + #[inline] + pub fn end(&self) -> *const u8 { + self.end + } + + #[inline] + pub unsafe fn set_cursor(&mut self, ptr: *const u8) { + debug_assert!(ptr >= self.start); + debug_assert!(ptr <= self.end); + self.cursor = ptr; } } impl<'a> AsRef<[u8]> for Bytes<'a> { #[inline] fn as_ref(&self) -> &[u8] { - &self.slice[self.pos..] + unsafe { slice_from_ptr_range(self.cursor, self.end) } } } +#[inline] +unsafe fn slice_from_ptr_range<'a>(start: *const u8, end: *const u8) -> &'a [u8] { + debug_assert!(start <= end); + core::slice::from_raw_parts(start, end as usize - start as usize) +} + impl<'a> Iterator for Bytes<'a> { type Item = u8; #[inline] fn next(&mut self) -> Option { - if self.slice.len() > self.pos { - let b = unsafe { *self.slice.get_unchecked(self.pos) }; - self.pos += 1; - Some(b) + if self.cursor < self.end { + // SAFETY: bounds checked + unsafe { + let b = *self.cursor; + self.bump(); + Some(b) + } } else { None }