From 0c29ff50965e9aa94d50d6e672a6262a37f3f05f Mon Sep 17 00:00:00 2001
From: "Heinz N. Gies"
Date: Wed, 18 Oct 2023 10:31:42 +0200
Subject: [PATCH] Add portable simd (core::simd) implementation

Signed-off-by: Heinz N. Gies
---
 .github/workflows/ci.yml       |  22 +
 Cargo.toml                     |   3 +
 src/basic.rs                   |   8 +
 src/compat.rs                  |   7 +
 src/implementation/mod.rs      |   5 +
 src/implementation/portable.rs | 830 +++++++++++++++++++++++++++++++++
 src/lib.rs                     |   1 +
 tests/tests.rs                 |  16 +-
 8 files changed, 889 insertions(+), 3 deletions(-)
 create mode 100644 src/implementation/portable.rs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d242a626..1f89b093 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -36,6 +36,28 @@ jobs:
         env:
           RUSTFLAGS: ${{ matrix.rustflags }}
 
+  test-portable-simd:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-11, windows-latest]
+        features: ["", "--features std", "--features public_imp", "--features std,public_imp"]
+        rustflags: ["-D warnings", "-D warnings -C target-feature=+avx2", "-D warnings -C target-feature=+sse4.2"]
+        exclude:
+          - os: macos-11
+            rustflags: "-D warnings -C target-feature=+avx2"
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: nightly
+          profile: minimal
+          override: true
+      - name: Run tests
+        run: cargo test --no-default-features --features portable ${{ matrix.features }} --all-targets --verbose
+        env:
+          RUSTFLAGS: ${{ matrix.rustflags }}
+
   test-inlining-x86:
     runs-on: ubuntu-latest
     strategy:
diff --git a/Cargo.toml b/Cargo.toml
index dca2f8dd..e53774d1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,6 +37,9 @@ aarch64_neon = []
 # enable aarch64 prefetching for minor speedup - requires nightly
 aarch64_neon_prefetch = []
 
+# support for portable simd (nightly only)
+portable = []
+
 # deprecated - does not do anything
 hints = []
 
diff --git a/src/basic.rs b/src/basic.rs
index 2c1d042e..cfa5b8c4 100644
--- a/src/basic.rs
+++ b/src/basic.rs
@@ -216,6 +216,14 @@ pub mod imp {
         }
     }
 
+    /// Includes the portable SIMD implementations.
+    #[cfg(feature = "portable")]
+    pub mod portable {
+        /// Includes the validation implementation using portable SIMD.
+        pub use crate::implementation::portable::validate_utf8_basic as validate_utf8;
+        pub use crate::implementation::portable::ChunkedUtf8ValidatorImp;
+        pub use crate::implementation::portable::Utf8ValidatorImp;
+    }
     /// Includes the aarch64 SIMD implementations.
     #[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))]
     pub mod aarch64 {
diff --git a/src/compat.rs b/src/compat.rs
index 2e955b9f..de861dd9 100644
--- a/src/compat.rs
+++ b/src/compat.rs
@@ -115,6 +115,13 @@ pub mod imp {
         }
     }
 
+    /// Includes the portable SIMD implementations.
+    #[cfg(feature = "portable")]
+    pub mod portable {
+        /// Includes the validation implementation for portable SIMD.
+        pub use crate::implementation::portable::validate_utf8_compat as validate_utf8;
+    }
+
     /// Includes the aarch64 SIMD implementations.
#[cfg(all(feature = "aarch64_neon", target_arch = "aarch64"))] pub mod aarch64 { diff --git a/src/implementation/mod.rs b/src/implementation/mod.rs index 242b46b3..b19808f3 100644 --- a/src/implementation/mod.rs +++ b/src/implementation/mod.rs @@ -17,6 +17,11 @@ type ValidateUtf8Fn = unsafe fn(input: &[u8]) -> Result<(), Utf8ErrorBasic>; #[allow(dead_code)] type ValidateUtf8CompatFn = unsafe fn(input: &[u8]) -> Result<(), Utf8ErrorCompat>; +// Rust Portable SIMD implementation + +#[cfg(feature = "portable")] +pub(crate) mod portable; + // x86 implementation #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/src/implementation/portable.rs b/src/implementation/portable.rs new file mode 100644 index 00000000..f5a1013a --- /dev/null +++ b/src/implementation/portable.rs @@ -0,0 +1,830 @@ +//! Contains the portable SIMD UTF-8 validation implementation. + +#![allow(clippy::too_many_arguments)] +#![allow(clippy::needless_pass_by_value, clippy::pedantic, clippy::all)] +use crate::implementation::helpers::Utf8CheckAlgorithm; +use core::simd::{ + simd_swizzle, u8x32, SimdInt, SimdPartialOrd, SimdUint, + Which::{First, Second}, +}; + +// Portable SIMD primitives +type SimdU8Value = crate::implementation::helpers::SimdU8Value; + +impl SimdU8Value { + // ist OK + #[inline] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from(u8x32::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ])) + } + + // ist OK + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_32_cut_off_leading( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + } + + #[inline] + unsafe fn load_from(ptr: *const u8) -> Self { + #[allow(clippy::cast_ptr_alignment)] + Self::from(u8x32::from_array(ptr.cast::<[u8; 32]>().read_unaligned())) + } + + // ist OK + #[inline] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction + // this will otherwise lead to bad results + let idx: u8x32 = self.0.cast(); + let src: u8x32 = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + .0 + .cast(); + let res = src.swizzle_dyn(idx); + Self::from(res.cast()) + } + // ist OK + #[inline] + fn splat(val: u8) -> Self { + #[allow(clippy::cast_possible_wrap)] + Self::from(u8x32::from_array([val; 32])) + } + // ist OK + #[inline] + fn splat0() -> Self { + Self::from(u8x32::from_array([0; 32])) + } + + #[inline] + fn or(self, b: Self) -> Self { + Self::from(self.0 | b.0) + } + + #[inline] + fn and(self, b: Self) -> Self { + Self::from(self.0 & b.0) + } + + #[inline] + fn xor(self, b: Self) -> Self { + Self::from(self.0 ^ b.0) + } + + #[inline] + fn 
saturating_sub(self, b: Self) -> Self { + let l: u8x32 = self.0.cast(); + let r: u8x32 = b.0.cast(); + let res = l.saturating_sub(r); + Self::from(res.cast()) + } + // ist OK + // ugly but shr requires const generics + #[inline] + fn shr4(self) -> Self { + Self::from(self.0 >> u8x32::from([4; 32])) + } + // ist OK + #[inline] + fn prev1(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + Second(31), + First(0), + First(1), + First(2), + First(3), + First(4), + First(5), + First(6), + First(7), + First(8), + First(9), + First(10), + First(11), + First(12), + First(13), + First(14), + First(15), + First(16), + First(17), + First(18), + First(19), + First(20), + First(21), + First(22), + First(23), + First(24), + First(25), + First(26), + First(27), + First(28), + First(29), + First(30), + ] + )) + } + + // ist OK + // ugly but prev requires const generics + #[inline] + fn prev2(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + Second(30), + Second(31), + First(0), + First(1), + First(2), + First(3), + First(4), + First(5), + First(6), + First(7), + First(8), + First(9), + First(10), + First(11), + First(12), + First(13), + First(14), + First(15), + First(16), + First(17), + First(18), + First(19), + First(20), + First(21), + First(22), + First(23), + First(24), + First(25), + First(26), + First(27), + First(28), + First(29), + ] + )) + } + + // ist OK + // ugly but prev requires const generics + #[inline] + fn prev3(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + Second(29), + Second(30), + Second(31), + First(0), + First(1), + First(2), + First(3), + First(4), + First(5), + First(6), + First(7), + First(8), + First(9), + First(10), + First(11), + First(12), + First(13), + First(14), + First(15), + First(16), + First(17), + First(18), + First(19), + First(20), + First(21), + First(22), + First(23), + First(24), + First(25), + First(26), + First(27), + First(28), + ] + )) + } + + #[inline] + fn signed_gt(self, other: Self) -> Self { + let gt = self.0.simd_gt(other.0).to_int(); + Self::from(gt.cast()) + } + + #[inline] + fn any_bit_set(self) -> bool { + self.0 != u8x32::from_array([0; 32]) + } + + #[inline] + fn is_ascii(self) -> bool { + let significan_bits = self.0 & u8x32::from_array([0b1000_0000; 32]); + significan_bits == u8x32::from_array([0; 32]) + } +} + +impl From for SimdU8Value { + #[inline] + fn from(val: u8x32) -> Self { + Self(val) + } +} + +impl Utf8CheckAlgorithm { + #[inline] + fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { + let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1)); + let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1)); + + is_third_byte + .or(is_fourth_byte) + .signed_gt(SimdU8Value::splat0()) + } +} + +use crate::implementation::helpers::TempSimdChunkA32 as TempSimdChunk; +#[repr(C)] +struct SimdInput { + vals: [SimdU8Value; 2], +} + +impl SimdInput { + #[inline] + unsafe fn new(ptr: &[u8]) -> Self { + Self { + vals: [ + SimdU8Value::load_from(ptr.as_ptr()), + SimdU8Value::load_from(ptr.as_ptr().add(32)), + ], + } + } + + #[inline] + fn is_ascii(&self) -> bool { + self.vals[0].or(self.vals[1]).is_ascii() + } +} + +use crate::{basic, compat}; + +impl Utf8CheckAlgorithm { + #[inline] + fn default() -> Self { + Self { + prev: SimdU8Value::splat0(), + incomplete: SimdU8Value::splat0(), + error: SimdU8Value::splat0(), + } + } + + #[inline] + fn check_incomplete_pending(&mut self) { + 
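+        // If the previous chunk ended in an incomplete multi-byte sequence, fold that
+        // pending state into the error accumulator.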
self.error = self.error.or(self.incomplete); + self.error.any_bit_set(); + } + + #[inline] + fn is_incomplete(input: SimdU8Value) -> SimdU8Value { + input.saturating_sub(SimdU8Value::from_32_cut_off_leading( + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0b1111_0000 - 1, + 0b1110_0000 - 1, + 0b1100_0000 - 1, + )) + } + + #[inline] + #[allow(clippy::too_many_lines)] + fn check_special_cases(input: SimdU8Value, prev1: SimdU8Value) -> SimdU8Value { + const TOO_SHORT: u8 = 1 << 0; + const TOO_LONG: u8 = 1 << 1; + const OVERLONG_3: u8 = 1 << 2; + const SURROGATE: u8 = 1 << 4; + const OVERLONG_2: u8 = 1 << 5; + const TWO_CONTS: u8 = 1 << 7; + const TOO_LARGE: u8 = 1 << 3; + const TOO_LARGE_1000: u8 = 1 << 6; + const OVERLONG_4: u8 = 1 << 6; + const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS; + + let byte_1_high = prev1.shr4().lookup_16( + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TOO_SHORT | OVERLONG_2, + TOO_SHORT, + TOO_SHORT | OVERLONG_3 | SURROGATE, + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4, + ); + + let byte_1_low = prev1.and(SimdU8Value::splat(0x0F)).lookup_16( + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + CARRY | OVERLONG_2, + CARRY, + CARRY, + CARRY | TOO_LARGE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + ); + + let byte_2_high = input.shr4().lookup_16( + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + ); + + byte_1_high.and(byte_1_low).and(byte_2_high) + } + + #[inline] + fn check_multibyte_lengths( + input: SimdU8Value, + prev: SimdU8Value, + special_cases: SimdU8Value, + ) -> SimdU8Value { + let prev2 = input.prev2(prev); + let prev3 = input.prev3(prev); + let must23 = Self::must_be_2_3_continuation(prev2, prev3); + let must23_80 = must23.and(SimdU8Value::splat(0x80)); + must23_80.xor(special_cases) + } + + #[inline] + fn has_error(&self) -> bool { + self.error.any_bit_set() + } + + #[inline] + fn check_bytes(&mut self, input: SimdU8Value) { + let prev1 = input.prev1(self.prev); + let sc = Self::check_special_cases(input, prev1); + self.error = self + .error + .or(Self::check_multibyte_lengths(input, self.prev, sc)); + self.error.any_bit_set(); + self.prev = input; + } + + #[inline] + unsafe fn check_utf8(&mut self, input: SimdInput) { + if input.is_ascii() { + self.check_incomplete_pending(); + } else { + self.check_block(input); + } + } + + #[inline] + + unsafe fn check_block(&mut self, input: SimdInput) { + // WORKAROUND + // necessary because the for loop is not unrolled on ARM64 + if input.vals.len() == 2 { + 
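+            // A 64-byte chunk is two 32-byte vectors here: validate each in order, then
+            // record whether the chunk ends in an incomplete multi-byte sequence.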
self.check_bytes(*input.vals.get_unchecked(0)); + self.check_bytes(*input.vals.get_unchecked(1)); + self.incomplete = Self::is_incomplete(*input.vals.get_unchecked(1)); + } else if input.vals.len() == 4 { + self.check_bytes(*input.vals.get_unchecked(0)); + self.check_bytes(*input.vals.get_unchecked(1)); + self.check_bytes(*input.vals.get_unchecked(2)); + self.check_bytes(*input.vals.get_unchecked(3)); + self.incomplete = Self::is_incomplete(*input.vals.get_unchecked(3)); + } else { + panic!("Unsupported number of chunks"); + } + } +} + +/// Validation implementation for CPUs supporting the SIMD extension (see module). +/// +/// # Errors +/// Returns the zero-sized [`basic::Utf8Error`] on failure. +/// +/// # Safety +/// This function is inherently unsafe because it is compiled with SIMD extensions +/// enabled. Make sure that the CPU supports it before calling. +/// +#[inline] +pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let len = input.len(); + let mut algorithm = Utf8CheckAlgorithm::::default(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + + while idx < iter_lim { + let simd_input = SimdInput::new(input.get_unchecked(idx as usize..)); + idx += SIMD_CHUNK_SIZE; + if !simd_input.is_ascii() { + algorithm.check_block(simd_input); + break; + } + } + + while idx < iter_lim { + let input = SimdInput::new(input.get_unchecked(idx as usize..)); + algorithm.check_utf8(input); + idx += SIMD_CHUNK_SIZE; + } + + if idx < len { + let mut tmpbuf = TempSimdChunk::new(); + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + let simd_input = SimdInput::new(&tmpbuf.0); + algorithm.check_utf8(simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } +} + +/// Validation implementation for CPUs supporting the SIMD extension (see module). +/// +/// # Errors +/// Returns [`compat::Utf8Error`] with detailed error information on failure. +/// +/// # Safety +/// This function is inherently unsafe because it is compiled with SIMD extensions +/// enabled. Make sure that the CPU supports it before calling. 
+/// +#[inline] +pub unsafe fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> { + validate_utf8_compat_simd0(input) + .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) +} + +#[inline] +unsafe fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let len = input.len(); + let mut algorithm = Utf8CheckAlgorithm::::default(); + let mut idx: usize = 0; + let mut only_ascii = true; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + + 'outer: loop { + if only_ascii { + while idx < iter_lim { + let simd_input = SimdInput::new(input.get_unchecked(idx as usize..)); + if !simd_input.is_ascii() { + algorithm.check_block(simd_input); + if algorithm.has_error() { + return Err(idx); + } else { + only_ascii = false; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } + idx += SIMD_CHUNK_SIZE; + } + } else { + while idx < iter_lim { + let simd_input = SimdInput::new(input.get_unchecked(idx as usize..)); + if simd_input.is_ascii() { + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + return Err(idx); + } else { + // we are in pure ASCII territory again + only_ascii = true; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } else { + algorithm.check_block(simd_input); + if algorithm.has_error() { + return Err(idx); + } + } + idx += SIMD_CHUNK_SIZE; + } + } + break; + } + if idx < len { + let mut tmpbuf = TempSimdChunk::new(); + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + let simd_input = SimdInput::new(&tmpbuf.0); + + algorithm.check_utf8(simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(idx) + } else { + Ok(()) + } +} + +/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. +/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. 
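+// The streaming validator buffers any trailing partial 64-byte chunk between calls,
+// so `update()` can be fed arbitrarily sized slices.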
+#[cfg(feature = "public_imp")] +pub struct Utf8ValidatorImp { + algorithm: Utf8CheckAlgorithm, + incomplete_data: [u8; 64], + incomplete_len: usize, +} + +#[cfg(feature = "public_imp")] +impl Utf8ValidatorImp { + #[inline] + unsafe fn update_from_incomplete_data(&mut self) { + let simd_input = SimdInput::new(&self.incomplete_data); + self.algorithm.check_utf8(simd_input); + self.incomplete_len = 0; + } +} + +#[cfg(feature = "public_imp")] +impl basic::imp::Utf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::::default(), + incomplete_data: [0; 64], + incomplete_len: 0, + } + } + + #[inline] + unsafe fn update(&mut self, mut input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + if input.is_empty() { + return; + } + if self.incomplete_len != 0 { + let to_copy = core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); + self.incomplete_data + .as_mut_ptr() + .add(self.incomplete_len) + .copy_from_nonoverlapping(input.as_ptr(), to_copy); + if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { + self.update_from_incomplete_data(); + input = &input[to_copy..]; + } else { + self.incomplete_len += to_copy; + return; + } + } + let len = input.len(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + while idx < iter_lim { + let input = SimdInput::new(input.get_unchecked(idx as usize..)); + self.algorithm.check_utf8(input); + idx += SIMD_CHUNK_SIZE; + } + if idx < len { + let to_copy = len - idx; + self.incomplete_data + .as_mut_ptr() + .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy); + self.incomplete_len = to_copy; + } + } + + #[inline] + unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { + if self.incomplete_len != 0 { + for i in &mut self.incomplete_data[self.incomplete_len..] { + *i = 0; + } + self.update_from_incomplete_data(); + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +/// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. +/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct ChunkedUtf8ValidatorImp { + algorithm: Utf8CheckAlgorithm, +} + +#[cfg(feature = "public_imp")] +impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::::default(), + } + } + + #[inline] + unsafe fn update_from_chunks(&mut self, input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + assert!( + input.len() % SIMD_CHUNK_SIZE == 0, + "Input size must be a multiple of 64." 
+ ); + for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(input); + } + } + + #[inline] + unsafe fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + if let Some(mut remaining_input) = remaining_input { + if !remaining_input.is_empty() { + let len = remaining_input.len(); + let chunks_lim = len - (len % SIMD_CHUNK_SIZE); + if chunks_lim > 0 { + self.update_from_chunks(&remaining_input[..chunks_lim]); + } + let rem = len - chunks_lim; + if rem > 0 { + remaining_input = &remaining_input[chunks_lim..]; + let mut tmpbuf = TempSimdChunk::new(); + tmpbuf + .0 + .as_mut_ptr() + .copy_from_nonoverlapping(remaining_input.as_ptr(), remaining_input.len()); + let simd_input = SimdInput::new(&tmpbuf.0); + self.algorithm.check_utf8(simd_input); + } + } + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index d85c5538..62937446 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,7 @@ //! //! See Validating UTF-8 In Less Than One Instruction Per Byte, Software: Practice and Experience 51 (5), 2021 //! +#![cfg_attr(feature = "portable", feature(portable_simd))] pub mod basic; pub mod compat; diff --git a/tests/tests.rs b/tests/tests.rs index e165db9b..2b862c40 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -132,6 +132,16 @@ fn test_valid_public_imp(input: &[u8]) { ); } } + #[cfg(feature = "portable")] + unsafe { + assert!(dbg!(simdutf8::basic::imp::portable::validate_utf8(input)).is_ok()); + assert!(simdutf8::compat::imp::portable::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } #[cfg(all( feature = "aarch64_neon", target_arch = "aarch64", @@ -352,21 +362,21 @@ fn simple_invalid() { #[test] fn incomplete_on_32nd_byte() { let mut invalid = b"a".repeat_x(31); - invalid.push(b'\xF0'); + invalid.push(0xf0); test_invalid(&invalid, 31, None) } #[test] fn incomplete_on_64th_byte() { let mut invalid = b"a".repeat_x(63); - invalid.push(b'\xF0'); + invalid.push(0xf0); test_invalid(&invalid, 63, None) } #[test] fn incomplete_on_64th_byte_65_bytes_total() { let mut invalid = b"a".repeat_x(63); - invalid.push(b'\xF0'); + invalid.push(0xf0); invalid.push(b'a'); test_invalid(&invalid, 63, Some(1)) }
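Usage note (editorial, not part of the patch): a minimal sketch of how downstream code could exercise the new implementation, assuming a nightly toolchain and simdutf8 built with the `portable` and `public_imp` features enabled, as in the CI job added above. The safe top-level `from_utf8` entry points are unchanged by this patch.

// Sketch only - feature flags and nightly requirement as assumed above.
fn main() {
    let bytes = "héllo wörld".as_bytes();

    // High-level, auto-selecting API: unchanged by this patch.
    assert!(simdutf8::basic::from_utf8(bytes).is_ok());

    // Call the portable-SIMD implementation directly.
    // Unsafe: the caller must ensure the CPU supports the SIMD features the
    // crate was compiled with (see the module documentation).
    unsafe {
        assert!(simdutf8::basic::imp::portable::validate_utf8(bytes).is_ok());
        assert!(simdutf8::compat::imp::portable::validate_utf8(bytes).is_ok());
    }
}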