diff --git a/src/impls/avx2/stage1.rs b/src/impls/avx2/stage1.rs
index 6ba58de6..40d42206 100644
--- a/src/impls/avx2/stage1.rs
+++ b/src/impls/avx2/stage1.rs
@@ -1,5 +1,5 @@
 #![allow(dead_code)]
-use crate::{static_cast_i32, static_cast_i64, static_cast_u32, Stage1Parse};
+use crate::{static_cast_i32, static_cast_i64, static_cast_u32, Stage1Parse, SIMDINPUT_LENGTH};
 
 #[cfg(target_arch = "x86")]
 use std::arch::x86 as arch;
@@ -7,10 +7,10 @@ use std::arch::x86 as arch;
 use std::arch::x86_64 as arch;
 
 use arch::{
-    __m256i, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_loadu_si256,
-    _mm256_max_epu8, _mm256_movemask_epi8, _mm256_set1_epi8, _mm256_set_epi32, _mm256_setr_epi8,
-    _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi32, _mm256_storeu_si256,
-    _mm_clmulepi64_si128, _mm_set1_epi8, _mm_set_epi64x,
+    __m256i, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_load_si256,
+    _mm256_loadu_si256, _mm256_max_epu8, _mm256_movemask_epi8, _mm256_set1_epi8, _mm256_set_epi32,
+    _mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_srli_epi32,
+    _mm256_storeu_si256, _mm_clmulepi64_si128, _mm_set1_epi8, _mm_set_epi64x,
 };
 
 macro_rules! low_nibble_mask {
diff --git a/src/impls/native/stage1.rs b/src/impls/native/stage1.rs
index 3f2ec7e9..af1d68e3 100644
--- a/src/impls/native/stage1.rs
+++ b/src/impls/native/stage1.rs
@@ -1,6 +1,6 @@
 #![allow(clippy::cast_lossless, clippy::cast_sign_loss)]
 
-use crate::{static_cast_i32, Stage1Parse};
+use crate::{static_cast_i32, Stage1Parse, SIMDINPUT_LENGTH};
 
 type V128 = [u8; 16];
 
diff --git a/src/impls/neon/stage1.rs b/src/impls/neon/stage1.rs
index 8d5e90e6..44142180 100644
--- a/src/impls/neon/stage1.rs
+++ b/src/impls/neon/stage1.rs
@@ -38,9 +38,6 @@ pub unsafe fn neon_movemask_bulk(
 
 // /NEON-SPECIFIC
 
-//pub const SIMDJSON_PADDING: usize = mem::size_of::<uint8x16_t>() * 4;
-//pub const SIMDINPUT_LENGTH: usize = 64;
-
 #[derive(Debug)]
 pub(crate) struct SimdInput {
     v0: uint8x16_t,
diff --git a/src/impls/portable/stage1.rs b/src/impls/portable/stage1.rs
index daf9d52e..0f11b7ab 100644
--- a/src/impls/portable/stage1.rs
+++ b/src/impls/portable/stage1.rs
@@ -1,6 +1,6 @@
 use std::simd::{prelude::*, ToBitMask};
 
-use crate::{static_cast_i32, Stage1Parse};
+use crate::{static_cast_i32, Stage1Parse, SIMDINPUT_LENGTH};
 
 #[derive(Debug)]
 pub(crate) struct SimdInput {
     v: u8x64,
 }
diff --git a/src/impls/simd128/stage1.rs b/src/impls/simd128/stage1.rs
index 9bfa24ec..b76fafe5 100644
--- a/src/impls/simd128/stage1.rs
+++ b/src/impls/simd128/stage1.rs
@@ -1,4 +1,4 @@
-use crate::Stage1Parse;
+use crate::{Stage1Parse, SIMDINPUT_LENGTH};
 use std::arch::wasm32::{
     i8x16_splat, u32x4, u32x4_add, u32x4_splat, u8x16, u8x16_bitmask, u8x16_eq, u8x16_le,
     u8x16_shr, u8x16_splat, u8x16_swizzle, v128, v128_and, v128_load, v128_store,
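
The hunks above do two things: every stage-1 backend now imports the shared `SIMDINPUT_LENGTH` constant from the crate root instead of carrying its own copy (the commented-out NEON constants deleted above were the last leftover of the per-backend versions), and the AVX2 backend gains `_mm256_load_si256`, the aligned 32-byte load, alongside the unaligned `_mm256_loadu_si256`. A minimal, x86_64-only sketch of why that import matters; the runtime branch and the helper name `load_block` are illustrative, not simd-json's actual code, which can take the aligned path unconditionally once the input lives in an `AlignedBuf` (see the src/lib.rs hunks further down):

```rust
use std::arch::x86_64::{__m256i, _mm256_load_si256, _mm256_loadu_si256};

/// Shared 64-byte stage-1 block size, as imported from the crate root above.
pub const SIMDINPUT_LENGTH: usize = 64;

/// Loads one 64-byte block as two 256-bit vectors.
///
/// # Safety
/// `ptr` must be valid for reads of `SIMDINPUT_LENGTH` bytes.
#[target_feature(enable = "avx")]
pub unsafe fn load_block(ptr: *const u8) -> (__m256i, __m256i) {
    if ptr as usize % 32 == 0 {
        // `_mm256_load_si256` may fault on a pointer that is not 32-byte
        // aligned; an aligned input buffer makes this path always legal.
        (
            _mm256_load_si256(ptr.cast::<__m256i>()),
            _mm256_load_si256(ptr.add(32).cast::<__m256i>()),
        )
    } else {
        // Unaligned fallback: valid for any address, potentially slower.
        (
            _mm256_loadu_si256(ptr.cast::<__m256i>()),
            _mm256_loadu_si256(ptr.add(32).cast::<__m256i>()),
        )
    }
}
```
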
diff --git a/src/impls/sse42/stage1.rs b/src/impls/sse42/stage1.rs
index 4cb22b45..5211380c 100644
--- a/src/impls/sse42/stage1.rs
+++ b/src/impls/sse42/stage1.rs
@@ -1,4 +1,4 @@
-use crate::{static_cast_i32, static_cast_u32, Stage1Parse};
+use crate::{static_cast_i32, static_cast_u32, Stage1Parse, SIMDINPUT_LENGTH};
 
 #[cfg(target_arch = "x86")]
 use std::arch::x86 as arch;
@@ -8,15 +8,16 @@ use std::arch::x86_64 as arch;
 #[cfg(target_arch = "x86")]
 use arch::{
-    __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_cmpgt_epi8, _mm_loadu_si128,
-    _mm_max_epu8, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8,
-    _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128, _mm_testz_si128,
+    __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_cmpgt_epi8, _mm_load_si128,
+    _mm_loadu_si128, _mm_max_epu8, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_set_epi32,
+    _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
+    _mm_testz_si128,
 };
 
 #[cfg(target_arch = "x86_64")]
 use arch::{
-    __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_max_epu8,
-    _mm_movemask_epi8, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8, _mm_setzero_si128,
-    _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
+    __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_load_si128, _mm_loadu_si128,
+    _mm_max_epu8, _mm_movemask_epi8, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8,
+    _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128,
 };
 
 macro_rules! low_nibble_mask {
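
The src/lib.rs hunks below carry the matching API change: `find_structural_bits` (the dispatch type alias, the runtime-detection dispatcher, and every per-target variant) now takes an `&AlignedBuf` plus an explicit `len` instead of a plain `&[u8]`. The split is needed because an aligned buffer is over-allocated to whole blocks, so its allocation size no longer tells stage 1 where the JSON actually ends. A rough sketch of what such a buffer involves, assuming 64-byte alignment and zeroed padding; simd-json's real `AlignedBuf` differs in detail (capacity and length tracking, for instance):

```rust
use std::alloc::{alloc_zeroed, dealloc, Layout};

/// Over-aligned, padded byte buffer: a SIMD load of any whole 64-byte block
/// inside it is always aligned and always stays within the allocation.
pub struct AlignedBuf {
    layout: Layout,
    ptr: *mut u8,
}

impl AlignedBuf {
    /// Allocates `capacity` bytes rounded up to whole 64-byte blocks,
    /// zero-initialized so the padding past the real length is well-defined.
    pub fn with_capacity(capacity: usize) -> Self {
        let size = capacity.max(1).next_multiple_of(64);
        let layout = Layout::from_size_align(size, 64).expect("invalid layout");
        let ptr = unsafe { alloc_zeroed(layout) };
        assert!(!ptr.is_null(), "allocation failed");
        Self { layout, ptr }
    }

    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.ptr
    }
}

impl Drop for AlignedBuf {
    fn drop(&mut self) {
        unsafe { dealloc(self.ptr, self.layout) }
    }
}
```

With a buffer of this shape, a caller copies the `len` JSON bytes to `as_mut_ptr()` and passes the buffer and `len` separately, which is exactly the two-parameter signature the hunks below introduce.
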
diff --git a/src/lib.rs b/src/lib.rs
index ce475fda..cf72df49 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -426,7 +426,8 @@ type ParseStrFn = for<'invoke, 'de> unsafe fn(
     any(target_arch = "x86_64", target_arch = "x86"),
 ))]
 type FindStructuralBitsFn = unsafe fn(
-    input: &[u8],
+    input: &AlignedBuf,
+    len: usize,
     structural_indexes: &mut Vec<u32>,
 ) -> std::result::Result<(), ErrorType>;
 
@@ -698,7 +699,8 @@ impl<'de> Deserializer<'de> {
         any(target_arch = "x86_64", target_arch = "x86"),
     ))]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
         use std::sync::atomic::{AtomicPtr, Ordering};
@@ -722,16 +724,17 @@ impl<'de> Deserializer<'de> {
 
         #[cfg_attr(not(feature = "no-inline"), inline)]
         unsafe fn get_fastest(
-            input: &[u8],
+            input: &AlignedBuf,
+            len: usize,
             structural_indexes: &mut Vec<u32>,
         ) -> core::result::Result<(), error::ErrorType> {
             let fun = get_fastest_available_implementation();
             FN.store(fun as FnRaw, Ordering::Relaxed);
-            (fun)(input, structural_indexes)
+            (fun)(input, len, structural_indexes)
         }
 
         let fun = FN.load(Ordering::Relaxed);
-        mem::transmute::<FnRaw, FindStructuralBitsFn>(fun)(input, structural_indexes)
+        mem::transmute::<FnRaw, FindStructuralBitsFn>(fun)(input, len, structural_indexes)
     }
 
     #[cfg(not(any(
@@ -747,7 +750,8 @@
     )))]
     #[cfg_attr(not(feature = "no-inline"), inline)]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
         // This is a nasty hack, we don't have a chunked implementation for native rust
@@ -757,16 +761,17 @@ impl<'de> Deserializer<'de> {
             Err(_) => return Err(ErrorType::InvalidUtf8),
         };
         #[cfg(not(feature = "portable"))]
-        Self::_find_structural_bits::<impls::native::SimdInput>(input, structural_indexes)
+        Self::_find_structural_bits::<impls::native::SimdInput>(input, len, structural_indexes)
     }
 
     #[cfg(all(feature = "portable", not(feature = "runtime-detection")))]
     #[cfg_attr(not(feature = "no-inline"), inline)]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
-        Self::_find_structural_bits::<impls::portable::SimdInput>(input, structural_indexes)
+        Self::_find_structural_bits::<impls::portable::SimdInput>(input, len, structural_indexes)
     }
 
     #[cfg(all(
@@ -776,10 +781,11 @@
     ))]
     #[cfg_attr(not(feature = "no-inline"), inline)]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
-        Self::_find_structural_bits::<impls::avx2::SimdInput>(input, structural_indexes)
+        Self::_find_structural_bits::<impls::avx2::SimdInput>(input, len, structural_indexes)
     }
 
     #[cfg(all(
@@ -790,10 +796,11 @@
     ))]
     #[cfg_attr(not(feature = "no-inline"), inline)]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
-        Self::_find_structural_bits::<impls::sse42::SimdInput>(input, structural_indexes)
+        Self::_find_structural_bits::<impls::sse42::SimdInput>(input, len, structural_indexes)
    }
 
     #[cfg(all(target_arch = "aarch64", not(feature = "portable")))]
@@ -809,10 +816,11 @@ impl<'de> Deserializer<'de> {
 
     #[cfg(all(target_feature = "simd128", not(feature = "portable")))]
     #[cfg_attr(not(feature = "no-inline"), inline)]
     pub(crate) unsafe fn find_structural_bits(
-        input: &[u8],
+        input: &AlignedBuf,
+        len: usize,
         structural_indexes: &mut Vec<u32>,
     ) -> std::result::Result<(), ErrorType> {
-        Self::_find_structural_bits::<impls::simd128::SimdInput>(input, structural_indexes)
+        Self::_find_structural_bits::<impls::simd128::SimdInput>(input, len, structural_indexes)
     }
 }
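
Orthogonal to the signature change itself, the `@@ -722` hunk above shows the pattern the runtime-detection path relies on: `get_fastest` resolves the best implementation on the first call, caches it in an `AtomicPtr`, and every subsequent call loads the raw pointer and transmutes it back to the typed function pointer. A standalone sketch of that self-replacing function-pointer pattern, reduced to a toy `fn(&[u8]) -> usize` signature (the scan body and names are illustrative):

```rust
use std::mem;
use std::sync::atomic::{AtomicPtr, Ordering};

type FnRaw = *mut ();
type ScanFn = fn(&[u8]) -> usize;

/// Stand-in for one of the per-target implementations.
fn scan_generic(input: &[u8]) -> usize {
    input.iter().filter(|b| **b == b'{').count()
}

/// Stand-in for CPU-feature probing (real code would branch on
/// `std::is_x86_feature_detected!` and friends).
fn get_fastest_available_implementation() -> ScanFn {
    scan_generic
}

fn scan(input: &[u8]) -> usize {
    // First call goes through `get_fastest`, which probes once and
    // overwrites FN with the resolved implementation.
    fn get_fastest(input: &[u8]) -> usize {
        let fun = get_fastest_available_implementation();
        FN.store(fun as FnRaw, Ordering::Relaxed);
        (fun)(input)
    }
    static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw);

    let fun = FN.load(Ordering::Relaxed);
    // SAFETY: FN only ever holds pointers to fns with the ScanFn signature.
    unsafe { mem::transmute::<FnRaw, ScanFn>(fun)(input) }
}

fn main() {
    assert_eq!(scan(b"{\"a\": {}}"), 2);
}
```

`Relaxed` ordering is enough here: a thread that races the first call simply resolves the implementation again and stores the same pointer.
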