From 32229046e1138ba78daa4f66d72fb81230536613 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 16 Oct 2023 14:26:57 +0200 Subject: [PATCH 1/8] Implement std::simd portable Signed-off-by: Heinz N. Gies --- .gitignore | 1 + Cargo.toml | 8 +- src/avx2/deser.rs | 13 +- src/avx2/stage1.rs | 20 +- src/charutils.rs | 23 +- src/error.rs | 21 +- src/lib.rs | 1434 ++++--------------------------- src/macros.rs | 4 +- src/native/deser.rs | 144 ++++ src/native/mod.rs | 26 + src/native/stage1.rs | 481 +++++++++++ src/neon/deser.rs | 4 +- src/neon/mod.rs | 2 + src/neon/stage1.rs | 11 +- src/portable/deser.rs | 160 ++++ src/portable/mod.rs | 2 + src/portable/stage1.rs | 222 +++++ src/serde.rs | 2 +- src/serde/de.rs | 2 +- src/serde/se.rs | 109 +-- src/serde/se/pp.rs | 155 ++-- src/serde/value/borrowed/se.rs | 1 + src/serde/value/owned/se.rs | 9 +- src/simd128/deser.rs | 3 +- src/simd128/stage1.rs | 5 +- src/sse42/deser.rs | 18 +- src/sse42/stage1.rs | 6 +- src/stage2.rs | 18 +- src/stringparse.rs | 20 +- src/tests.rs | 270 ++++++ src/tests/impls.rs | 80 ++ src/tests/serde.rs | 964 +++++++++++++++++++++ src/value/borrowed.rs | 3 +- src/value/borrowed/serialize.rs | 4 +- src/value/owned.rs | 2 +- src/value/owned/serialize.rs | 4 +- src/value/tape.rs | 8 +- 37 files changed, 2754 insertions(+), 1505 deletions(-) create mode 100644 src/native/deser.rs create mode 100644 src/native/mod.rs create mode 100644 src/native/stage1.rs create mode 100644 src/portable/deser.rs create mode 100644 src/portable/mod.rs create mode 100644 src/portable/stage1.rs create mode 100644 src/tests.rs create mode 100644 src/tests/impls.rs create mode 100644 src/tests/serde.rs diff --git a/.gitignore b/.gitignore index 3b39f953..5cdd215a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ proptest-regressions .current .cargo .vscode +rust-toolchain diff --git a/Cargo.toml b/Cargo.toml index a2c77076..6c6948ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,10 @@ lexical-core = { version = 
"0.8", features = ["format"] } beef = { version = "0.5", optional = true } halfbrown = "0.2" value-trait = { version = "0.6.1" } -simdutf8 = { version = "0.1.4", features = ["public_imp", "aarch64_neon"] } +simdutf8 = { version = "0.1.4", features = [ + "public_imp", + "aarch64_neon", +], path = "../simdutf8" } # ahash known key once_cell = { version = "1.17", optional = true } @@ -102,6 +105,9 @@ perf = ["perfcnt", "getopts", "colored", "serde_json"] # for documentation docsrs = [] +# portable simd support (as of rust 1.73 nightly only) +portable = ["simdutf8/portable"] + [[example]] name = "perf" diff --git a/src/avx2/deser.rs b/src/avx2/deser.rs index 11b5d106..edcabc87 100644 --- a/src/avx2/deser.rs +++ b/src/avx2/deser.rs @@ -9,8 +9,6 @@ use std::arch::x86_64::{ _mm256_storeu_si256, }; -use std::mem; - pub use crate::error::{Error, ErrorType}; use crate::safer_unchecked::GetSaferUnchecked; use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; @@ -20,15 +18,12 @@ pub use crate::Result; #[target_feature(enable = "avx2")] #[allow( clippy::if_not_else, - clippy::transmute_ptr_to_ptr, - clippy::too_many_lines, - clippy::cast_ptr_alignment, clippy::cast_possible_wrap, clippy::if_not_else, clippy::too_many_lines )] #[cfg_attr(not(feature = "no-inline"), inline)] -pub(crate) unsafe fn parse_str_avx<'invoke, 'de>( +pub(crate) unsafe fn parse_str<'invoke, 'de>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], @@ -47,6 +42,8 @@ pub(crate) unsafe fn parse_str_avx<'invoke, 'de>( let mut src_i: usize = 0; let mut len = src_i; loop { + // _mm256_loadu_si256 does not require alignment + #[allow(clippy::cast_ptr_alignment)] let v: __m256i = _mm256_loadu_si256(src.as_ptr().add(src_i).cast::()); @@ -96,9 +93,13 @@ pub(crate) unsafe fn parse_str_avx<'invoke, 'de>( // To be more conform with upstream loop { + // _mm256_loadu_si256 does not require alignment + #[allow(clippy::cast_ptr_alignment)] let v: __m256i = 
_mm256_loadu_si256(src.as_ptr().add(src_i).cast::()); + // _mm256_storeu_si256 does not require alignment + #[allow(clippy::cast_ptr_alignment)] _mm256_storeu_si256( buffer .as_mut_ptr() diff --git a/src/avx2/stage1.rs b/src/avx2/stage1.rs index ea98ff7d..2a0995a4 100644 --- a/src/avx2/stage1.rs +++ b/src/avx2/stage1.rs @@ -35,23 +35,22 @@ macro_rules! high_nibble_mask { }; } -//pub const SIMDJSON_PADDING: usize = mem::size_of::<__m256i>(); -//pub const SIMDINPUT_LENGTH: usize = 64; - #[derive(Debug)] pub(crate) struct SimdInputAVX { v0: __m256i, v1: __m256i, } -impl Stage1Parse<__m256i> for SimdInputAVX { +impl Stage1Parse for SimdInputAVX { + type Utf8Validator = simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp; + type SimdRepresentation = __m256i; #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_ptr_alignment)] #[target_feature(enable = "avx2")] unsafe fn new(ptr: &[u8]) -> Self { Self { - v0: _mm256_loadu_si256(ptr.as_ptr().cast::()), - v1: _mm256_loadu_si256(ptr.as_ptr().add(32).cast::()), + v0: _mm256_loadu_si256(ptr.as_ptr().cast::<__m256i>()), + v1: _mm256_loadu_si256(ptr.as_ptr().add(32).cast::<__m256i>()), } } @@ -142,8 +141,13 @@ impl Stage1Parse<__m256i> for SimdInputAVX { _mm256_set1_epi8(0), ); + // We depend on this static_cast_u32 as `_mm256_movemask_epi8` returns a i32 + // and rusts conversion of i32 to u64 and u32 to u64 isn't equivalent + // in the case if i32 a negative flag (highest bit set to 1) + // carries over to the entire upper half in the u64 to be set to 1 as well + let structural_res_0: u64 = u64::from(static_cast_u32!(_mm256_movemask_epi8(tmp_lo))); - let structural_res_1: u64 = _mm256_movemask_epi8(tmp_hi) as u64; + let structural_res_1: u64 = u64::from(static_cast_u32!(_mm256_movemask_epi8(tmp_hi))); *structurals = !(structural_res_0 | (structural_res_1 << 32)); let tmp_ws_lo: __m256i = _mm256_cmpeq_epi8( @@ -156,7 +160,7 @@ impl Stage1Parse<__m256i> for SimdInputAVX { ); let ws_res_0: u64 = 
u64::from(static_cast_u32!(_mm256_movemask_epi8(tmp_ws_lo))); - let ws_res_1: u64 = _mm256_movemask_epi8(tmp_ws_hi) as u64; + let ws_res_1: u64 = u64::from(static_cast_u32!(_mm256_movemask_epi8(tmp_ws_hi))); *whitespace = !(ws_res_0 | (ws_res_1 << 32)); } diff --git a/src/charutils.rs b/src/charutils.rs index 6683fe2c..ed1c389e 100644 --- a/src/charutils.rs +++ b/src/charutils.rs @@ -87,29 +87,30 @@ pub fn codepoint_to_utf8(cp: u32, c: &mut [u8]) -> usize { unsafe { if cp <= 0x7F { *c.get_kinda_unchecked_mut(0) = cp as u8; - return 1; // ascii - } - if cp <= 0x7FF { + 1 // ascii + } else if cp <= 0x7FF { *c.get_kinda_unchecked_mut(0) = ((cp >> 6) + 192) as u8; *c.get_kinda_unchecked_mut(1) = ((cp & 63) + 128) as u8; - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here + 2 + // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here } else if cp <= 0xFFFF { *c.get_kinda_unchecked_mut(0) = ((cp >> 12) + 224) as u8; *c.get_kinda_unchecked_mut(1) = (((cp >> 6) & 63) + 128) as u8; *c.get_kinda_unchecked_mut(2) = ((cp & 63) + 128) as u8; - return 3; + 3 } else if cp <= 0x0010_FFFF { // if you know you have a valid code point, this is not needed *c.get_kinda_unchecked_mut(0) = ((cp >> 18) + 240) as u8; *c.get_kinda_unchecked_mut(1) = (((cp >> 12) & 63) + 128) as u8; *c.get_kinda_unchecked_mut(2) = (((cp >> 6) & 63) + 128) as u8; *c.get_kinda_unchecked_mut(3) = ((cp & 63) + 128) as u8; - return 4; + 4 + } else { + // will return 0 when the code point was too large. + 0 } } - // will return 0 when the code point was too large. 
- 0 } diff --git a/src/error.rs b/src/error.rs index c732c31e..0a1b70c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -43,7 +43,7 @@ pub enum ErrorType { /// Expected an unsigned number ExpectedUnsigned, /// Internal error - InternalError, + InternalError(InternalError), /// Invalid escape sequence InvalidEscape, /// Invalid exponent in a floating point number @@ -88,6 +88,12 @@ pub enum ErrorType { Io(std::io::Error), } +#[derive(Debug, PartialEq)] +pub enum InternalError { + InvalidStrucutralIndexes, + TapeError, +} + impl From for Error { fn from(e: std::io::Error) -> Self { Self::generic(ErrorType::Io(e)) @@ -116,7 +122,6 @@ impl PartialEq for ErrorType { | (Self::ExpectedSigned, Self::ExpectedSigned) | (Self::ExpectedString, Self::ExpectedString) | (Self::ExpectedUnsigned, Self::ExpectedUnsigned) - | (Self::InternalError, Self::InternalError) | (Self::InvalidEscape, Self::InvalidEscape) | (Self::InvalidExponent, Self::InvalidExponent) | (Self::InvalidNumber, Self::InvalidNumber) @@ -136,6 +141,7 @@ impl PartialEq for ErrorType { | (Self::ExpectedObjectKey, Self::ExpectedObjectKey) | (Self::Overflow, Self::Overflow) => true, (Self::Serde(s1), Self::Serde(s2)) => s1 == s2, + (Self::InternalError(e1), Self::InternalError(e2)) => e1 == e2, _ => false, } } @@ -195,10 +201,15 @@ impl From for std::io::Error { #[cfg(test)] mod test { - use super::{Error, ErrorType}; + use super::{Error, ErrorType, InternalError}; #[test] fn fmt() { - let e = Error::generic(ErrorType::InternalError); - assert_eq!(e.to_string(), "InternalError at character 0"); + let e = Error::generic(ErrorType::InternalError( + InternalError::InvalidStrucutralIndexes, + )); + assert_eq!( + e.to_string(), + "InternalError(InvalidStrucutralIndexes) at character 0" + ); } } diff --git a/src/lib.rs b/src/lib.rs index 19950dee..0505b21d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![deny(warnings)] #![cfg_attr(feature = "hints", feature(core_intrinsics))] +#![cfg_attr(feature = "portable", 
feature(portable_simd))] #![warn(unused_extern_crates)] #![deny( clippy::all, @@ -119,6 +120,13 @@ //! let v: Value = simd_json::serde::from_slice(&mut d).unwrap(); //! ``` +/// rust native implementation +mod native; + +#[cfg(feature = "portable")] +/// rust native implementation +mod portable; + #[cfg(feature = "serde_impl")] extern crate serde as serde_ext; @@ -126,6 +134,7 @@ extern crate serde as serde_ext; /// serde related helper functions pub mod serde; +use crate::error::InternalError; #[cfg(feature = "serde_impl")] pub use crate::serde::{ from_reader, from_slice, from_str, to_string, to_string_pretty, to_vec, to_vec_pretty, @@ -157,29 +166,21 @@ pub const SIMDINPUT_LENGTH: usize = 64; mod avx2; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] pub(crate) use crate::avx2::stage1::SimdInputAVX; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -use simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp as ChunkedUtf8ValidatorImpAVX2; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] mod sse42; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] pub(crate) use crate::sse42::stage1::SimdInputSSE; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -use simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp as ChunkedUtf8ValidatorImpSSE42; #[cfg(target_arch = "aarch64")] mod neon; #[cfg(target_arch = "aarch64")] pub(crate) use crate::neon::stage1::SimdInputNEON; -#[cfg(target_arch = "aarch64")] -use simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp as ChunkedUtf8ValidatorImpNEON; #[cfg(target_feature = "simd128")] mod simd128; #[cfg(target_feature = "simd128")] pub(crate) use crate::simd128::stage1::SimdInput128; -#[cfg(target_feature = "simd128")] -use simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp as ChunkedUtf8ValidatorImpSIMD128; mod stage2; /// simd-json JSON-DOM value @@ -242,14 +243,17 @@ pub fn to_tape(s: &mut [u8]) -> Result> { Deserializer::from_slice(s).map(Deserializer::into_tape) } 
-pub(crate) trait Stage1Parse { +pub(crate) trait Stage1Parse { + type Utf8Validator: ChunkedUtf8Validator; + type SimdRepresentation; + unsafe fn new(ptr: &[u8]) -> Self; unsafe fn compute_quote_mask(quote_bits: u64) -> u64; unsafe fn cmp_mask_against_input(&self, m: u8) -> u64; - unsafe fn unsigned_lteq_against_input(&self, maxval: T) -> u64; + unsafe fn unsigned_lteq_against_input(&self, maxval: Self::SimdRepresentation) -> u64; unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64); @@ -379,8 +383,8 @@ pub(crate) trait Stage1Parse { structurals } - unsafe fn fill_s8(n: i8) -> T; - unsafe fn zero() -> T; + unsafe fn fill_s8(n: i8) -> Self::SimdRepresentation; + unsafe fn zero() -> Self::SimdRepresentation; } /// Deserializer struct to deserialize a JSON @@ -393,38 +397,73 @@ pub struct Deserializer<'de> { impl<'de> Deserializer<'de> { #[inline] - #[cfg(not(any(feature = "avx2", feature = "sse42")))] - pub(crate) fn parse_str_<'invoke>( + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + not(feature = "avx2"), + not(feature = "sse42"), + ))] + pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, ) -> Result<&'de str> { - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - { - let cell = std::cell::OnceCell::new(); - let avx_support: &bool = cell.get_or_init(|| std::is_x86_feature_detected!("avx2")); - if *avx_support { - return unsafe { crate::avx2::deser::parse_str_avx(input, data, buffer, idx) }; - } - let cell = std::cell::OnceCell::new(); - let sse_support: &bool = cell.get_or_init(|| std::is_x86_feature_detected!("sse4.2")); - if *sse_support { - return unsafe { crate::sse42::deser::parse_str_sse(input, data, buffer, idx) }; - } - Err(Self::error(ErrorType::SimdUnsupported)) - } - #[cfg(target_arch = "aarch64")] - { - return crate::neon::deser::parse_str_neon(input, data, buffer, idx); - } - #[cfg(target_feature = "simd128")] - { - 
return crate::simd128::deser::parse_str_simd128(input, data, buffer, idx); + if std::is_x86_feature_detected!("avx2") { + crate::avx2::deser::parse_str(input, data, buffer, idx) + } else if std::is_x86_feature_detected!("sse4.2") { + crate::sse42::deser::parse_str(input, data, buffer, idx) + } else { + #[cfg(feature = "portable")] + let r = crate::portable::deser::parse_str(input, data, buffer, idx); + #[cfg(not(feature = "portable"))] + let r = crate::native::deser::parse_str(input, data, buffer, idx); + + r } } /// To allow inlining + #[inline] + #[cfg(target_arch = "aarch64")] + pub(crate) fn parse_str_<'invoke>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> std::result::Result, ErrorType> { + unsafe { crate::neon::deser::parse_str(input, data, buffer, idx) } + } + /// To allow inlining + #[inline] + #[cfg(target_feature = "simd128")] + pub(crate) fn parse_str_<'invoke>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> std::result::Result, ErrorType> { + unsafe { crate::simd128::deser::parse_str(input, data, buffer, idx) } + } + + #[cfg(all( + not(target_feature = "simd128"), + not(target_arch = "aarch64"), + not(target_arch = "x86_64"), + not(target_arch = "x86") + ))] + pub(crate) fn parse_str_<'invoke>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> std::result::Result, ErrorType> { + #[cfg(feature = "portable")] + let r = crate::portable::deser::parse_str(input, data, buffer, idx); + #[cfg(not(feature = "portable"))] + let r = crate::native::deser::parse_str(input, data, buffer, idx); + r + } + #[inline] #[cfg(feature = "avx2")] pub(crate) fn parse_str_<'invoke>( @@ -433,7 +472,7 @@ impl<'de> Deserializer<'de> { buffer: &'invoke mut [u8], idx: usize, ) -> std::result::Result, ErrorType> { - unsafe { crate::avx2::deser::parse_str_avx(input, data, buffer, idx) } + unsafe { crate::avx2::deser::parse_str(input, data, buffer, idx) } } 
#[inline] @@ -444,7 +483,7 @@ impl<'de> Deserializer<'de> { buffer: &'invoke mut [u8], idx: usize, ) -> std::result::Result, ErrorType> { - unsafe { crate::sse42::deser::parse_str_sse(input, data, buffer, idx) } + unsafe { crate::sse42::deser::parse_str(input, data, buffer, idx) } } /// Extracts the tape from the Deserializer @@ -557,32 +596,52 @@ impl<'de> Deserializer<'de> { ) -> std::result::Result<(), ErrorType> { #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { - let cell = std::cell::OnceCell::new(); - let avx_support: &bool = cell.get_or_init(|| std::is_x86_feature_detected!("avx2")); - if *avx_support { - return Self::find_structural_bits_avx(input, structural_indexes); - } - let cell = std::cell::OnceCell::new(); - let sse_support: &bool = cell.get_or_init(|| std::is_x86_feature_detected!("sse4.2")); - if *sse_support { - return Self::find_structural_bits_sse(input, structural_indexes); + if std::is_x86_feature_detected!("avx2") { + Self::find_structural_bits_avx(input, structural_indexes) + } else if std::is_x86_feature_detected!("sse4.2") { + Self::find_structural_bits_sse(input, structural_indexes) + } else { + // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything + #[cfg(not(feature = "portable"))] + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), + }; + #[cfg(not(feature = "portable"))] + let r = Self::find_structural_bits_native(input, structural_indexes); + #[cfg(feature = "portable")] + let r = Self::find_structural_bits_portable(input, structural_indexes); + r } - panic!("Please run on a simd compatible cpu, read the simdjson README."); } #[cfg(target_arch = "aarch64")] { - return Self::_find_structural_bits::<_, SimdInputNEON, ChunkedUtf8ValidatorImpNEON>( - input, - structural_indexes, - ); + return Self::_find_structural_bits::(input, structural_indexes); } #[cfg(target_feature = "simd128")] { - return Self::_find_structural_bits::<_, SimdInput128, 
ChunkedUtf8ValidatorImpSIMD128>( - input, - structural_indexes, - ); + return Self::_find_structural_bits::(input, structural_indexes); + } + // If we're on a non supported platform fall back to the native ones + #[cfg(all( + not(target_feature = "simd128"), + not(target_arch = "aarch64"), + not(target_arch = "x86_64"), + not(target_arch = "x86") + ))] + { + // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything + #[cfg(not(feature = "portable"))] + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), + }; + #[cfg(not(feature = "portable"))] + let r = Self::find_structural_bits_native(input, structural_indexes); + #[cfg(feature = "portable")] + let r = Self::find_structural_bits_portable(input, structural_indexes); + r } } @@ -593,10 +652,7 @@ impl<'de> Deserializer<'de> { input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::<_, SimdInputAVX, ChunkedUtf8ValidatorImpAVX2>( - input, - structural_indexes, - ) + Self::_find_structural_bits::(input, structural_indexes) } #[inline] @@ -606,10 +662,29 @@ impl<'de> Deserializer<'de> { input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::<_, SimdInputSSE, ChunkedUtf8ValidatorImpSSE42>( - input, - structural_indexes, - ) + Self::_find_structural_bits::(input, structural_indexes) + } + + #[inline] + // #[cfg(not(feature = "portable"))] + pub(crate) unsafe fn find_structural_bits_native( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + use native::stage1::NativeInput; + + Self::_find_structural_bits::(input, structural_indexes) + } + + #[inline] + #[cfg(feature = "portable")] + pub(crate) unsafe fn find_structural_bits_portable( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + use portable::stage1::SimdInputPortable; + + 
Self::_find_structural_bits::(input, structural_indexes) } #[allow(clippy::cast_possible_truncation)] @@ -627,7 +702,7 @@ impl<'de> Deserializer<'de> { #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_possible_truncation)] - pub(crate) unsafe fn _find_structural_bits, C: ChunkedUtf8Validator>( + pub(crate) unsafe fn _find_structural_bits( input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { @@ -638,7 +713,7 @@ impl<'de> Deserializer<'de> { structural_indexes.reserve(len / 6); structural_indexes.push(0); // push extra root element - let mut utf8_validator = C::new(); + let mut utf8_validator = S::Utf8Validator::new(); // we have padded the input out to 64 byte multiple with the remainder being // zeros @@ -768,7 +843,9 @@ impl<'de> Deserializer<'de> { } if structural_indexes.last() > Some(&(len as u32)) { - return Err(ErrorType::InternalError); + return Err(ErrorType::InternalError( + InternalError::InvalidStrucutralIndexes, + )); } if error_mask != 0 { @@ -864,1227 +941,4 @@ impl DerefMut for AlignedBuf { } #[cfg(test)] -mod tests { - - use super::to_borrowed_value; - use super::{owned::Value, to_owned_value, Deserializer}; - use crate::tape::Node; - #[cfg(not(target_arch = "wasm32"))] - use proptest::prelude::*; - #[cfg(not(target_arch = "wasm32"))] - use value_trait::StaticNode; - use value_trait::Writable; - - #[cfg(not(feature = "approx-number-parsing"))] - #[test] - #[allow(clippy::float_cmp)] - fn alligned_number_parse() { - let str = "9521.824380305317"; - let mut slice = str.as_bytes().to_owned(); - let value: crate::BorrowedValue<'_> = - crate::to_borrowed_value(&mut slice).expect("failed to parse"); - assert_eq!(value, 9_521.824_380_305_317); - } - - #[test] - fn test_send_sync() { - struct TestStruct(T); - #[allow(clippy::let_underscore_drop)] // test - let _: TestStruct<_> = TestStruct(super::AlignedBuf::with_capacity(0)); - } - - #[test] - fn count1() { - let mut d = String::from("[]"); - let 
d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!(simd.tape[1], Node::Array { len: 0, count: 0 }); - } - - #[test] - fn count2() { - let mut d = String::from("[1]"); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!(simd.tape[1], Node::Array { len: 1, count: 1 }); - } - - #[test] - fn count3() { - let mut d = String::from("[1,2]"); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!(simd.tape[1], Node::Array { len: 2, count: 2 }); - } - - #[test] - fn count4() { - let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!(simd.tape[1], Node::Array { len: 3, count: 4 }); - assert_eq!(simd.tape[3], Node::Array { len: 1, count: 1 }); - } - - #[test] - fn count5() { - let mut d = String::from("[[],null,null]"); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!(simd.tape[1], Node::Array { len: 3, count: 3 }); - assert_eq!(simd.tape[2], Node::Array { len: 0, count: 0 }); - } - - #[test] - fn test_tape_object_simple() { - let mut d = String::from(r#" { "hello": 1 , "b": 1 }"#); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!( - simd.tape, - [ - Node::Static(StaticNode::Null), - Node::Object { len: 2, count: 4 }, - Node::String(r#"hello"#), // <-- This is already escaped - Node::Static(StaticNode::I64(1)), - Node::String("b"), - Node::Static(StaticNode::I64(1)), - ] - ); - } - - #[test] - fn test_tape_object_escaped() { - let mut d = String::from(r#" { "hell\"o": 1 , "b": [ 1, 2, 3 ] }"#); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - assert_eq!( - simd.tape, - [ - Node::Static(StaticNode::Null), - Node::Object { len: 2, count: 7 }, - Node::String(r#"hell"o"#), // <-- 
This is already escaped - Node::Static(StaticNode::I64(1)), - Node::String("b"), - Node::Array { len: 3, count: 3 }, - Node::Static(StaticNode::I64(1)), - Node::Static(StaticNode::I64(2)), - Node::Static(StaticNode::I64(3)) - ] - ); - } - - #[test] - fn string_array() { - const STR: &str = r#""{\"arg\":\"test\"}""#; - let mut d = String::from(STR); - let d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(d).expect(""); - dbg!(&simd.tape); - // assert_eq!(simd.tape[1], Node::Array(1, 3)); - assert_eq!(simd.tape[1], Node::String("{\"arg\":\"test\"}")); - } - - #[cfg(feature = "128bit")] - #[test] - fn odd_nuber() { - use super::value::owned::to_value; - use super::value::{Builder, Mutable}; - - let mut d = String::from( - r#"{"name": "max_unsafe_auto_id_timestamp", "value": -9223372036854776000}"#, - ); - - let mut d = unsafe { d.as_bytes_mut() }; - let mut o = Value::object(); - o.insert("name", "max_unsafe_auto_id_timestamp") - .expect("failed to set key"); - o.insert("value", -9_223_372_036_854_776_000_i128) - .expect("failed to set key"); - assert_eq!(to_value(&mut d), Ok(o)); - } - - #[cfg(feature = "128bit")] - #[test] - fn odd_nuber2() { - use super::value::owned::to_value; - use super::value::{Builder, Mutable}; - - let mut d = String::from( - r#"{"name": "max_unsafe_auto_id_timestamp", "value": 9223372036854776000}"#, - ); - - let mut d = unsafe { d.as_bytes_mut() }; - let mut o = Value::object(); - o.insert("name", "max_unsafe_auto_id_timestamp") - .expect("failed to set key"); - o.insert("value", 9_223_372_036_854_776_000_u128) - .expect("failed to set key"); - assert_eq!(to_value(&mut d), Ok(o)); - } - // How much do we care about this, it's within the same range and - // based on floating point math imprecisions during parsing. - // Is this a real issue worth improving? 
- #[test] - fn silly_float1() { - let v = Value::from(3.090_144_804_232_201_7e305); - let s = v.encode(); - let mut bytes = s.as_bytes().to_vec(); - let parsed = to_owned_value(&mut bytes).expect("failed to parse generated float"); - assert_eq!(v, parsed); - } - - #[test] - #[ignore] - fn silly_float2() { - let v = Value::from(-6.990_585_694_841_803e305); - let s = v.encode(); - let mut bytes = s.as_bytes().to_vec(); - let parsed = to_owned_value(&mut bytes).expect("failed to parse generated float"); - assert_eq!(v, parsed); - } - #[cfg(not(feature = "128bit"))] - #[cfg(not(target_arch = "wasm32"))] - fn arb_json_value() -> BoxedStrategy { - let leaf = prop_oneof![ - Just(Value::Static(StaticNode::Null)), - any::().prop_map(Value::from), - //(-1.0e306f64..1.0e306f64).prop_map(Value::from), // damn you float! - any::().prop_map(Value::from), - any::().prop_map(Value::from), - ".*".prop_map(Value::from), - ]; - leaf.prop_recursive( - 8, // 8 levels deep - 256, // Shoot for maximum size of 256 nodes - 10, // We put up to 10 items per collection - |inner| { - prop_oneof![ - // Take the inner strategy and make the two recursive cases. - prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), - prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), - ] - }, - ) - .boxed() - } - - #[cfg(feature = "128bit")] - #[cfg(not(target_arch = "wasm32"))] - fn arb_json_value() -> BoxedStrategy { - let leaf = prop_oneof![ - Just(Value::Static(StaticNode::Null)), - any::().prop_map(Value::from), - //(-1.0e306f64..1.0e306f64).prop_map(Value::from), // damn you float! - any::().prop_map(Value::from), - any::().prop_map(Value::from), - any::().prop_map(Value::from), - any::().prop_map(Value::from), - ".*".prop_map(Value::from), - ]; - leaf.prop_recursive( - 8, // 8 levels deep - 256, // Shoot for maximum size of 256 nodes - 10, // We put up to 10 items per collection - |inner| { - prop_oneof![ - // Take the inner strategy and make the two recursive cases. 
- prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), - prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), - ] - }, - ) - .boxed() - } - - #[cfg(not(target_arch = "wasm32"))] - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - // Disabled for code coverage, enable to track bugs - // fork: true, - .. ProptestConfig::default() - })] - - #[test] - fn prop_json_encode_decode(val in arb_json_value()) { - let mut encoded: Vec = Vec::new(); - val.write(&mut encoded).expect("write"); - println!("{}", String::from_utf8_lossy(&encoded)); - let mut e = encoded.clone(); - let res = to_owned_value(&mut e).expect("can't convert"); - assert_eq!(val, res); - let mut e = encoded.clone(); - let res = to_borrowed_value(&mut e).expect("can't convert"); - assert_eq!(val, res); - #[cfg(not(feature = "128bit"))] - { // we can't do 128 bit w/ serde - use crate::{deserialize, BorrowedValue, OwnedValue}; - let mut e = encoded.clone(); - let res: OwnedValue = deserialize(&mut e).expect("can't convert"); - assert_eq!(val, res); - let mut e = encoded; - let res: BorrowedValue = deserialize(&mut e).expect("can't convert"); - assert_eq!(val, res); - } - } - - } -} - -#[cfg(feature = "serde_impl")] -#[cfg(test)] -mod tests_serde { - #![allow(clippy::unnecessary_operation, clippy::non_ascii_literal)] - use super::serde::from_slice; - use super::{owned::to_value, owned::Object, owned::Value, to_borrowed_value, to_owned_value}; - use halfbrown::HashMap; - #[cfg(not(target_arch = "wasm32"))] - use proptest::prelude::*; - use serde::Deserialize; - - use value_trait::{Builder, Mutable, StaticNode}; - - #[test] - fn empty() { - let mut d = String::new(); - let d = unsafe { d.as_bytes_mut() }; - let v_simd = from_slice::(d); - let v_serde = serde_json::from_slice::(d); - assert!(v_simd.is_err()); - assert!(v_serde.is_err()); - } - - #[test] - fn bool_true() 
{ - let mut d = String::from("true"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(true))); - } - - #[test] - fn bool_false() { - let mut d = String::from("false"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(false))); - //assert!(false) - } - - #[test] - fn union() { - let mut d = String::from("null"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::Static(StaticNode::Null))); - } - - #[test] - fn int() { - let mut d = String::from("42"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(42))); - } - - #[test] - fn zero() { - let mut d = String::from("0"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(0))); - } - - #[test] - fn one() { - let mut d = 
String::from("1"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(1))); - } - - #[test] - fn minus_one() { - let mut d = String::from("-1"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(-1))); - } - - #[test] - fn float() { - let mut d = String::from("23.0"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(d1), Ok(Value::from(23.0))); - } - - #[test] - fn string() { - let mut d = String::from(r#""snot""#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(to_value(d1), Ok(Value::from("snot"))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn lonely_quote() { - let mut d = String::from(r#"""#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn lonely_quote1() { - let mut d = String::from(r#"["]"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(d).is_err(); - assert!(v_simd); - 
assert!(v_serde); - } - #[test] - fn lonely_quote2() { - let mut d = String::from(r#"[1, "]"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn lonely_quote3() { - let mut d = String::from(r#"{": 1}"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn empty_string() { - let mut d = String::from(r#""""#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(to_value(d1), Ok(Value::from(""))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn empty_array() { - let mut d = String::from(r#"[]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); - assert_eq!(to_value(d1), Ok(Value::Array(vec![]))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn malformed_array() { - let mut d = String::from(r#"[["#); - let mut d1 = d.clone(); - let mut d2 = d.clone(); - let d = unsafe { d.as_bytes_mut() }; - let d1 = unsafe { d1.as_bytes_mut() }; - let d2 = unsafe { d2.as_bytes_mut() }; - let v_serde: Result = serde_json::from_slice(d); - let v_simd_owned_value = to_owned_value(d); - let v_simd_borrowed_value = to_borrowed_value(d1); - let v_simd: Result = from_slice(d2); - assert!(v_simd_owned_value.is_err()); - assert!(v_simd_borrowed_value.is_err()); - assert!(v_simd.is_err()); - assert!(v_serde.is_err()); - } - - #[test] - fn double_array() { - let mut d = String::from(r#"[[]]"#); - let 
mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); - assert_eq!(to_value(d1), Ok(Value::Array(vec![Value::Array(vec![])]))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn null_null_array() { - let mut d = String::from(r#"[[],null,null]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::Array(vec![]), - Value::Static(StaticNode::Null), - Value::Static(StaticNode::Null), - ])) - ); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn one_element_array() { - let mut d = String::from(r#"["snot"]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - assert_eq!(to_value(d1), Ok(Value::Array(vec![Value::from("snot")]))); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn two_element_array() { - let mut d = String::from(r#"["snot", "badger"]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::from("snot"), - Value::from("badger") - ])) - ); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn list() { - let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() 
}; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::from(42), - Value::from(23.0), - Value::from("snot badger") - ])) - ); - } - - #[test] - fn nested_list1() { - let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::from(42), - Value::Array(vec![Value::from(23.0), Value::from("snot")]), - Value::from("bad"), - Value::from("ger") - ])) - ); - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nested_list2() { - let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn utf8() { - let mut d = String::from(r#""\u000e""#); - let d = unsafe { d.as_bytes_mut() }; - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, "\u{e}"); - // NOTE: serde is broken for this - //assert_eq!(v_serde, "\u{e}"); - //assert_eq!(v_simd, v_serde); - } - #[test] - fn utf8_invalid_surrogates() { - // This is invalid UTF-8, the first character is a high surrogate - let mut d = String::from(r#""\uDE71""#); - let d = unsafe { d.as_bytes_mut() }; - let v_simd: Result = from_slice(d); - assert!(v_simd.is_err()); - } - - #[test] - fn unicode() { - let mut d = String::from(r#""Β‘\"""#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - 
assert_eq!(v_simd, v_serde); - } - - #[test] - fn odd_array() { - let mut d = String::from("[{},null]"); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::from(Object::default()), - Value::Static(StaticNode::Null) - ])) - ); - } - - #[test] - fn min_i64() { - let mut d = String::from( - r#"{"name": "max_unsafe_auto_id_timestamp", "value": -9223372036854775808}"#, - ); - - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - let mut o = Value::object(); - o.insert("name", "max_unsafe_auto_id_timestamp") - .expect("failed to set key"); - o.insert("value", -9_223_372_036_854_775_808_i64) - .expect("failed to set key"); - assert_eq!(to_value(d1), Ok(o)); - } - - #[test] - fn map2() { - let mut d = String::from(r#"[{"\u0000":null}]"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn null() { - let mut d = String::from(r#"null"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - assert_eq!(to_value(d1), Ok(Value::Static(StaticNode::Null))); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - #[test] - fn null_null() { - let mut d = String::from(r#"[null, null]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - 
let d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![ - Value::Static(StaticNode::Null), - Value::Static(StaticNode::Null), - ])) - ); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nested_null() { - let mut d = String::from(r#"[[null, null]]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![Value::Array(vec![ - Value::Static(StaticNode::Null), - Value::Static(StaticNode::Null), - ])])) - ); - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nestednested_null() { - let mut d = String::from(r#"[[[null, null]]]"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(d1), - Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ - Value::Static(StaticNode::Null), - Value::Static(StaticNode::Null), - ])])])) - ); - } - - #[test] - fn odd_array2() { - let mut d = String::from("[[\"\\u0000\\\"\"]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn odd_array3() { - let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn 
odd_array4() { - let mut d = String::from("[{\"\\u0000𐀀a\":null}]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn float1() { - let mut d = String::from("2.3250706903316115e307"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - // We ignore this since serde is less precise on this test - #[ignore] - #[test] - fn float2() { - let mut d = String::from("-4.5512678569607477e306"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[cfg(not(feature = "approx-number-parsing"))] - #[test] - fn float3() { - let mut d = String::from("0.6"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Number = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Number = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn map0() { - let mut d = String::from(r#"{"snot": "badger"}"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - let mut h = Object::default(); - h.insert("snot".into(), Value::from("badger")); - assert_eq!(to_value(d1), Ok(Value::from(h))); - } - - #[test] - fn map1() { - let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); - let mut d1 = d.clone(); - let d1 = unsafe { d1.as_bytes_mut() }; - let d = unsafe { d.as_bytes_mut() }; - let 
v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(d).expect(""); - assert_eq!(v_simd, v_serde); - let mut h = Object::default(); - h.insert("snot".into(), Value::from("badger")); - h.insert("badger".into(), Value::from("snot")); - assert_eq!(to_value(d1), Ok(Value::from(h))); - } - - #[cfg(feature = "serde_impl")] - #[test] - fn tpl1() { - let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); - let v_simd: (f32, f32) = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn tpl2() { - let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec<(f32, f32)> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn tpl3() { - let mut d = String::from( - "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", - ); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec<(f32, f32)> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - #[test] - fn tpl4() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - #[test] - fn tpl5() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(d).expect("simd_json"); - 
assert_eq!(v_simd, v_serde); - } - - #[test] - fn tpl6() { - let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec>> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn tpl7() { - let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec>> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[derive(Deserialize, PartialEq, Debug)] - struct Obj { - a: u64, - b: u64, - } - - #[derive(Deserialize, PartialEq, Debug)] - struct Obj1 { - a: Obj, - } - - #[test] - fn obj1() { - let mut d = String::from(r#"{"a": 1, "b":1}"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Obj = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn obj2() { - let mut d = - String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn obj3() { - let mut d = String::from( - r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, - ); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: HashMap> = - serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn obj4() { - let mut d = String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: 
HashMap = serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn vecvec() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn invalid_float_array() { - let mut data = - b"[11111111111111111111111111111E1,-111111111111111111111E111111111".to_vec(); - - assert!(to_owned_value(&mut data).is_err()); - } - - #[test] - fn crazy_string() { - // there is unicode in here! - let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; - let mut d = String::from(d); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - #[cfg(feature = "serde_impl")] - #[test] - fn event() { - #[derive(Deserialize, Debug, PartialEq, Eq)] - #[serde(deny_unknown_fields, rename_all = "camelCase")] - pub struct CitmCatalog { - pub area_names: HashMap, - pub audience_sub_category_names: HashMap, - pub block_names: HashMap, - pub events: HashMap, - } - pub type Id = u32; - #[derive(Deserialize, Debug, PartialEq, Eq)] - #[serde(deny_unknown_fields, rename_all = "camelCase")] - pub struct Event { - pub description: (), - pub id: Id, - pub logo: Option, - pub name: String, - pub sub_topic_ids: Vec, - pub subject_code: (), - pub subtitle: (), - pub topic_ids: Vec, - } - - let mut d = String::from( - r#" -{ - "areaNames": { - "205705993": "ArriΓ¨re-scΓ¨ne central", - "205705994": "1er balcon central", - "205705995": "2Γ¨me balcon bergerie cour", - "205705996": "2Γ¨me balcon 
bergerie jardin", - "205705998": "1er balcon bergerie jardin", - "205705999": "1er balcon bergerie cour", - "205706000": "ArriΓ¨re-scΓ¨ne jardin", - "205706001": "ArriΓ¨re-scΓ¨ne cour", - "205706002": "2Γ¨me balcon jardin", - "205706003": "2Γ¨me balcon cour", - "205706004": "2Γ¨me Balcon central", - "205706005": "1er balcon jardin", - "205706006": "1er balcon cour", - "205706007": "Orchestre central", - "205706008": "Orchestre jardin", - "205706009": "Orchestre cour", - "342752287": "Zone physique secrΓ¨te" - }, - "audienceSubCategoryNames": { - "337100890": "AbonnΓ©" - }, - "blockNames": {}, - "events": { - "138586341": { - "description": null, - "id": 138586341, - "logo": null, - "name": "30th Anniversary Tour", - "subTopicIds": [ - 337184269, - 337184283 - ], - "subjectCode": null, - "subtitle": null, - "topicIds": [ - 324846099, - 107888604 - ] - }, - "138586345": { - "description": null, - "id": 138586345, - "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", - "name": "Berliner Philharmoniker", - "subTopicIds": [ - 337184268, - 337184283, - 337184275 - ], - "subjectCode": null, - "subtitle": null, - "topicIds": [ - 324846099, - 107888604, - 324846100 - ] - } - } -} -"#, - ); - let d = unsafe { d.as_bytes_mut() }; - let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); - let v_simd: CitmCatalog = from_slice(d).expect("simd_json"); - assert_eq!(v_simd, v_serde); - } - - //6.576692109929364e305 - #[cfg(not(target_arch = "wasm32"))] - fn arb_json() -> BoxedStrategy { - let leaf = prop_oneof![ - Just(Value::Static(StaticNode::Null)), - any::() - .prop_map(StaticNode::Bool) - .prop_map(Value::Static), - // (-1.0e306f64..1.0e306f64).prop_map(Value::from), // The float parsing of simd and serde are too different - any::().prop_map(Value::from), - ".*".prop_map(Value::from), - ]; - leaf.prop_recursive( - 8, // 8 levels deep - 256, // Shoot for maximum size of 256 nodes - 10, // We put up to 10 items per collection - |inner| { - prop_oneof![ - // Take the 
inner strategy and make the two recursive cases. - prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), - prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), - ] - }, - ) - .prop_map(|v| serde_json::to_string(&v).expect("")) - .boxed() - } - - #[cfg(feature = "serde_impl")] - #[test] - fn int_map_key() -> Result<(), crate::Error> { - use std::collections::BTreeMap; - - let mut map = BTreeMap::new(); - map.insert(0, "foo"); - map.insert(1, "bar"); - map.insert(2, "baz"); - - assert_eq!( - r#"{"0":"foo","1":"bar","2":"baz"}"#, - crate::to_string(&map)? - ); - Ok(()) - } - - #[cfg(feature = "serde_impl")] - #[test] - fn enum_test() -> Result<(), crate::Error> { - use serde::{Deserialize, Serialize}; - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct MyStruct { - field: u8, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - enum MyEnum { - First(MyStruct), - Second(u8), - } - - let thing = MyEnum::First(MyStruct { field: 1 }); - let mut ser = crate::serde::to_string(&thing)?; - println!("Ser {ser:?}"); - let des: MyEnum = unsafe { crate::serde::from_str(&mut ser)? }; - println!("Des {des:?}"); - assert_eq!(thing, des); - Ok(()) - } - - #[test] - fn invalid_float() { - let mut s: Vec = b"[100,9e999]".to_vec(); - assert!(to_owned_value(&mut s).is_err()); - } - - #[cfg(not(target_arch = "wasm32"))] - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - // Disabled for code coverage, enable to track bugs - // fork: true, - .. ProptestConfig::default() - })] - - #[test] - fn prop_json(d in arb_json()) { - use super::{OwnedValue, deserialize}; - if let Ok(v_serde) = serde_json::from_slice::(d.as_bytes()) { - let mut d1 = d.clone(); - let d1 = unsafe{ d1.as_bytes_mut()}; - let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); - // We add our own encoder in here. 
- let mut d2 = v_simd_serde.to_string(); - let d2 = unsafe{ d2.as_bytes_mut()}; - let mut d3 = d.clone(); - let d3 = unsafe{ d3.as_bytes_mut()}; - let mut d4 = d.clone(); - let d4 = unsafe{ d4.as_bytes_mut()}; - assert_eq!(v_simd_serde, v_serde); - let v_simd_owned = to_owned_value(d2).expect("to_owned_value failed"); - let v_simd_borrowed = to_borrowed_value(d3).expect("to_borrowed_value failed"); - assert_eq!(v_simd_borrowed, v_simd_owned); - let v_deserialize: OwnedValue = deserialize(d4).expect("deserialize failed"); - assert_eq!(v_deserialize, v_simd_owned); - } - - } - - } - - #[cfg(not(target_arch = "wasm32"))] - fn arb_junk() -> BoxedStrategy> { - prop::collection::vec(any::(), 0..(1024 * 8)).boxed() - } - #[cfg(not(target_arch = "wasm32"))] - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - // Disabled for code coverage, enable to track bugs - // fork: true, - .. ProptestConfig::default() - })] - #[test] - #[should_panic] - fn prop_junk(d in arb_junk()) { - let mut d1 = d.clone(); - let mut d2 = d.clone(); - let mut d3 = d; - - from_slice::(&mut d1).expect("from_slice"); - to_borrowed_value(&mut d2).expect("to_borrowed_value"); - to_owned_value(&mut d3).expect("to_owned_value"); - - } - } - - #[cfg(not(target_arch = "wasm32"))] - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - // Disabled for code coverage, enable to track bugs - // fork: true, - .. 
ProptestConfig::default() - })] - - #[test] - #[should_panic] - fn prop_string(d in "\\PC*") { - let mut d1 = d.clone(); - let d1 = unsafe{ d1.as_bytes_mut()}; - let mut d2 = d.clone(); - let d2 = unsafe{ d2.as_bytes_mut()}; - let mut d3 = d; - let d3 = unsafe{ d3.as_bytes_mut()}; - from_slice::(d1).expect("from_slice"); - to_borrowed_value(d2).expect("to_borrowed_value"); - to_owned_value(d3).expect("to_owned_value"); - - } - } -} +mod tests; diff --git a/src/macros.rs b/src/macros.rs index e8932f9b..506bbaca 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1238,7 +1238,7 @@ macro_rules! static_cast_i8 { #[macro_export] macro_rules! static_cast_i32 { ($v:expr) => { - mem::transmute::<_, i32>($v) + std::mem::transmute::<_, i32>($v) }; } @@ -1246,7 +1246,7 @@ macro_rules! static_cast_i32 { #[macro_export] macro_rules! static_cast_u32 { ($v:expr) => { - mem::transmute::<_, u32>($v) + std::mem::transmute::<_, u32>($v) }; } diff --git a/src/native/deser.rs b/src/native/deser.rs new file mode 100644 index 00000000..0e07b587 --- /dev/null +++ b/src/native/deser.rs @@ -0,0 +1,144 @@ +use crate::{ + safer_unchecked::GetSaferUnchecked, + stringparse::{get_unicode_codepoint, ESCAPE_MAP}, + Deserializer, ErrorType, Result, +}; + +#[allow(clippy::cast_possible_truncation)] +pub(crate) unsafe fn parse_str<'invoke, 'de>( + input: *mut u8, + data: &'invoke [u8], + _buffer: &'invoke mut [u8], + idx: usize, +) -> Result<&'de str> { + use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + + // skip leading `"` + let src: &[u8] = data.get_kinda_unchecked(idx + 1..); + let input = input.add(idx + 1); + + let mut src_i = 0; + let mut b = *src.get_kinda_unchecked(src_i); + + // quickly skip all the "good stuff" + while b != b'"' && b != b'\\' { + src_i += 1; + b = *src.get_kinda_unchecked(src_i); + } + if b == b'"' { + let v = std::str::from_utf8_unchecked(std::slice::from_raw_parts(input, src_i)); + return Ok(v); + } + + // we hit an escape sequence now it gets tricky + // our 
destination index is idx +1 to skip the '"' + our current position + let mut dst_i = src_i; + while b != b'"' { + if b == b'\\' { + // don't advance i yet + let escape_char = *src.get_kinda_unchecked(src_i + 1); + if escape_char == b'u' { + // got to reduce by 1 since we have to include the '\\' for get_unicode_codepoint + let (cp, src_offset) = get_unicode_codepoint(src.get_kinda_unchecked(src_i..)) + .map_err(|_| { + Deserializer::error_c(idx + 1 + src_i, 'u', InvalidUnicodeCodepoint) + })?; + + // from codepoint_to_utf8 since we write directly to input + if cp <= 0x7F { + input.add(dst_i).write(cp as u8); + dst_i += 1; + } else if cp <= 0x7FF { + input.add(dst_i).write(((cp >> 6) + 192) as u8); + dst_i += 1; + input.add(dst_i).write(((cp & 63) + 128) as u8); + dst_i += 1; + } else if cp <= 0xFFFF { + input.add(dst_i).write(((cp >> 12) + 224) as u8); + dst_i += 1; + input.add(dst_i).write((((cp >> 6) & 63) + 128) as u8); + dst_i += 1; + input.add(dst_i).write(((cp & 63) + 128) as u8); + dst_i += 1; + } else if cp <= 0x0010_FFFF { + input.add(dst_i).write(((cp >> 18) + 240) as u8); + dst_i += 1; + input.add(dst_i).write((((cp >> 12) & 63) + 128) as u8); + dst_i += 1; + input.add(dst_i).write((((cp >> 6) & 63) + 128) as u8); + dst_i += 1; + input.add(dst_i).write(((cp & 63) + 128) as u8); + dst_i += 1; + } else { + return Err(Deserializer::error_c( + idx + 1 + src_i, + 'u', + InvalidUnicodeCodepoint, + )); + } + // We have to substract one since we're already moving to the next character at the end of the loop + src_i += src_offset - 1; + } else { + let escape_result: u8 = *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize); + if escape_result == 0 { + return Err(Deserializer::error_c( + idx + 1 + src_i, + escape_char as char, + InvalidEscape, + )); + } + input.add(dst_i).write(escape_result); + dst_i += 1; + // move i for reading the escape char + src_i += 1; + } + } else { + input.add(dst_i).write(b); + dst_i += 1; + } + src_i += 1; + b = 
*src.get_kinda_unchecked(src_i); + } + Ok(std::str::from_utf8_unchecked(std::slice::from_raw_parts( + input, dst_i, + ))) +} + +#[cfg(test)] +mod test { + use crate::SIMDJSON_PADDING; + + fn deser_str(input: &[u8]) -> Result { + let mut input = input.to_vec(); + let mut input2 = input.clone(); + input2.append(vec![0; SIMDJSON_PADDING * 2].as_mut()); + let mut buffer = vec![0; 1024]; + + let r = unsafe { + Deserializer::parse_str_(input.as_mut_ptr(), &input2, buffer.as_mut_slice(), 0)? + }; + dbg!(r); + Ok(String::from(r)) + } + use super::*; + #[test] + fn easy_string() -> Result<()> { + let s = deser_str(&br#""snot""#[..])?; + assert_eq!("snot", s); + Ok(()) + } + + #[test] + fn string_with_quote() -> Result<()> { + let s = deser_str(&br#""snot says:\n \"badger\"""#[..])?; + assert_eq!("snot says:\n \"badger\"", s); + Ok(()) + } + + #[test] + fn string_with_utf8() -> Result<()> { + let s = deser_str(&br#""\u000e""#[..])?; + assert_eq!("\u{e}", s); + Ok(()) + } +} diff --git a/src/native/mod.rs b/src/native/mod.rs new file mode 100644 index 00000000..904d5887 --- /dev/null +++ b/src/native/mod.rs @@ -0,0 +1,26 @@ +use simdutf8::basic::imp::ChunkedUtf8Validator; + +pub mod deser; +pub mod stage1; + +/// This is a hack, since there is no native implementation of the chunked validator we pre-validate the entire +/// input string in the case of a fallback and then always let the chunked validator return true. 
+pub(crate) struct ChunkedUtf8ValidatorImp(); + +impl ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { + unsafe fn new() -> Self + where + Self: Sized, + { + ChunkedUtf8ValidatorImp() + } + + unsafe fn update_from_chunks(&mut self, _input: &[u8]) {} + + unsafe fn finalize( + self, + _remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), simdutf8::basic::Utf8Error> { + Ok(()) + } +} diff --git a/src/native/stage1.rs b/src/native/stage1.rs new file mode 100644 index 00000000..2907d5c0 --- /dev/null +++ b/src/native/stage1.rs @@ -0,0 +1,481 @@ +#![allow(clippy::cast_lossless, clippy::cast_sign_loss)] + +use crate::{static_cast_i32, Stage1Parse}; + +#[allow(non_camel_case_types)] +type v128 = [u8; 16]; + +fn u8x16_splat(n: u8) -> v128 { + [n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n] +} + +fn v128_and(a: v128, b: v128) -> v128 { + [ + a[0] & b[0], + a[1] & b[1], + a[2] & b[2], + a[3] & b[3], + a[4] & b[4], + a[5] & b[5], + a[6] & b[6], + a[7] & b[7], + a[8] & b[8], + a[9] & b[9], + a[10] & b[10], + a[11] & b[11], + a[12] & b[12], + a[13] & b[13], + a[14] & b[14], + a[15] & b[15], + ] +} + +fn u8x16_shr(a: v128, n: i32) -> v128 { + [ + a[0] >> n, + a[1] >> n, + a[2] >> n, + a[3] >> n, + a[4] >> n, + a[5] >> n, + a[6] >> n, + a[7] >> n, + a[8] >> n, + a[9] >> n, + a[10] >> n, + a[11] >> n, + a[12] >> n, + a[13] >> n, + a[14] >> n, + a[15] >> n, + ] +} + +fn u8x16_swizzle(a: v128, s: v128) -> [u8; 16] { + [ + if s[0] > 0x0f { + 0 + } else { + a[(s[0] & 0x0f) as usize] + }, + if s[1] > 0x0f { + 0 + } else { + a[(s[1] & 0x0f) as usize] + }, + if s[2] > 0x0f { + 0 + } else { + a[(s[2] & 0x0f) as usize] + }, + if s[3] > 0x0f { + 0 + } else { + a[(s[3] & 0x0f) as usize] + }, + if s[4] > 0x0f { + 0 + } else { + a[(s[4] & 0x0f) as usize] + }, + if s[5] > 0x0f { + 0 + } else { + a[(s[5] & 0x0f) as usize] + }, + if s[6] > 0x0f { + 0 + } else { + a[(s[6] & 0x0f) as usize] + }, + if s[7] > 0x0f { + 0 + } else { + a[(s[7] & 0x0f) as usize] + }, + if s[8] 
> 0x0f { + 0 + } else { + a[(s[8] & 0x0f) as usize] + }, + if s[9] > 0x0f { + 0 + } else { + a[(s[9] & 0x0f) as usize] + }, + if s[10] > 0x0f { + 0 + } else { + a[(s[10] & 0x0f) as usize] + }, + if s[11] > 0x0f { + 0 + } else { + a[(s[11] & 0x0f) as usize] + }, + if s[12] > 0x0f { + 0 + } else { + a[(s[12] & 0x0f) as usize] + }, + if s[13] > 0x0f { + 0 + } else { + a[(s[13] & 0x0f) as usize] + }, + if s[14] > 0x0f { + 0 + } else { + a[(s[14] & 0x0f) as usize] + }, + if s[15] > 0x0f { + 0 + } else { + a[(s[15] & 0x0f) as usize] + }, + ] +} + +// fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +// [ +// u8::from((a[0] & b[0]) != 0), +// u8::from((a[1] & b[1]) != 0), +// u8::from((a[2] & b[2]) != 0), +// u8::from((a[3] & b[3]) != 0), +// u8::from((a[4] & b[4]) != 0), +// u8::from((a[5] & b[5]) != 0), +// u8::from((a[6] & b[6]) != 0), +// u8::from((a[7] & b[7]) != 0), +// u8::from((a[8] & b[8]) != 0), +// u8::from((a[9] & b[9]) != 0), +// u8::from((a[10] & b[10]) != 0), +// u8::from((a[11] & b[11]) != 0), +// u8::from((a[12] & b[12]) != 0), +// u8::from((a[13] & b[13]) != 0), +// u8::from((a[14] & b[14]) != 0), +// u8::from((a[15] & b[15]) != 0), +// ] +// } + +// fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +// let mut r = [0u8; 16]; +// for i in 0..16 { +// // if the most significant bit of b is set, +// // then the destination byte is set to 0. 
+// if b[i] & 0x80 == 0u8 { +// r[i] = a[(b[i] % 16) as usize]; +// } +// } +// r +// } + +// fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +// [ +// a[0] + b[0], +// a[1] + b[1], +// a[2] + b[2], +// a[3] + b[3], +// a[4] + b[4], +// a[5] + b[5], +// a[6] + b[6], +// a[7] + b[7], +// a[8] + b[8], +// a[9] + b[9], +// a[10] + b[10], +// a[11] + b[11], +// a[12] + b[12], +// a[13] + b[13], +// a[14] + b[14], +// a[15] + b[15], +// ] +// } + +fn bool_to_u8(b: bool) -> u8 { + if b { + 0xFF + } else { + 0x00 + } +} +fn u8x16_le(a: v128, b: v128) -> v128 { + [ + bool_to_u8(a[0] <= b[0]), + bool_to_u8(a[1] <= b[1]), + bool_to_u8(a[2] <= b[2]), + bool_to_u8(a[3] <= b[3]), + bool_to_u8(a[4] <= b[4]), + bool_to_u8(a[5] <= b[5]), + bool_to_u8(a[6] <= b[6]), + bool_to_u8(a[7] <= b[7]), + bool_to_u8(a[8] <= b[8]), + bool_to_u8(a[9] <= b[9]), + bool_to_u8(a[10] <= b[10]), + bool_to_u8(a[11] <= b[11]), + bool_to_u8(a[12] <= b[12]), + bool_to_u8(a[13] <= b[13]), + bool_to_u8(a[14] <= b[14]), + bool_to_u8(a[15] <= b[15]), + ] +} + +fn u8x16_eq(a: v128, b: v128) -> v128 { + [ + bool_to_u8(a[0] == b[0]), + bool_to_u8(a[1] == b[1]), + bool_to_u8(a[2] == b[2]), + bool_to_u8(a[3] == b[3]), + bool_to_u8(a[4] == b[4]), + bool_to_u8(a[5] == b[5]), + bool_to_u8(a[6] == b[6]), + bool_to_u8(a[7] == b[7]), + bool_to_u8(a[8] == b[8]), + bool_to_u8(a[9] == b[9]), + bool_to_u8(a[10] == b[10]), + bool_to_u8(a[11] == b[11]), + bool_to_u8(a[12] == b[12]), + bool_to_u8(a[13] == b[13]), + bool_to_u8(a[14] == b[14]), + bool_to_u8(a[15] == b[15]), + ] +} + +fn u8x16_bitmask(a: v128) -> u16 { + (a[0] & 0b1000_0000 != 0) as u16 + | (((a[1] & 0b1000_0000 != 0) as u16) << 1) + | (((a[2] & 0b1000_0000 != 0) as u16) << 2) + | (((a[3] & 0b1000_0000 != 0) as u16) << 3) + | (((a[4] & 0b1000_0000 != 0) as u16) << 4) + | (((a[5] & 0b1000_0000 != 0) as u16) << 5) + | (((a[6] & 0b1000_0000 != 0) as u16) << 6) + | (((a[7] & 0b1000_0000 != 0) as u16) << 7) + | (((a[8] & 0b1000_0000 != 0) as u16) << 8) + 
| (((a[9] & 0b1000_0000 != 0) as u16) << 9) + | (((a[10] & 0b1000_0000 != 0) as u16) << 10) + | (((a[11] & 0b1000_0000 != 0) as u16) << 11) + | (((a[12] & 0b1000_0000 != 0) as u16) << 12) + | (((a[13] & 0b1000_0000 != 0) as u16) << 13) + | (((a[14] & 0b1000_0000 != 0) as u16) << 14) + | (((a[15] & 0b1000_0000 != 0) as u16) << 15) +} +// unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { +// unsafe { mem::transmute(a) } +// } + +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// pub unsafe fn neon_movemask_bulk( +// p0: uint8x16_t, +// p1: uint8x16_t, +// p2: uint8x16_t, +// p3: uint8x16_t, +// ) -> u64 { +// let bit_mask = bit_mask(); + +// let t0 = vandq_u8(p0, bit_mask); +// let t1 = vandq_u8(p1, bit_mask); +// let t2 = vandq_u8(p2, bit_mask); +// let t3 = vandq_u8(p3, bit_mask); +// let sum0 = vpaddq_u8(t0, t1); +// let sum1 = vpaddq_u8(t2, t3); +// let sum0 = vpaddq_u8(sum0, sum1); +// let sum0 = vpaddq_u8(sum0, sum0); + +// vreinterpretq_u64_u8(sum0)[0] +// } + +// fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { +// unsafe { std::mem::transmute(a) } +// } + +#[derive(Debug)] +pub(crate) struct NativeInput { + v0: v128, + v1: v128, + v2: v128, + v3: v128, +} + +impl Stage1Parse for NativeInput { + type Utf8Validator = super::ChunkedUtf8ValidatorImp; + type SimdRepresentation = v128; + unsafe fn new(ptr: &[u8]) -> Self { + NativeInput { + v0: *(ptr.as_ptr().cast::()), + v1: *(ptr.as_ptr().add(16).cast::()), + v2: *(ptr.as_ptr().add(32).cast::()), + v3: *(ptr.as_ptr().add(48).cast::()), + } + } + + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_sign_loss)] + unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { + let mut quote_mask: u64 = quote_bits ^ (quote_bits << 1); + quote_mask = quote_mask ^ (quote_mask << 2); + quote_mask = quote_mask ^ (quote_mask << 4); + quote_mask = quote_mask ^ (quote_mask << 8); + quote_mask = quote_mask ^ (quote_mask << 16); + quote_mask = quote_mask ^ (quote_mask << 32); + 
quote_mask + } + + unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { + let mask = u8x16_splat(m); + let cmp_res_0 = u8x16_eq(self.v0, mask); + let res_0 = u8x16_bitmask(cmp_res_0) as u64; + let cmp_res_1 = u8x16_eq(self.v1, mask); + let res_1 = u8x16_bitmask(cmp_res_1) as u64; + let cmp_res_2 = u8x16_eq(self.v2, mask); + let res_2 = u8x16_bitmask(cmp_res_2) as u64; + let cmp_res_3 = u8x16_eq(self.v3, mask); + let res_3 = u8x16_bitmask(cmp_res_3) as u64; + res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48) + } + + unsafe fn unsigned_lteq_against_input(&self, maxval: v128) -> u64 { + let cmp_res_0 = u8x16_le(self.v0, maxval); + let res_0 = u8x16_bitmask(cmp_res_0) as u64; + let cmp_res_1 = u8x16_le(self.v1, maxval); + let res_1 = u8x16_bitmask(cmp_res_1) as u64; + let cmp_res_2 = u8x16_le(self.v2, maxval); + let res_2 = u8x16_bitmask(cmp_res_2) as u64; + let cmp_res_3 = u8x16_le(self.v3, maxval); + let res_3 = u8x16_bitmask(cmp_res_3) as u64; + res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48) + } + + unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64) { + // do a 'shufti' to detect structural JSON characters + // they are + // * `{` 0x7b + // * `}` 0x7d + // * `:` 0x3a + // * `[` 0x5b + // * `]` 0x5d + // * `,` 0x2c + // these go into the first 3 buckets of the comparison (1/2/4) + + // we are also interested in the four whitespace characters: + // * space 0x20 + // * linefeed 0x0a + // * horizontal tab 0x09 + // * carriage return 0x0d + // these go into the next 2 buckets of the comparison (8/16) + const LOW_NIBBLE_MASK: v128 = [16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0]; + const HIGH_NIBBLE_MASK: v128 = [8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0]; + + let structural_shufti_mask = u8x16_splat(0x7); + let whitespace_shufti_mask = u8x16_splat(0x18); + let low_nib_and_mask = u8x16_splat(0xf); + let high_nib_and_mask = u8x16_splat(0x7f); + let zero_mask = u8x16_splat(0); + + let v_v0 = v128_and( + 
u8x16_swizzle(LOW_NIBBLE_MASK, v128_and(self.v0, low_nib_and_mask)), + u8x16_swizzle( + HIGH_NIBBLE_MASK, + v128_and(u8x16_shr(self.v0, 4), high_nib_and_mask), + ), + ); + let v_v1 = v128_and( + u8x16_swizzle(LOW_NIBBLE_MASK, v128_and(self.v1, low_nib_and_mask)), + u8x16_swizzle( + HIGH_NIBBLE_MASK, + v128_and(u8x16_shr(self.v1, 4), high_nib_and_mask), + ), + ); + let v_v2 = v128_and( + u8x16_swizzle(LOW_NIBBLE_MASK, v128_and(self.v2, low_nib_and_mask)), + u8x16_swizzle( + HIGH_NIBBLE_MASK, + v128_and(u8x16_shr(self.v2, 4), high_nib_and_mask), + ), + ); + let v_v3 = v128_and( + u8x16_swizzle(LOW_NIBBLE_MASK, v128_and(self.v3, low_nib_and_mask)), + u8x16_swizzle( + HIGH_NIBBLE_MASK, + v128_and(u8x16_shr(self.v3, 4), high_nib_and_mask), + ), + ); + let tmp_v0 = u8x16_eq(v128_and(v_v0, structural_shufti_mask), zero_mask); + let tmp_v1 = u8x16_eq(v128_and(v_v1, structural_shufti_mask), zero_mask); + let tmp_v2 = u8x16_eq(v128_and(v_v2, structural_shufti_mask), zero_mask); + let tmp_v3 = u8x16_eq(v128_and(v_v3, structural_shufti_mask), zero_mask); + + let structural_res_0 = u8x16_bitmask(tmp_v0) as u64; + let structural_res_1 = u8x16_bitmask(tmp_v1) as u64; + let structural_res_2 = u8x16_bitmask(tmp_v2) as u64; + let structural_res_3 = u8x16_bitmask(tmp_v3) as u64; + + *structurals = !(structural_res_0 + | (structural_res_1 << 16) + | (structural_res_2 << 32) + | (structural_res_3 << 48)); + + let tmp_ws_v0 = u8x16_eq(v128_and(v_v0, whitespace_shufti_mask), zero_mask); + let tmp_ws_v1 = u8x16_eq(v128_and(v_v1, whitespace_shufti_mask), zero_mask); + let tmp_ws_v2 = u8x16_eq(v128_and(v_v2, whitespace_shufti_mask), zero_mask); + let tmp_ws_v3 = u8x16_eq(v128_and(v_v3, whitespace_shufti_mask), zero_mask); + + let ws_res_0 = u8x16_bitmask(tmp_ws_v0) as u64; + let ws_res_1 = u8x16_bitmask(tmp_ws_v1) as u64; + let ws_res_2 = u8x16_bitmask(tmp_ws_v2) as u64; + let ws_res_3 = u8x16_bitmask(tmp_ws_v3) as u64; + + *whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | 
(ws_res_3 << 48)); + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[allow( + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::uninit_vec + )] + unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { + let cnt: usize = bits.count_ones() as usize; + let mut l = base.len(); + let idx_minus_64 = idx.wrapping_sub(64); + let idx_64_v: [i32; 4] = [ + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + ]; + + // We're doing some trickery here. + // We reserve 64 extra entries, because we've at most 64 bit to set + // then we trunctate the base to the next base (that we calcuate above) + // We later indiscriminatory writre over the len we set but that's OK + // since we ensure we reserve the needed space + base.reserve(64); + base.set_len(l + cnt); + + while bits != 0 { + let v0 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v1 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v2 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v3 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + + let v: [i32; 4] = [ + idx_64_v[0] + v0, + idx_64_v[1] + v1, + idx_64_v[2] + v2, + idx_64_v[3] + v3, + ]; + std::ptr::write(base.as_mut_ptr().add(l).cast::<[i32; 4]>(), v); + l += 4; + } + } + + unsafe fn fill_s8(n: i8) -> v128 { + u8x16_splat(n as u8) + } + + unsafe fn zero() -> v128 { + u8x16_splat(0) + } +} diff --git a/src/neon/deser.rs b/src/neon/deser.rs index fdaa6cca..04e9d449 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -41,14 +41,12 @@ fn find_bs_bits_and_quote_bits(v0: uint8x16_t, v1: uint8x16_t) -> (u32, u32) { #[allow( clippy::if_not_else, - clippy::transmute_ptr_to_ptr, - clippy::cast_ptr_alignment, clippy::if_not_else, clippy::cast_ptr_alignment, clippy::too_many_lines )] #[cfg_attr(not(feature = "no-inline"), inline(always))] -pub(crate) fn 
parse_str_neon<'invoke, 'de>( +pub(crate) fn parse_str<'invoke, 'de>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], diff --git a/src/neon/mod.rs b/src/neon/mod.rs index 6db7ee75..27be9a6e 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -1,2 +1,4 @@ pub mod deser; pub mod stage1; + +pub(crate) struct ChunkedUtf8ValidatorImp {} diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index bbc9444a..0f459a62 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -49,7 +49,9 @@ pub(crate) struct SimdInputNEON { v3: uint8x16_t, } -impl Stage1Parse for SimdInputNEON { +impl Stage1Parse for SimdInputNEON { + type Utf8Validator = simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp; + type SimdRepresentation = int8x16_t; #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_ptr_alignment)] unsafe fn new(ptr: &[u8]) -> Self { @@ -115,11 +117,10 @@ impl Stage1Parse for SimdInputNEON { // * carriage return 0x0d // these go into the next 2 buckets of the comparison (8/16) - // TODO: const? - let low_nibble_mask: uint8x16_t = + const low_nibble_mask: uint8x16_t = std::mem::transmute([16u8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0]); - // TODO: const? 
- let high_nibble_mask: uint8x16_t = + + const high_nibble_mask: uint8x16_t = std::mem::transmute([8u8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0]); let structural_shufti_mask: uint8x16_t = vmovq_n_u8(0x7); diff --git a/src/portable/deser.rs b/src/portable/deser.rs new file mode 100644 index 00000000..d1aab295 --- /dev/null +++ b/src/portable/deser.rs @@ -0,0 +1,160 @@ +use std::simd::{u8x32, SimdPartialEq, ToBitMask}; + +use crate::{ + safer_unchecked::GetSaferUnchecked, + stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, + Deserializer, ErrorType, Result, +}; + +#[cfg_attr(not(feature = "no-inline"), inline)] +pub(crate) unsafe fn parse_str<'invoke, 'de>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + mut idx: usize, +) -> Result<&'de str> { + use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + + const SLASH: u8x32 = u8x32::from_array([b'\\'; 32]); + const QUOTE: u8x32 = u8x32::from_array([b'"'; 32]); + // Add 1 to skip the initial " + idx += 1; + //let mut read: usize = 0; + + // we include the terminal '"' so we know where to end + // This is safe since we check sub's length in the range access above and only + // create sub sliced form sub to `sub.len()`. + + let src: &[u8] = data.get_kinda_unchecked(idx..); + let mut src_i: usize = 0; + let mut len = src_i; + loop { + let v = u8x32::from_array(*src.as_ptr().add(src_i).cast::<[u8; 32]>()); + + // store to dest unconditionally - we can overwrite the bits we don't like + // later + let bs_bits: u32 = v.simd_eq(SLASH).to_bitmask(); + let quote_bits = v.simd_eq(QUOTE).to_bitmask(); + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { + // we encountered quotes first. Move dst to point to quotes and exit + // find out where the quote is... + let quote_dist: u32 = quote_bits.trailing_zeros(); + + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) 
+ // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + //////////////////////// + + // we advance the point, accounting for the fact that we have a NULl termination + + len += quote_dist as usize; + let v = std::str::from_utf8_unchecked(std::slice::from_raw_parts(input.add(idx), len)); + return Ok(v); + + // we compare the pointers since we care if they are 'at the same spot' + // not if they are the same value + } + if (quote_bits.wrapping_sub(1) & bs_bits) == 0 { + // they are the same. Since they can't co-occur, it means we encountered + // neither. + src_i += 32; + len += 32; + } else { + // Move to the 'bad' character + let bs_dist: u32 = bs_bits.trailing_zeros(); + len += bs_dist as usize; + src_i += bs_dist as usize; + break; + } + } + + let mut dst_i: usize = 0; + + // To be more conform with upstream + loop { + let v = u8x32::from_array(*src.as_ptr().add(src_i).cast::<[u8; 32]>()); + + buffer + .as_mut_ptr() + .add(dst_i) + .cast::<[u8; 32]>() + .write(*v.as_array()); + + // store to dest unconditionally - we can overwrite the bits we don't like + // later + let bs_bits: u32 = v.simd_eq(SLASH).to_bitmask(); + let quote_bits = v.simd_eq(QUOTE).to_bitmask(); + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { + // we encountered quotes first. Move dst to point to quotes and exit + // find out where the quote is... + let quote_dist: u32 = quote_bits.trailing_zeros(); + + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. 
+ //////////////////////// + + // we advance the point, accounting for the fact that we have a NULl termination + + dst_i += quote_dist as usize; + input + .add(idx + len) + .copy_from_nonoverlapping(buffer.as_ptr(), dst_i); + let v = std::str::from_utf8_unchecked(std::slice::from_raw_parts( + input.add(idx), + len + dst_i, + )); + return Ok(v); + + // we compare the pointers since we care if they are 'at the same spot' + // not if they are the same value + } + if (quote_bits.wrapping_sub(1) & bs_bits) == 0 { + // they are the same. Since they can't co-occur, it means we encountered + // neither. + src_i += 32; + dst_i += 32; + } else { + // find out where the backspace is + let bs_dist: u32 = bs_bits.trailing_zeros(); + let escape_char: u8 = *src.get_kinda_unchecked(src_i + bs_dist as usize + 1); + // we encountered backslash first. Handle backslash + if escape_char == b'u' { + // move src/dst up to the start; they will be further adjusted + // within the unicode codepoint handling code. + src_i += bs_dist as usize; + dst_i += bs_dist as usize; + let (o, s) = handle_unicode_codepoint( + src.get_kinda_unchecked(src_i..), + buffer.get_kinda_unchecked_mut(dst_i..), + ) + .map_err(|_| Deserializer::error_c(src_i, 'u', InvalidUnicodeCodepoint))?; + + if o == 0 { + return Err(Deserializer::error_c(src_i, 'u', InvalidUnicodeCodepoint)); + }; + // We moved o steps forward at the destination and 6 on the source + src_i += s; + dst_i += o; + } else { + // simple 1:1 conversion. Will eat bs_dist+2 characters in input and + // write bs_dist+1 characters to output + // note this may reach beyond the part of the buffer we've actually + // seen. 
I think this is ok + let escape_result: u8 = *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize); + if escape_result == 0 { + return Err(Deserializer::error_c( + src_i, + escape_char as char, + InvalidEscape, + )); + } + *buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + src_i += bs_dist as usize + 2; + dst_i += bs_dist as usize + 1; + } + } + } +} diff --git a/src/portable/mod.rs b/src/portable/mod.rs new file mode 100644 index 00000000..fbcc5482 --- /dev/null +++ b/src/portable/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod deser; +pub(crate) mod stage1; diff --git a/src/portable/stage1.rs b/src/portable/stage1.rs new file mode 100644 index 00000000..c3a518bc --- /dev/null +++ b/src/portable/stage1.rs @@ -0,0 +1,222 @@ +use std::{ + ops::Shr, + simd::{prelude::*, ToBitMask}, +}; + +macro_rules! low_nibble_mask { + () => { + [ + 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, + 2, 9, 0, 0, + ] + }; +} + +macro_rules! high_nibble_mask { + () => { + [ + 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, + 2, 1, 0, 0, + ] + }; +} + +use crate::{static_cast_i32, Stage1Parse}; +#[derive(Debug)] +pub(crate) struct SimdInputPortable { + v0: u8x32, + v1: u8x32, +} + +impl Stage1Parse for SimdInputPortable { + type Utf8Validator = simdutf8::basic::imp::portable::ChunkedUtf8ValidatorImp; + type SimdRepresentation = u8x32; + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_ptr_alignment)] + unsafe fn new(ptr: &[u8]) -> Self { + Self { + v0: u8x32::from_array(*ptr.as_ptr().cast::<[u8; 32]>()), + v1: u8x32::from_array(*ptr.as_ptr().add(32).cast::<[u8; 32]>()), + } + } + + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_sign_loss)] + unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { + let mut quote_mask: u64 = quote_bits ^ (quote_bits << 1); + quote_mask = quote_mask ^ (quote_mask << 2); + quote_mask = quote_mask ^ (quote_mask << 4); + 
quote_mask = quote_mask ^ (quote_mask << 8); + quote_mask = quote_mask ^ (quote_mask << 16); + quote_mask = quote_mask ^ (quote_mask << 32); + quote_mask + } + + /// a straightforward comparison of a mask against input + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { + let mask = u8x32::splat(m); + let cmp_res_0 = self.v0.simd_eq(mask); + let res_0 = u64::from(cmp_res_0.to_bitmask()); + let cmp_res_1 = self.v1.simd_eq(mask); + let res_1 = u64::from(cmp_res_1.to_bitmask()); + res_0 | (res_1 << 32) + } + + // find all values less than or equal than the content of maxval (using unsigned arithmetic) + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_sign_loss)] + unsafe fn unsigned_lteq_against_input(&self, maxval: u8x32) -> u64 { + let res_0: u64 = u64::from(self.v0.simd_le(maxval).to_bitmask()); + let res_1: u64 = u64::from(self.v1.simd_le(maxval).to_bitmask()); + res_0 | (res_1 << 32) + } + + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_sign_loss, clippy::cast_lossless)] + unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64) { + // do a 'shufti' to detect structural JSON characters + // they are + // * `{` 0x7b + // * `}` 0x7d + // * `:` 0x3a + // * `[` 0x5b + // * `]` 0x5d + // * `,` 0x2c + // these go into the first 3 buckets of the comparison (1/2/4) + + // we are also interested in the four whitespace characters: + // * space 0x20 + // * linefeed 0x0a + // * horizontal tab 0x09 + // * carriage return 0x0d + // these go into the next 2 buckets of the comparison (8/16) + + const LOW_NIBBLE_MASK: u8x32 = u8x32::from_array(low_nibble_mask!()); + const HIGH_NIBBLE_MASK: u8x32 = u8x32::from_array(high_nibble_mask!()); + + let structural_shufti_mask: u8x32 = u8x32::splat(0b0000_0111); // 0x07 + let whitespace_shufti_mask: u8x32 = u8x32::splat(0b0001_1000); // 
0x18 + + // FIXME: do we need this dance? + + let v0_32 = i32x8::from_array(std::mem::transmute(*self.v0.as_array())); + let v0_shifted: Simd = v0_32.shr(i32x8::splat(4)); + let v0_shifted = u8x32::from_array(std::mem::transmute(v0_shifted)); + + // We have to adjust the index here the reason being that while the avx instruction + // only uses the lower 4 bits for index and the 8th bit for overflow (set to 0) + // std::simd::swizzle uses all bits 5-8 for overflow, so we need to mask out + // bit 6, 5 and 7 to get the correct behaviour + let v0_idx_low = self.v0 & u8x32::splat(0b1000_1111); + let v0_swizzle_low = LOW_NIBBLE_MASK.swizzle_dyn(v0_idx_low); + let v0_idx_high = v0_shifted & u8x32::splat(0b0000_1111); + let v0_swizzle_high = HIGH_NIBBLE_MASK.swizzle_dyn(v0_idx_high); + let v_lo = v0_swizzle_low & v0_swizzle_high; + + let v1_32 = i32x8::from_array(std::mem::transmute(*self.v1.as_array())); + let v1_shifted: Simd = v1_32.shr(i32x8::splat(4)); + let v1_shifted = u8x32::from_array(std::mem::transmute(v1_shifted)); + + // We have to adjust the index here the reason being that while the avx instruction + // only uses the lower 4 bits for index and the 8th bit for overflow (set to 0) + // std::simd::swizzle uses all bits 5-8 for overflow, so we need to mask out + // bit 6, 5 and 7 to get the correct behaviour + let v1_idx_low = self.v1 & u8x32::splat(0b1000_1111); + let v1_swizzle_low = LOW_NIBBLE_MASK.swizzle_dyn(v1_idx_low); + let v1_idx_high = v1_shifted & u8x32::splat(0b0000_1111); + let v1_swizzle_high = HIGH_NIBBLE_MASK.swizzle_dyn(v1_idx_high); + let v_hi = v1_swizzle_low & v1_swizzle_high; + + let tmp_lo = (v_lo & structural_shufti_mask).simd_eq(u8x32::splat(0)); + let tmp_hi = (v_hi & structural_shufti_mask).simd_eq(u8x32::splat(0)); + + let structural_res_0 = u64::from(tmp_lo.to_bitmask()); + let structural_res_1 = u64::from(tmp_hi.to_bitmask()); + *structurals = !(structural_res_0 | (structural_res_1 << 32)); + + let tmp_ws_lo = (v_lo & 
whitespace_shufti_mask).simd_eq(u8x32::splat(0)); + let tmp_ws_hi = (v_hi & whitespace_shufti_mask).simd_eq(u8x32::splat(0)); + + let ws_res_0 = u64::from(tmp_ws_lo.to_bitmask()); + let ws_res_1 = u64::from(tmp_ws_hi.to_bitmask()); + + *whitespace = !(ws_res_0 | (ws_res_1 << 32)); + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + //TODO: usize was u32 here does this matter? + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow( + clippy::cast_possible_wrap, + clippy::cast_ptr_alignment, + clippy::uninit_vec + )] + unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { + let cnt: usize = bits.count_ones() as usize; + let mut l = base.len(); + let idx_minus_64 = static_cast_i32!(idx.wrapping_sub(64)); + let idx_64_v: i32x8 = i32x8::from_array([ + idx_minus_64, + idx_minus_64, + idx_minus_64, + idx_minus_64, + idx_minus_64, + idx_minus_64, + idx_minus_64, + idx_minus_64, + ]); + + // We're doing some trickery here. 
+ // We reserve 64 extra entries, because we've at most 64 bit to set + // then we trunctate the base to the next base (that we calcuate above) + // We later indiscriminatory writre over the len we set but that's OK + // since we ensure we reserve the needed space + base.reserve(64); + base.set_len(l + cnt); + + while bits != 0 { + let v0 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v1 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v2 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v3 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v4 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v5 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v6 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v7 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + + let v = i32x8::from_array([v0, v1, v2, v3, v4, v5, v6, v7]); + let v = idx_64_v + v; + base.as_mut_ptr() + .add(l) + .cast::<[i32; 8]>() + .write(*v.as_array()); + // + l += 8; + } + } + + #[allow(clippy::cast_sign_loss)] + #[cfg_attr(not(feature = "no-inline"), inline)] + unsafe fn fill_s8(n: i8) -> u8x32 { + u8x32::splat(n as u8) + } + + #[cfg_attr(not(feature = "no-inline"), inline)] + unsafe fn zero() -> u8x32 { + u8x32::splat(0) + } +} diff --git a/src/serde.rs b/src/serde.rs index 12cea7ce..748f5a97 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -664,7 +664,7 @@ mod test { y: u64, } - let mut json = br#"[1,2]"#.to_vec(); + let mut json = b"[1,2]".to_vec(); let p: Point = serde_json::from_slice(&json).unwrap(); assert_eq!(p.x, 1); diff --git a/src/serde/de.rs b/src/serde/de.rs index 7c242e95..9f838ef4 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -335,7 +335,7 @@ where { // Parse the opening bracket of the sequence. match self.next() { - Ok(Node::Object { len, count: _ }) if len == 1 => { + Ok(Node::Object { len: 1, .. 
}) => { // Give the visitor access to each element of the sequence. // let value = ri!(visitor.visit_enum(VariantAccess::new(self))); visitor.visit_enum(VariantAccess::new(self)) diff --git a/src/serde/se.rs b/src/serde/se.rs index f3d270ac..148d1a33 100644 --- a/src/serde/se.rs +++ b/src/serde/se.rs @@ -23,7 +23,7 @@ where { let v = Vec::with_capacity(512); let mut s = Serializer(v); - to.serialize(&mut s).map(|_| s.0) + to.serialize(&mut s).map(|()| s.0) } /// Write a value to a string @@ -90,7 +90,7 @@ where *first = false; value.serialize(&mut **s) } else { - iomap!(s.write(b",")).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",")).and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -122,7 +122,7 @@ where *first = false; value.serialize(&mut **s) } else { - iomap!(s.write(b",")).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",")).and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -154,7 +154,7 @@ where *first = false; value.serialize(&mut **s) } else { - iomap!(s.write(b",")).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",")).and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -186,7 +186,7 @@ where *first = false; value.serialize(&mut **s) } else { - iomap!(s.write(b",")).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",")).and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -225,11 +225,11 @@ where if *first { *first = false; key.serialize(MapKeySerializer { s: &mut **s }) - .and_then(|_| iomap!(s.write(b":"))) + .and_then(|()| iomap!(s.write(b":"))) } else { iomap!(s.write(b",")) - .and_then(|_| key.serialize(MapKeySerializer { s: &mut **s })) - .and_then(|_| iomap!(s.write(b":"))) + .and_then(|()| key.serialize(MapKeySerializer { s: &mut **s })) + .and_then(|()| iomap!(s.write(b":"))) } } #[inline] @@ -304,80 +304,80 @@ where iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + 
.and_then(|()| self.s.write_char(b'"'))) } fn serialize_i16(self, v: i16) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i32(self, v: i32) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i64(self, v: i64) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i128(self, v: i128) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u8(self, v: u8) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u16(self, v: u16) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u32(self, v: u32) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u64(self, v: u64) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u128(self, v: u128) -> Result { 
iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_f32(self, _v: f32) -> Result { @@ -499,14 +499,14 @@ where } = *self; if *first { *first = false; - iomap!(s.write_simple_string(key).and_then(|_| s.write(b":"))) - .and_then(|_| value.serialize(&mut **s)) + iomap!(s.write_simple_string(key).and_then(|()| s.write(b":"))) + .and_then(|()| value.serialize(&mut **s)) } else { iomap!(s .write(b",") - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b":"))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b":"))) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -546,19 +546,19 @@ where } = *self; if *first { *first = false; - iomap!(s.write_simple_string(key).and_then(|_| s.write(b":"))) - .and_then(|_| value.serialize(&mut **s)) + iomap!(s.write_simple_string(key).and_then(|()| s.write(b":"))) + .and_then(|()| value.serialize(&mut **s)) } else { iomap!(s .write(b",") - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b":"))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b":"))) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] fn end(self) -> Result { - iomap!(self.s.write(b"}")).and_then(move |_| { + iomap!(self.s.write(b"}")).and_then(move |()| { if self.first { Ok(()) } else { @@ -651,11 +651,11 @@ where } #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { - iomap!(self.write(b"[").and_then(|_| { + iomap!(self.write(b"[").and_then(|()| { if let Some((first, rest)) = v.split_first() { - self.write_int(*first).and_then(|_| { + self.write_int(*first).and_then(|()| { for v in rest { - self.write(b",").and_then(|_| self.write_int(*v))?; + self.write(b",").and_then(|()| self.write_int(*v))?; } self.write(b"]") }) @@ -718,10 
+718,10 @@ where { iomap!(self .write(b"{") - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b":"))) - .and_then(|_| value.serialize(&mut *self)) - .and_then(|_| iomap!(self.write(b"}"))) + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b":"))) + .and_then(|()| value.serialize(&mut *self)) + .and_then(|()| iomap!(self.write(b"}"))) } #[inline] fn serialize_seq(self, len: Option) -> Result { @@ -730,7 +730,7 @@ where } else { iomap!(self.write(b"[")) } - .map(move |_| SerializeSeq { + .map(move |()| SerializeSeq { s: self, first: true, }) @@ -760,8 +760,8 @@ where ) -> Result { iomap!(self .write(b"{") - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b":")))?; + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b":")))?; self.serialize_seq(Some(len)) } @@ -774,7 +774,7 @@ where } else { iomap!(self.write(b"{")) } - .map(move |_| SerializeMap { + .map(move |()| SerializeMap { s: self, first: true, wrote_closing, @@ -800,15 +800,15 @@ where ) -> Result { iomap!(self .write(b"{") - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b":"))) - .and_then(move |_| { + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b":"))) + .and_then(move |()| { if len == 0 { iomap!(self.write(b"{}")) } else { iomap!(self.write(b"{")) } - .map(move |_| SerializeStructVariant { + .map(move |()| SerializeStructVariant { s: self, first: true, }) @@ -818,6 +818,7 @@ where #[cfg(test)] mod test { + #![allow(clippy::ignored_unit_patterns)] #[cfg(not(target_arch = "wasm32"))] use crate::{OwnedValue as Value, StaticNode}; #[cfg(not(target_arch = "wasm32"))] diff --git a/src/serde/se/pp.rs b/src/serde/se/pp.rs index d38f4294..3f78288e 100644 --- a/src/serde/se/pp.rs +++ b/src/serde/se/pp.rs @@ -23,7 +23,7 @@ where { let v = Vec::with_capacity(512); let mut s = PrettySerializer::new(v); - to.serialize(&mut s).map(|_| s.writer) + 
to.serialize(&mut s).map(|()| s.writer) } /// Write a value to a string @@ -75,7 +75,7 @@ where } #[inline] fn new_line(&mut self) -> std::io::Result<()> { - self.write_char(b'\n').and_then(|_| match self.dent { + self.write_char(b'\n').and_then(|()| match self.dent { 0 => Ok(()), 1 => self.get_writer().write_all(b" "), 2 => self.get_writer().write_all(b" "), @@ -125,9 +125,10 @@ where } = *self; if *first { *first = false; - iomap!(s.new_line()).and_then(|_| value.serialize(&mut **s)) + iomap!(s.new_line()).and_then(|()| value.serialize(&mut **s)) } else { - iomap!(s.write(b",").and_then(|_| s.new_line())).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",").and_then(|()| s.new_line())) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -136,7 +137,7 @@ where Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"]"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"]"))) } } } @@ -158,9 +159,10 @@ where } = *self; if *first { *first = false; - iomap!(s.new_line()).and_then(|_| value.serialize(&mut **s)) + iomap!(s.new_line()).and_then(|()| value.serialize(&mut **s)) } else { - iomap!(s.write(b",").and_then(|_| s.new_line())).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",").and_then(|()| s.new_line())) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -169,7 +171,7 @@ where Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"]"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"]"))) } } } @@ -191,9 +193,10 @@ where } = *self; if *first { *first = false; - iomap!(s.new_line()).and_then(|_| value.serialize(&mut **s)) + iomap!(s.new_line()).and_then(|()| value.serialize(&mut **s)) } else { - iomap!(s.write(b",").and_then(|_| s.new_line())).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",").and_then(|()| s.new_line())) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -202,7 +205,7 @@ where Ok(()) } else { 
self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"]"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"]"))) } } } @@ -224,9 +227,10 @@ where } = *self; if *first { *first = false; - iomap!(s.new_line()).and_then(|_| value.serialize(&mut **s)) + iomap!(s.new_line()).and_then(|()| value.serialize(&mut **s)) } else { - iomap!(s.write(b",").and_then(|_| s.new_line())).and_then(|_| value.serialize(&mut **s)) + iomap!(s.write(b",").and_then(|()| s.new_line())) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -235,7 +239,7 @@ where Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"}"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"}"))) } } } @@ -290,80 +294,80 @@ where iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i16(self, v: i16) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i32(self, v: i32) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i64(self, v: i64) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_i128(self, v: i128) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u8(self, v: u8) -> Result { iomap!(self .s .write_char(b'"') - 
.and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u16(self, v: u16) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u32(self, v: u32) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u64(self, v: u64) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_u128(self, v: u128) -> Result { iomap!(self .s .write_char(b'"') - .and_then(|_| self.s.write_int(v)) - .and_then(|_| self.s.write_char(b'"'))) + .and_then(|()| self.s.write_int(v)) + .and_then(|()| self.s.write_char(b'"'))) } fn serialize_f32(self, _v: f32) -> Result { @@ -488,12 +492,12 @@ where if *first { *first = false; iomap!(s.new_line()) - .and_then(|_| key.serialize(MapKeySerializer { s: &mut **s })) - .and_then(|_| iomap!(s.write(b": "))) + .and_then(|()| key.serialize(MapKeySerializer { s: &mut **s })) + .and_then(|()| iomap!(s.write(b": "))) } else { - iomap!(s.write(b",").and_then(|_| s.new_line())) - .and_then(|_| key.serialize(MapKeySerializer { s: &mut **s })) - .and_then(|_| iomap!(s.write(b": "))) + iomap!(s.write(b",").and_then(|()| s.new_line())) + .and_then(|()| key.serialize(MapKeySerializer { s: &mut **s })) + .and_then(|()| iomap!(s.write(b": "))) } } #[inline] @@ -510,7 +514,7 @@ where Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"}"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"}"))) } } } @@ -539,15 +543,15 @@ 
where *first = false; iomap!(s .new_line() - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b": "))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b": "))) + .and_then(|()| value.serialize(&mut **s)) } else { iomap!(s .write(b",") - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b": "))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b": "))) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] @@ -556,7 +560,7 @@ where Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"}"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"}"))) } } } @@ -591,27 +595,27 @@ where s.indent(); iomap!(s .new_line() - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b": "))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b": "))) + .and_then(|()| value.serialize(&mut **s)) } else { iomap!(s .write(b",") - .and_then(|_| s.write_simple_string(key)) - .and_then(|_| s.write(b": "))) - .and_then(|_| value.serialize(&mut **s)) + .and_then(|()| s.write_simple_string(key)) + .and_then(|()| s.write(b": "))) + .and_then(|()| value.serialize(&mut **s)) } } #[inline] fn end(self) -> Result { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"}"))).and_then(move |_| { + iomap!(self.s.new_line().and_then(|()| self.s.write(b"}"))).and_then(move |()| { if self.first { Ok(()) } else { self.s.dedent(); - iomap!(self.s.new_line().and_then(|_| self.s.write(b"}"))) + iomap!(self.s.new_line().and_then(|()| self.s.write(b"}"))) } }) } @@ -700,16 +704,16 @@ where } #[inline] fn serialize_bytes(self, v: &[u8]) -> Result { - iomap!(self.write(b"[").and_then(|_| { + iomap!(self.write(b"[").and_then(|()| { if let Some((first, rest)) = v.split_first() { self.indent(); - 
self.new_line().and_then(|_| { - self.write_int(*first).and_then(|_| { + self.new_line().and_then(|()| { + self.write_int(*first).and_then(|()| { for v in rest { - self.write(b",").and_then(|_| self.write_int(*v))?; + self.write(b",").and_then(|()| self.write_int(*v))?; } self.dedent(); - self.new_line().and_then(|_| self.write(b"]")) + self.new_line().and_then(|()| self.write(b"]")) }) }) } else { @@ -771,10 +775,10 @@ where { iomap!(self .write(b"{") - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b": "))) - .and_then(|_| value.serialize(&mut *self)) - .and_then(|_| iomap!(self.write(b"}"))) + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b": "))) + .and_then(|()| value.serialize(&mut *self)) + .and_then(|()| iomap!(self.write(b"}"))) } #[inline] fn serialize_seq(self, len: Option) -> Result { @@ -784,7 +788,7 @@ where self.indent(); iomap!(self.write(b"[")) } - .map(move |_| SerializeSeq { + .map(move |()| SerializeSeq { s: self, first: true, }) @@ -815,10 +819,10 @@ where self.indent(); iomap!(self .write(b"{") - .and_then(|_| self.new_line()) - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b": "))) - .and_then(move |_| self.serialize_seq(Some(len))) + .and_then(|()| self.new_line()) + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b": "))) + .and_then(move |()| self.serialize_seq(Some(len))) } #[inline] @@ -829,7 +833,7 @@ where self.indent(); iomap!(self.write(b"{")) } - .map(move |_| SerializeMap { + .map(move |()| SerializeMap { s: self, first: true, }) @@ -855,16 +859,16 @@ where self.indent(); iomap!(self .write(b"{") - .and_then(|_| self.new_line()) - .and_then(|_| self.write_simple_string(variant)) - .and_then(|_| self.write(b": "))) - .and_then(move |_| { + .and_then(|()| self.new_line()) + .and_then(|()| self.write_simple_string(variant)) + .and_then(|()| self.write(b": "))) + .and_then(move |()| { if len == 0 { 
iomap!(self.write(b"{}")) } else { iomap!(self.write(b"{")) } - .map(move |_| SerializeStructVariant { + .map(move |()| SerializeStructVariant { s: self, first: true, }) @@ -874,6 +878,7 @@ where #[cfg(test)] mod test { + #![allow(clippy::ignored_unit_patterns)] use crate::from_slice; #[cfg(not(target_arch = "wasm32"))] use crate::{OwnedValue as Value, StaticNode}; diff --git a/src/serde/value/borrowed/se.rs b/src/serde/value/borrowed/se.rs index 5321c8fc..a9df57ef 100644 --- a/src/serde/value/borrowed/se.rs +++ b/src/serde/value/borrowed/se.rs @@ -604,6 +604,7 @@ impl<'se> serde::ser::SerializeStructVariant for SerializeStructVariant<'se> { #[cfg(test)] mod test { + #![allow(clippy::ignored_unit_patterns)] use super::Value; use crate::{borrowed::Object, serde::from_slice, ObjectHasher}; use serde::{Deserialize, Serialize}; diff --git a/src/serde/value/owned/se.rs b/src/serde/value/owned/se.rs index 6ebd6f58..53895c12 100644 --- a/src/serde/value/owned/se.rs +++ b/src/serde/value/owned/se.rs @@ -580,17 +580,10 @@ impl serde::ser::SerializeStructVariant for SerializeStructVariant { #[cfg(test)] mod test { + #![allow(clippy::ignored_unit_patterns)] use crate::serde::from_slice; #[cfg(not(target_arch = "wasm32"))] use crate::serde::{from_str, to_string}; - /* - use crate::{ - owned::to_value, owned::Object, owned::Value, to_borrowed_value, to_owned_value, - Deserializer, - }; - use halfbrown::HashMap; - use proptest::prelude::*; - */ use serde::{Deserialize, Serialize}; use serde_json; diff --git a/src/simd128/deser.rs b/src/simd128/deser.rs index 8ada661e..a2a5225f 100644 --- a/src/simd128/deser.rs +++ b/src/simd128/deser.rs @@ -13,13 +13,12 @@ use crate::{ #[target_feature(enable = "simd128")] #[allow( clippy::if_not_else, - clippy::transmute_ptr_to_ptr, clippy::cast_ptr_alignment, clippy::cast_possible_wrap, clippy::too_many_lines )] #[cfg_attr(not(feature = "no-inline"), inline)] -pub(crate) fn parse_str_simd128<'invoke, 'de>( +pub(crate) fn parse_str<'invoke, 
'de>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], diff --git a/src/simd128/stage1.rs b/src/simd128/stage1.rs index d627a97a..9d66328e 100644 --- a/src/simd128/stage1.rs +++ b/src/simd128/stage1.rs @@ -10,7 +10,10 @@ pub(crate) struct SimdInput128 { v3: v128, } -impl Stage1Parse for SimdInput128 { +impl Stage1Parse for SimdInput128 { + type Utf8Validator = simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp; + type SimdRepresentation = v128; + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_ptr_alignment)] unsafe fn new(ptr: &[u8]) -> Self { diff --git a/src/sse42/deser.rs b/src/sse42/deser.rs index 0fa3527b..0539b35d 100644 --- a/src/sse42/deser.rs +++ b/src/sse42/deser.rs @@ -8,8 +8,6 @@ use arch::{ __m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8, _mm_storeu_si128, }; -use std::mem; - pub use crate::error::{Error, ErrorType}; use crate::safer_unchecked::GetSaferUnchecked; use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; @@ -17,15 +15,9 @@ use crate::Deserializer; pub use crate::Result; #[target_feature(enable = "sse4.2")] -#[allow( - clippy::if_not_else, - clippy::transmute_ptr_to_ptr, - clippy::cast_ptr_alignment, - clippy::cast_possible_wrap, - clippy::too_many_lines -)] +#[allow(clippy::if_not_else, clippy::cast_possible_wrap)] #[cfg_attr(not(feature = "no-inline"), inline)] -pub(crate) unsafe fn parse_str_sse<'invoke, 'de>( +pub(crate) unsafe fn parse_str<'invoke, 'de>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], @@ -43,6 +35,8 @@ pub(crate) unsafe fn parse_str_sse<'invoke, 'de>( let mut src_i: usize = 0; let mut len = src_i; loop { + // _mm_loadu_si128 does not require alignmnet + #[allow(clippy::cast_ptr_alignment)] let v: __m128i = unsafe { _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) }; @@ -94,8 +88,12 @@ pub(crate) unsafe fn parse_str_sse<'invoke, 'de>( // To be more conform with upstream loop { + // _mm_loadu_si128 does not require 
alignmnet + #[allow(clippy::cast_ptr_alignment)] let v: __m128i = _mm_loadu_si128(src.as_ptr().add(src_i).cast::()); + // _mm_storeu_si128 does not require alignmnet + #[allow(clippy::cast_ptr_alignment)] _mm_storeu_si128(buffer.as_mut_ptr().add(dst_i).cast::(), v); // store to dest unconditionally - we can overwrite the bits we don't like diff --git a/src/sse42/stage1.rs b/src/sse42/stage1.rs index c869ef6f..9a4476ba 100644 --- a/src/sse42/stage1.rs +++ b/src/sse42/stage1.rs @@ -19,8 +19,6 @@ use arch::{ _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128, }; -use std::mem; - macro_rules! low_nibble_mask { () => { _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0) @@ -41,7 +39,9 @@ pub(crate) struct SimdInputSSE { v3: __m128i, } -impl Stage1Parse<__m128i> for SimdInputSSE { +impl Stage1Parse for SimdInputSSE { + type Utf8Validator = simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp; + type SimdRepresentation = __m128i; #[target_feature(enable = "sse4.2")] #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_ptr_alignment)] diff --git a/src/stage2.rs b/src/stage2.rs index e396a67c..55b88a7d 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -2,7 +2,7 @@ use crate::charutils::is_not_structural_or_whitespace; use crate::safer_unchecked::GetSaferUnchecked; use crate::value::tape::Node; -use crate::{Deserializer, Error, ErrorType, Result}; +use crate::{Deserializer, Error, ErrorType, InternalError, Result}; use value_trait::StaticNode; #[cfg_attr(not(feature = "no-inline"), inline(always))] @@ -276,7 +276,12 @@ impl<'de> Deserializer<'de> { unsafe { res.set_len(r_i); }; - return Err(Error::new_c(idx, c as char, ErrorType::InternalError)); + dbg!(); + return Err(Error::new_c( + idx, + c as char, + ErrorType::InternalError(InternalError::TapeError), + )); }; ($t:expr) => { // We need to ensure that rust doesn't @@ -675,7 +680,14 @@ mod test { input2.append(vec![0; SIMDJSON_PADDING * 2].as_mut()); let mut buffer = vec![0; 1024]; - let s 
= Deserializer::parse_str_(input.as_mut_ptr(), &input2, buffer.as_mut_slice(), 0)?; + let s = unsafe { + dbg!(Deserializer::parse_str_( + input.as_mut_ptr(), + &input2, + buffer.as_mut_slice(), + 0 + ))? + }; dbg!(s); dbg!(&input[..20]); dbg!(&input2[..20]); diff --git a/src/stringparse.rs b/src/stringparse.rs index aa638e84..8344d0f7 100644 --- a/src/stringparse.rs +++ b/src/stringparse.rs @@ -25,6 +25,18 @@ pub(crate) const ESCAPE_MAP: [u8; 256] = [ const HIGH_SURROGATES: Range = 0xd800..0xdc00; const LOW_SURROGATES: Range = 0xdc00..0xe000; +/// handle a unicode codepoint +/// write appropriate values into dest +#[cfg_attr(not(feature = "no-inline"), inline(always))] +pub(crate) fn handle_unicode_codepoint( + src_ptr: &[u8], + dst_ptr: &mut [u8], +) -> Result<(usize, usize), ErrorType> { + let (code_point, src_offset) = get_unicode_codepoint(src_ptr)?; + let offset: usize = codepoint_to_utf8(code_point, dst_ptr); + Ok((offset, src_offset)) +} + /// handle a unicode codepoint /// write appropriate values into dest /// src will advance 6 bytes or 12 bytes @@ -32,10 +44,7 @@ const LOW_SURROGATES: Range = 0xdc00..0xe000; /// return true if the unicode codepoint was valid /// We work in little-endian then swap at write time #[cfg_attr(not(feature = "no-inline"), inline(always))] -pub(crate) fn handle_unicode_codepoint( - mut src_ptr: &[u8], - dst_ptr: &mut [u8], -) -> Result<(usize, usize), ErrorType> { +pub(crate) fn get_unicode_codepoint(mut src_ptr: &[u8]) -> Result<(u32, usize), ErrorType> { // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the // multilingual plane check @@ -77,6 +86,5 @@ pub(crate) fn handle_unicode_codepoint( // This is a low surrogate on it's own, which is invalid. 
return Err(ErrorType::InvalidUtf8); } - let offset: usize = codepoint_to_utf8(code_point, dst_ptr); - Ok((offset, src_offset)) + Ok((code_point, src_offset)) } diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 00000000..2328b578 --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,270 @@ +#![allow(clippy::ignored_unit_patterns)] + +#[cfg(feature = "serde_impl")] +mod serde; + +mod impls; + +use super::to_borrowed_value; +use super::{owned::Value, to_owned_value, Deserializer}; +use crate::tape::Node; +#[cfg(not(target_arch = "wasm32"))] +use proptest::prelude::*; +#[cfg(not(target_arch = "wasm32"))] +use value_trait::StaticNode; +use value_trait::Writable; + +#[cfg(not(feature = "approx-number-parsing"))] +#[test] +#[allow(clippy::float_cmp)] +fn alligned_number_parse() { + let str = "9521.824380305317"; + let mut slice: Vec = str.as_bytes().to_owned(); + let value: crate::BorrowedValue<'_> = + crate::to_borrowed_value(&mut slice).expect("failed to parse"); + assert_eq!(value, 9_521.824_380_305_317); +} + +#[test] +fn test_send_sync() { + struct TestStruct(T); + #[allow(clippy::let_underscore_drop)] // test + let _: TestStruct<_> = TestStruct(super::AlignedBuf::with_capacity(0)); +} + +#[test] +fn count1() { + let mut d = String::from("[]"); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!(simd.tape[1], Node::Array { len: 0, count: 0 }); +} + +#[test] +fn count2() { + let mut d = String::from("[1]"); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!(simd.tape[1], Node::Array { len: 1, count: 1 }); +} + +#[test] +fn count3() { + let mut d = String::from("[1,2]"); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!(simd.tape[1], Node::Array { len: 2, count: 2 }); +} + +#[test] +fn count4() { + let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); + let d = unsafe { d.as_bytes_mut() }; + let simd = 
Deserializer::from_slice(d).expect(""); + assert_eq!(simd.tape[1], Node::Array { len: 3, count: 4 }); + assert_eq!(simd.tape[3], Node::Array { len: 1, count: 1 }); +} + +#[test] +fn count5() { + let mut d = String::from("[[],null,null]"); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!(simd.tape[1], Node::Array { len: 3, count: 3 }); + assert_eq!(simd.tape[2], Node::Array { len: 0, count: 0 }); +} + +#[test] +fn test_tape_object_simple() { + let mut d = String::from(r#" { "hello": 1 , "b": 1 }"#); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!( + simd.tape, + [ + Node::Static(StaticNode::Null), + Node::Object { len: 2, count: 4 }, + Node::String("hello"), // <-- This is already escaped + Node::Static(StaticNode::I64(1)), + Node::String("b"), + Node::Static(StaticNode::I64(1)), + ] + ); +} + +#[test] +fn test_tape_object_escaped() { + let mut d = String::from(r#" { "hell\"o": 1 , "b": [ 1, 2, 3 ] }"#); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + assert_eq!( + simd.tape, + [ + Node::Static(StaticNode::Null), + Node::Object { len: 2, count: 7 }, + Node::String(r#"hell"o"#), // <-- This is already escaped + Node::Static(StaticNode::I64(1)), + Node::String("b"), + Node::Array { len: 3, count: 3 }, + Node::Static(StaticNode::I64(1)), + Node::Static(StaticNode::I64(2)), + Node::Static(StaticNode::I64(3)) + ] + ); +} + +#[test] +fn string_array() { + const STR: &str = r#""{\"arg\":\"test\"}""#; + let mut d = String::from(STR); + let d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(d).expect(""); + dbg!(&simd.tape); + // assert_eq!(simd.tape[1], Node::Array(1, 3)); + assert_eq!(simd.tape[1], Node::String("{\"arg\":\"test\"}")); +} + +#[cfg(feature = "128bit")] +#[test] +fn odd_nuber() { + use super::value::owned::to_value; + use super::value::{Builder, Mutable}; + + let mut d = + 
String::from(r#"{"name": "max_unsafe_auto_id_timestamp", "value": -9223372036854776000}"#); + + let mut d = unsafe { d.as_bytes_mut() }; + let mut o = Value::object(); + o.insert("name", "max_unsafe_auto_id_timestamp") + .expect("failed to set key"); + o.insert("value", -9_223_372_036_854_776_000_i128) + .expect("failed to set key"); + assert_eq!(to_value(&mut d), Ok(o)); +} + +#[cfg(feature = "128bit")] +#[test] +fn odd_nuber2() { + use super::value::owned::to_value; + use super::value::{Builder, Mutable}; + + let mut d = + String::from(r#"{"name": "max_unsafe_auto_id_timestamp", "value": 9223372036854776000}"#); + + let mut d = unsafe { d.as_bytes_mut() }; + let mut o = Value::object(); + o.insert("name", "max_unsafe_auto_id_timestamp") + .expect("failed to set key"); + o.insert("value", 9_223_372_036_854_776_000_u128) + .expect("failed to set key"); + assert_eq!(to_value(&mut d), Ok(o)); +} +// How much do we care about this, it's within the same range and +// based on floating point math imprecisions during parsing. +// Is this a real issue worth improving? +#[test] +fn silly_float1() { + let v = Value::from(3.090_144_804_232_201_7e305); + let s = v.encode(); + let mut bytes = s.as_bytes().to_vec(); + let parsed = to_owned_value(&mut bytes).expect("failed to parse generated float"); + assert_eq!(v, parsed); +} + +#[test] +#[ignore] +fn silly_float2() { + let v = Value::from(-6.990_585_694_841_803e305); + let s = v.encode(); + let mut bytes = s.as_bytes().to_vec(); + let parsed = to_owned_value(&mut bytes).expect("failed to parse generated float"); + assert_eq!(v, parsed); +} +#[cfg(not(feature = "128bit"))] +#[cfg(not(target_arch = "wasm32"))] +fn arb_json_value() -> BoxedStrategy { + let leaf = prop_oneof![ + Just(Value::Static(StaticNode::Null)), + any::().prop_map(Value::from), + //(-1.0e306f64..1.0e306f64).prop_map(Value::from), // damn you float! 
+ any::().prop_map(Value::from), + any::().prop_map(Value::from), + ".*".prop_map(Value::from), + ]; + leaf.prop_recursive( + 8, // 8 levels deep + 256, // Shoot for maximum size of 256 nodes + 10, // We put up to 10 items per collection + |inner| { + prop_oneof![ + // Take the inner strategy and make the two recursive cases. + prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), + prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), + ] + }, + ) + .boxed() +} + +#[cfg(feature = "128bit")] +#[cfg(not(target_arch = "wasm32"))] +fn arb_json_value() -> BoxedStrategy { + let leaf = prop_oneof![ + Just(Value::Static(StaticNode::Null)), + any::().prop_map(Value::from), + //(-1.0e306f64..1.0e306f64).prop_map(Value::from), // damn you float! + any::().prop_map(Value::from), + any::().prop_map(Value::from), + any::().prop_map(Value::from), + any::().prop_map(Value::from), + ".*".prop_map(Value::from), + ]; + leaf.prop_recursive( + 8, // 8 levels deep + 256, // Shoot for maximum size of 256 nodes + 10, // We put up to 10 items per collection + |inner| { + prop_oneof![ + // Take the inner strategy and make the two recursive cases. + prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), + prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), + ] + }, + ) + .boxed() +} + +#[cfg(not(target_arch = "wasm32"))] +proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + // Disabled for code coverage, enable to track bugs + // fork: true, + .. 
ProptestConfig::default() + })] + + #[test] + fn prop_json_encode_decode(val in arb_json_value()) { + let mut encoded: Vec = Vec::new(); + val.write(&mut encoded).expect("write"); + println!("{}", String::from_utf8_lossy(&encoded)); + let mut e = encoded.clone(); + let res = to_owned_value(&mut e).expect("can't convert"); + assert_eq!(val, res); + let mut e = encoded.clone(); + let res = to_borrowed_value(&mut e).expect("can't convert"); + assert_eq!(val, res); + #[cfg(not(feature = "128bit"))] + { // we can't do 128 bit w/ serde + use crate::{deserialize, BorrowedValue, OwnedValue}; + let mut e = encoded.clone(); + let res: OwnedValue = deserialize(&mut e).expect("can't convert"); + assert_eq!(val, res); + let mut e = encoded; + let res: BorrowedValue = deserialize(&mut e).expect("can't convert"); + assert_eq!(val, res); + } + } + +} diff --git a/src/tests/impls.rs b/src/tests/impls.rs new file mode 100644 index 00000000..0a66d183 --- /dev/null +++ b/src/tests/impls.rs @@ -0,0 +1,80 @@ +use crate::{native::stage1::NativeInput, Deserializer, Stage1Parse, SIMDJSON_PADDING}; + +fn test_find_structural_bits(input_str: &str, expected: &[u32]) { + let mut input = input_str.as_bytes().to_vec(); + input.append(&mut vec![0; SIMDJSON_PADDING]); + let mut res = Vec::new(); + + unsafe { + Deserializer::_find_structural_bits::(input.as_slice(), &mut res) + .expect("failed to find structural bits"); + }; + println!("{input_str}"); + assert_eq!(res, expected); +} + +fn find_structural_bits_test_cases() { + test_find_structural_bits::("", &[0, 0]); + test_find_structural_bits::("1", &[0, 0]); + test_find_structural_bits::("[1]", &[0, 0, 1, 2, 3]); + test_find_structural_bits::("[1, 2]", &[0, 0, 1, 2, 4, 5, 6]); + test_find_structural_bits::( + r#"{ + "snot": "badger", + "numbers": [1,2,3,4,5,6,7,8,9,10,11,12, 13, {"not a number": "but a flat object"}], + "a float because we can": 0.123456789e11, + "and a string that we can put in here": "oh my stringy string, you are long so that 
we exceed the twohundredsixtyfive bits of a simd register" + }"#, + &[ + 0, 0, 18, 24, 26, 34, 52, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 84, 85, 87, 88, 90, 92, 94, 96, 97, 111, 113, 132, 133, 134, + 152, 176, 178, 192, 210, 248, 250, 357, 358, + ], + ); + + test_find_structural_bits::( + r#" { "hell\"o": 1 , "b": [ 1, 2, 3 ] }"#, + &[ + 0, 1, 3, 12, 14, 16, 18, 21, 23, 25, 26, 28, 29, 31, 33, 35, 36, + ], + ); +} + +#[test] +fn find_structural_bits_native() { + find_structural_bits_test_cases::(); +} + +#[cfg(feature = "portable")] +#[test] +fn find_structural_bits_portable() { + find_structural_bits_test_cases::(); +} + +#[cfg(target_feature = "avx2")] +#[test] +fn find_structural_bits_avx() { + if std::is_x86_feature_detected!("avx2") { + find_structural_bits_test_cases::(); + } +} + +#[cfg(target_feature = "sse4.2")] +#[test] +fn find_structural_bits_sse() { + if std::is_x86_feature_detected!("sse4.2") { + find_structural_bits_test_cases::(); + } +} + +#[cfg(target_arch = "aarch64")] +#[test] +fn find_structural_bits_aarch64() { + find_structural_bits_test_cases::(); +} + +#[cfg(target_feature = "simd128")] +#[test] +fn find_structural_bits_simd128() { + find_structural_bits_test_cases::(); +} diff --git a/src/tests/serde.rs b/src/tests/serde.rs new file mode 100644 index 00000000..132e13b7 --- /dev/null +++ b/src/tests/serde.rs @@ -0,0 +1,964 @@ +#![allow( + clippy::unnecessary_operation, + clippy::non_ascii_literal, + clippy::ignored_unit_patterns +)] +use crate::{ + deserialize, + owned::{to_value, Object, Value}, + serde::from_slice, + to_borrowed_value, to_owned_value, OwnedValue, +}; +use halfbrown::HashMap; +#[cfg(not(target_arch = "wasm32"))] +use proptest::prelude::*; +use serde::Deserialize; + +use value_trait::{Builder, Mutable, StaticNode}; + +#[test] +fn empty() { + let mut d = String::new(); + let d = unsafe { d.as_bytes_mut() }; + let v_simd = from_slice::(d); + let v_serde = 
serde_json::from_slice::(d); + assert!(v_simd.is_err()); + assert!(v_serde.is_err()); +} + +#[test] +fn bool_true() { + let mut d = String::from("true"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(true))); +} + +#[test] +fn bool_false() { + let mut d = String::from("false"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(false))); + //assert!(false) +} + +#[test] +fn union() { + let mut d = String::from("null"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::Static(StaticNode::Null))); +} + +#[test] +fn int() { + let mut d = String::from("42"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(42))); +} + +#[test] +fn zero() { + let mut d = String::from("0"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + 
assert_eq!(to_value(d1), Ok(Value::from(0))); +} + +#[test] +fn one() { + let mut d = String::from("1"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(1))); +} + +#[test] +fn minus_one() { + let mut d = String::from("-1"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(-1))); +} + +#[test] +fn float() { + let mut d = String::from("23.0"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(d1), Ok(Value::from(23.0))); +} + +#[test] +fn string() { + let mut d = String::from(r#""snot""#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(to_value(d1), Ok(Value::from("snot"))); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn lonely_quote() { + let mut d = String::from(r#"""#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(d).is_err(); + assert!(v_simd); + assert!(v_serde); +} + +#[test] +fn lonely_quote1() { + let mut d = String::from(r#"["]"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + 
let v_simd = from_slice::(d).is_err(); + assert!(v_simd); + assert!(v_serde); +} +#[test] +fn lonely_quote2() { + let mut d = String::from(r#"[1, "]"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(d).is_err(); + assert!(v_simd); + assert!(v_serde); +} + +#[test] +fn lonely_quote3() { + let mut d = String::from(r#"{": 1}"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(d).is_err(); + assert!(v_simd); + assert!(v_serde); +} + +#[test] +fn empty_string() { + let mut d = String::from(r#""""#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(to_value(d1), Ok(Value::from(""))); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn empty_array() { + let mut d = String::from("[]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); + assert_eq!(to_value(d1), Ok(Value::Array(vec![]))); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn malformed_array() { + let mut d = String::from("[["); + let mut d1 = d.clone(); + let mut d2 = d.clone(); + let d = unsafe { d.as_bytes_mut() }; + let d1 = unsafe { d1.as_bytes_mut() }; + let d2 = unsafe { d2.as_bytes_mut() }; + let v_serde: Result = serde_json::from_slice(d); + let v_simd_owned_value = to_owned_value(d); + let v_simd_borrowed_value = to_borrowed_value(d1); + let v_simd: Result = from_slice(d2); + assert!(v_simd_owned_value.is_err()); + assert!(v_simd_borrowed_value.is_err()); + assert!(v_simd.is_err()); + assert!(v_serde.is_err()); +} + +#[test] +fn double_array() { + let mut 
d = String::from("[[]]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); + assert_eq!(to_value(d1), Ok(Value::Array(vec![Value::Array(vec![])]))); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn null_null_array() { + let mut d = String::from("[[],null,null]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(d).expect("parse_simd"); + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::Array(vec![]), + Value::Static(StaticNode::Null), + Value::Static(StaticNode::Null), + ])) + ); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn one_element_array() { + let mut d = String::from(r#"["snot"]"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + assert_eq!(to_value(d1), Ok(Value::Array(vec![Value::from("snot")]))); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn two_element_array() { + let mut d = String::from(r#"["snot", "badger"]"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::from("snot"), + Value::from("badger") + ])) + ); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn list() { + let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { 
d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::from(42), + Value::from(23.0), + Value::from("snot badger") + ])) + ); +} + +#[test] +fn nested_list1() { + let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::from(42), + Value::Array(vec![Value::from(23.0), Value::from("snot")]), + Value::from("bad"), + Value::from("ger") + ])) + ); + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn nested_list2() { + let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn utf8() { + let mut d = String::from(r#""\u000e""#); + let d = unsafe { d.as_bytes_mut() }; + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, "\u{e}"); + // NOTE: serde is broken for this + //assert_eq!(v_serde, "\u{e}"); + //assert_eq!(v_simd, v_serde); +} +#[test] +fn utf8_invalid_surrogates() { + // This is invalid UTF-8, the first character is a high surrogate + let mut d = String::from(r#""\uDE71""#); + let d = unsafe { d.as_bytes_mut() }; + let v_simd: Result = from_slice(d); + assert!(v_simd.is_err()); +} + +#[test] +fn unicode() { + let mut d = String::from(r#""Β‘\"""#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + 
assert_eq!(v_simd, v_serde); +} + +#[test] +fn odd_array() { + let mut d = String::from("[{},null]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::from(Object::default()), + Value::Static(StaticNode::Null) + ])) + ); +} + +#[test] +fn min_i64() { + let mut d = + String::from(r#"{"name": "max_unsafe_auto_id_timestamp", "value": -9223372036854775808}"#); + + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + let mut o = Value::object(); + o.insert("name", "max_unsafe_auto_id_timestamp") + .expect("failed to set key"); + o.insert("value", -9_223_372_036_854_775_808_i64) + .expect("failed to set key"); + assert_eq!(to_value(d1), Ok(o)); +} + +#[test] +fn map2() { + let mut d = String::from(r#"[{"\u0000":null}]"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn null() { + let mut d = String::from("null"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + assert_eq!(to_value(d1), Ok(Value::Static(StaticNode::Null))); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} +#[test] +fn null_null() { + let mut d = String::from("[null, null]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { 
d.as_bytes_mut() }; + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![ + Value::Static(StaticNode::Null), + Value::Static(StaticNode::Null), + ])) + ); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn nested_null() { + let mut d = String::from("[[null, null]]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![Value::Array(vec![ + Value::Static(StaticNode::Null), + Value::Static(StaticNode::Null), + ])])) + ); + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn nestednested_null() { + let mut d = String::from("[[[null, null]]]"); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(d1), + Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ + Value::Static(StaticNode::Null), + Value::Static(StaticNode::Null), + ])])])) + ); +} + +#[test] +fn odd_array2() { + let mut d = String::from("[[\"\\u0000\\\"\"]]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn odd_array3() { + let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn odd_array4() { + let mut d = 
String::from("[{\"\\u0000𐀀a\":null}]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn float1() { + let mut d = String::from("2.3250706903316115e307"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +// We ignore this since serde is less precise on this test +#[ignore] +#[test] +fn float2() { + let mut d = String::from("-4.5512678569607477e306"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[cfg(not(feature = "approx-number-parsing"))] +#[test] +fn float3() { + let mut d = String::from("0.6"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Number = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Number = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn map0() { + let mut d = String::from(r#"{"snot": "badger"}"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + let mut h = Object::default(); + h.insert("snot".into(), Value::from("badger")); + assert_eq!(to_value(d1), Ok(Value::from(h))); +} + +#[test] +fn map1() { + let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); + let mut d1 = d.clone(); + let d1 = unsafe { d1.as_bytes_mut() }; + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = 
serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(d).expect(""); + assert_eq!(v_simd, v_serde); + let mut h = Object::default(); + h.insert("snot".into(), Value::from("badger")); + h.insert("badger".into(), Value::from("snot")); + assert_eq!(to_value(d1), Ok(Value::from(h))); +} + +#[cfg(feature = "serde_impl")] +#[test] +fn tpl1() { + let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); + let v_simd: (f32, f32) = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn tpl2() { + let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec<(f32, f32)> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn tpl3() { + let mut d = String::from( + "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec<(f32, f32)> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} +#[test] +fn tpl4() { + let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} +#[test] +fn tpl5() { + let mut d = String::from( + "[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]", + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn 
tpl6() { + let mut d = String::from( + "[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]", + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec>> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn tpl7() { + let mut d = String::from( + "[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]", + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec>> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[derive(Deserialize, PartialEq, Debug)] +struct Obj { + a: u64, + b: u64, +} + +#[derive(Deserialize, PartialEq, Debug)] +struct Obj1 { + a: Obj, +} + +#[test] +fn obj1() { + let mut d = String::from(r#"{"a": 1, "b":1}"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Obj = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn obj2() { + let mut d = + String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); + let v_simd: HashMap = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn obj3() { + let mut d = String::from( + r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap> = + serde_json::from_slice(d).expect("serde_json"); + let v_simd: HashMap> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn obj4() { + let mut d = String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); + 
let v_simd: HashMap = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn vecvec() { + let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[test] +fn invalid_float_array() { + let mut data = b"[11111111111111111111111111111E1,-111111111111111111111E111111111".to_vec(); + + assert!(to_owned_value(&mut data).is_err()); +} + +#[test] +fn crazy_string() { + // there is unicode in here! + let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; + let mut d = String::from(d); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +#[cfg(feature = "serde_impl")] +#[test] +fn event() { + #[derive(Deserialize, Debug, PartialEq, Eq)] + #[serde(deny_unknown_fields, rename_all = "camelCase")] + pub struct CitmCatalog { + pub area_names: HashMap, + pub audience_sub_category_names: HashMap, + pub block_names: HashMap, + pub events: HashMap, + } + pub type Id = u32; + #[derive(Deserialize, Debug, PartialEq, Eq)] + #[serde(deny_unknown_fields, rename_all = "camelCase")] + pub struct Event { + pub description: (), + pub id: Id, + pub logo: Option, + pub name: String, + pub sub_topic_ids: Vec, + pub subject_code: (), + pub subtitle: (), + pub topic_ids: Vec, + } + + let mut d = String::from( + r#" +{ + "areaNames": { + "205705993": "ArriΓ¨re-scΓ¨ne central", + "205705994": "1er balcon central", + "205705995": "2Γ¨me balcon bergerie cour", + "205705996": "2Γ¨me balcon bergerie jardin", + "205705998": "1er balcon bergerie jardin", + "205705999": "1er 
balcon bergerie cour", + "205706000": "ArriΓ¨re-scΓ¨ne jardin", + "205706001": "ArriΓ¨re-scΓ¨ne cour", + "205706002": "2Γ¨me balcon jardin", + "205706003": "2Γ¨me balcon cour", + "205706004": "2Γ¨me Balcon central", + "205706005": "1er balcon jardin", + "205706006": "1er balcon cour", + "205706007": "Orchestre central", + "205706008": "Orchestre jardin", + "205706009": "Orchestre cour", + "342752287": "Zone physique secrΓ¨te" + }, + "audienceSubCategoryNames": { + "337100890": "AbonnΓ©" + }, + "blockNames": {}, + "events": { + "138586341": { + "description": null, + "id": 138586341, + "logo": null, + "name": "30th Anniversary Tour", + "subTopicIds": [ + 337184269, + 337184283 + ], + "subjectCode": null, + "subtitle": null, + "topicIds": [ + 324846099, + 107888604 + ] + }, + "138586345": { + "description": null, + "id": 138586345, + "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", + "name": "Berliner Philharmoniker", + "subTopicIds": [ + 337184268, + 337184283, + 337184275 + ], + "subjectCode": null, + "subtitle": null, + "topicIds": [ + 324846099, + 107888604, + 324846100 + ] + } + } +} +"#, + ); + let d = unsafe { d.as_bytes_mut() }; + let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); + let v_simd: CitmCatalog = from_slice(d).expect("simd_json"); + assert_eq!(v_simd, v_serde); +} + +//6.576692109929364e305 +#[cfg(not(target_arch = "wasm32"))] +fn arb_json() -> BoxedStrategy { + let leaf = prop_oneof![ + Just(Value::Static(StaticNode::Null)), + any::() + .prop_map(StaticNode::Bool) + .prop_map(Value::Static), + // (-1.0e306f64..1.0e306f64).prop_map(Value::from), // The float parsing of simd and serde are too different + any::().prop_map(Value::from), + ".*".prop_map(Value::from), + ]; + leaf.prop_recursive( + 8, // 8 levels deep + 256, // Shoot for maximum size of 256 nodes + 10, // We put up to 10 items per collection + |inner| { + prop_oneof![ + // Take the inner strategy and make the two recursive cases. 
+ prop::collection::vec(inner.clone(), 0..10).prop_map(Value::from), + prop::collection::hash_map(".*", inner, 0..10).prop_map(Value::from), + ] + }, + ) + .prop_map(|v| serde_json::to_string(&v).expect("")) + .boxed() +} + +#[cfg(feature = "serde_impl")] +#[test] +fn int_map_key() -> Result<(), crate::Error> { + use std::collections::BTreeMap; + + let mut map = BTreeMap::new(); + map.insert(0, "foo"); + map.insert(1, "bar"); + map.insert(2, "baz"); + + assert_eq!( + r#"{"0":"foo","1":"bar","2":"baz"}"#, + crate::to_string(&map)? + ); + Ok(()) +} + +#[cfg(feature = "serde_impl")] +#[test] +fn enum_test() -> Result<(), crate::Error> { + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + struct MyStruct { + field: u8, + } + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + enum MyEnum { + First(MyStruct), + Second(u8), + } + + let thing = MyEnum::First(MyStruct { field: 1 }); + let mut ser = crate::serde::to_string(&thing)?; + println!("Ser {ser:?}"); + let des: MyEnum = unsafe { crate::serde::from_str(&mut ser)? }; + println!("Des {des:?}"); + assert_eq!(thing, des); + Ok(()) +} + +#[test] +fn invalid_float() { + let mut s: Vec = b"[100,9e999]".to_vec(); + assert!(to_owned_value(&mut s).is_err()); +} + +#[cfg(not(target_arch = "wasm32"))] +proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + // Disabled for code coverage, enable to track bugs + // fork: true, + .. ProptestConfig::default() + })] + + #[test] + fn prop_json(d in arb_json()) { + if let Ok(v_serde) = serde_json::from_slice::(d.as_bytes()) { + let mut d1 = d.clone(); + let d1 = unsafe{ d1.as_bytes_mut()}; + let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); + // We add our own encoder in here. 
+ let mut d2 = v_simd_serde.to_string(); + let d2 = unsafe{ d2.as_bytes_mut()}; + let mut d3 = d.clone(); + let d3 = unsafe{ d3.as_bytes_mut()}; + let mut d4 = d.clone(); + let d4 = unsafe{ d4.as_bytes_mut()}; + assert_eq!(v_simd_serde, v_serde); + let v_simd_owned = to_owned_value(d2).expect("to_owned_value failed"); + let v_simd_borrowed = to_borrowed_value(d3).expect("to_borrowed_value failed"); + assert_eq!(v_simd_borrowed, v_simd_owned); + let v_deserialize: OwnedValue = deserialize(d4).expect("deserialize failed"); + assert_eq!(v_deserialize, v_simd_owned); + } + + } + +} + +#[cfg(not(target_arch = "wasm32"))] +fn arb_junk() -> BoxedStrategy> { + prop::collection::vec(any::(), 0..(1024 * 8)).boxed() +} +#[cfg(not(target_arch = "wasm32"))] +proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + // Disabled for code coverage, enable to track bugs + // fork: true, + .. ProptestConfig::default() + })] + #[test] + #[allow(clippy::should_panic_without_expect)] + #[should_panic] + fn prop_junk(d in arb_junk()) { + let mut d1 = d.clone(); + let mut d2 = d.clone(); + let mut d3 = d; + + from_slice::(&mut d1).expect("from_slice"); + to_borrowed_value(&mut d2).expect("to_borrowed_value"); + to_owned_value(&mut d3).expect("to_owned_value"); + + } +} + +#[cfg(not(target_arch = "wasm32"))] +proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + // Disabled for code coverage, enable to track bugs + // fork: true, + .. 
ProptestConfig::default() + })] + + #[test] + #[allow(clippy::should_panic_without_expect)] + #[should_panic] + fn prop_string(d in "\\PC*") { + let mut d1 = d.clone(); + let d1 = unsafe{ d1.as_bytes_mut()}; + let mut d2 = d.clone(); + let d2 = unsafe{ d2.as_bytes_mut()}; + let mut d3 = d; + let d3 = unsafe{ d3.as_bytes_mut()}; + from_slice::(d1).expect("from_slice"); + to_borrowed_value(d2).expect("to_borrowed_value"); + to_owned_value(d3).expect("to_owned_value"); + + } +} diff --git a/src/value/borrowed.rs b/src/value/borrowed.rs index 9ed196a0..6874f0d4 100644 --- a/src/value/borrowed.rs +++ b/src/value/borrowed.rs @@ -100,7 +100,7 @@ impl<'value> Value<'value> { // https://docs.rs/beef/0.4.4/src/beef/generic.rs.html#379-391 Self::String(s) => unsafe { std::mem::transmute::, Value<'static>>(Self::String(Cow::from( - s.to_string(), + s.into_owned(), ))) }, // For an array we turn every value into a static @@ -459,6 +459,7 @@ impl<'de> BorrowDeserializer<'de> { #[cfg(test)] mod test { + #![allow(clippy::ignored_unit_patterns)] #![allow(clippy::cognitive_complexity)] use super::*; diff --git a/src/value/borrowed/serialize.rs b/src/value/borrowed/serialize.rs index 91294fac..520d1687 100644 --- a/src/value/borrowed/serialize.rs +++ b/src/value/borrowed/serialize.rs @@ -262,11 +262,11 @@ mod test { } #[test] fn string() { - assert_str(r#"this is a test"#, r#""this is a test""#); + assert_str("this is a test", r#""this is a test""#); assert_str(r#"this is a test ""#, r#""this is a test \"""#); assert_str(r#"this is a test """#, r#""this is a test \"\"""#); assert_str( - r#"this is a test a long test that should span the 32 byte boundary"#, + "this is a test a long test that should span the 32 byte boundary", r#""this is a test a long test that should span the 32 byte boundary""#, ); assert_str( diff --git a/src/value/owned.rs b/src/value/owned.rs index 86be8926..19f2cef9 100644 --- a/src/value/owned.rs +++ b/src/value/owned.rs @@ -376,7 +376,7 @@ impl<'de> 
OwnedDeserializer<'de> { #[cfg(test)] mod test { - #![allow(clippy::cognitive_complexity)] + #![allow(clippy::cognitive_complexity, clippy::ignored_unit_patterns)] use super::*; #[test] diff --git a/src/value/owned/serialize.rs b/src/value/owned/serialize.rs index b24443ee..33457a87 100644 --- a/src/value/owned/serialize.rs +++ b/src/value/owned/serialize.rs @@ -262,11 +262,11 @@ mod test { #[test] fn string() { - assert_str(r#"this is a test"#, r#""this is a test""#); + assert_str("this is a test", r#""this is a test""#); assert_str(r#"this is a test ""#, r#""this is a test \"""#); assert_str(r#"this is a test """#, r#""this is a test \"\"""#); assert_str( - r#"this is a test a long test that should span the 32 byte boundary"#, + "this is a test a long test that should span the 32 byte boundary", r#""this is a test a long test that should span the 32 byte boundary""#, ); assert_str( diff --git a/src/value/tape.rs b/src/value/tape.rs index 588a7922..9e56bf8b 100644 --- a/src/value/tape.rs +++ b/src/value/tape.rs @@ -41,7 +41,7 @@ mod test { use crate::prelude::*; #[test] - #[should_panic] + #[should_panic = "Not supported"] #[allow(unused_variables, clippy::no_effect)] fn object_index() { let v = StaticNode::Null; @@ -49,14 +49,14 @@ mod test { } #[test] - #[should_panic] + #[should_panic = "Not supported"] fn mut_object_index() { let mut v = StaticNode::Null; v["test"] = (); } #[test] - #[should_panic] + #[should_panic = "Not supported"] #[allow(unused_variables, clippy::no_effect)] fn array_index() { let v = StaticNode::Null; @@ -64,7 +64,7 @@ mod test { } #[test] - #[should_panic] + #[should_panic = "Not supported"] fn mut_array_index() { let mut v = StaticNode::Null; v[0] = (); From 48456a2800de6c94561d5c0b605f90b2c6f03c11 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 19 Oct 2023 15:42:22 +0200 Subject: [PATCH 2/8] update std::simd tp use 512 bit registers if possible Signed-off-by: Heinz N. 
Gies --- src/portable/stage1.rs | 89 +++++++++++++++--------------------------- src/stage2.rs | 1 - 2 files changed, 31 insertions(+), 59 deletions(-) diff --git a/src/portable/stage1.rs b/src/portable/stage1.rs index c3a518bc..2244d86f 100644 --- a/src/portable/stage1.rs +++ b/src/portable/stage1.rs @@ -7,7 +7,8 @@ macro_rules! low_nibble_mask { () => { [ 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, - 2, 9, 0, 0, + 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 12, 1, 2, 9, 0, 0, ] }; } @@ -16,7 +17,8 @@ macro_rules! high_nibble_mask { () => { [ 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, - 2, 1, 0, 0, + 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, + 0, 0, 0, 3, 2, 1, 0, 0, ] }; } @@ -24,19 +26,17 @@ macro_rules! high_nibble_mask { use crate::{static_cast_i32, Stage1Parse}; #[derive(Debug)] pub(crate) struct SimdInputPortable { - v0: u8x32, - v1: u8x32, + v: u8x64, } impl Stage1Parse for SimdInputPortable { type Utf8Validator = simdutf8::basic::imp::portable::ChunkedUtf8ValidatorImp; - type SimdRepresentation = u8x32; + type SimdRepresentation = u8x64; #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_ptr_alignment)] unsafe fn new(ptr: &[u8]) -> Self { Self { - v0: u8x32::from_array(*ptr.as_ptr().cast::<[u8; 32]>()), - v1: u8x32::from_array(*ptr.as_ptr().add(32).cast::<[u8; 32]>()), + v: u8x64::from_array(*ptr.as_ptr().cast::<[u8; 64]>()), } } @@ -56,21 +56,15 @@ impl Stage1Parse for SimdInputPortable { #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { - let mask = u8x32::splat(m); - let cmp_res_0 = self.v0.simd_eq(mask); - let res_0 = u64::from(cmp_res_0.to_bitmask()); - let cmp_res_1 = self.v1.simd_eq(mask); - let res_1 = u64::from(cmp_res_1.to_bitmask()); - 
res_0 | (res_1 << 32) + let mask = u8x64::splat(m); + self.v.simd_eq(mask).to_bitmask() } // find all values less than or equal than the content of maxval (using unsigned arithmetic) #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] - unsafe fn unsigned_lteq_against_input(&self, maxval: u8x32) -> u64 { - let res_0: u64 = u64::from(self.v0.simd_le(maxval).to_bitmask()); - let res_1: u64 = u64::from(self.v1.simd_le(maxval).to_bitmask()); - res_0 | (res_1 << 32) + unsafe fn unsigned_lteq_against_input(&self, maxval: u8x64) -> u64 { + self.v.simd_le(maxval).to_bitmask() } #[cfg_attr(not(feature = "no-inline"), inline)] @@ -93,56 +87,35 @@ impl Stage1Parse for SimdInputPortable { // * carriage return 0x0d // these go into the next 2 buckets of the comparison (8/16) - const LOW_NIBBLE_MASK: u8x32 = u8x32::from_array(low_nibble_mask!()); - const HIGH_NIBBLE_MASK: u8x32 = u8x32::from_array(high_nibble_mask!()); + const LOW_NIBBLE_MASK: u8x64 = u8x64::from_array(low_nibble_mask!()); + const HIGH_NIBBLE_MASK: u8x64 = u8x64::from_array(high_nibble_mask!()); - let structural_shufti_mask: u8x32 = u8x32::splat(0b0000_0111); // 0x07 - let whitespace_shufti_mask: u8x32 = u8x32::splat(0b0001_1000); // 0x18 + let structural_shufti_mask: u8x64 = u8x64::splat(0b0000_0111); // 0x07 + let whitespace_shufti_mask: u8x64 = u8x64::splat(0b0001_1000); // 0x18 // FIXME: do we need this dance? 
- let v0_32 = i32x8::from_array(std::mem::transmute(*self.v0.as_array())); - let v0_shifted: Simd = v0_32.shr(i32x8::splat(4)); - let v0_shifted = u8x32::from_array(std::mem::transmute(v0_shifted)); + let v32 = i32x16::from_array(std::mem::transmute(*self.v.as_array())); + let v_shifted = v32.shr(i32x16::splat(4)); + let v_shifted = u8x64::from_array(std::mem::transmute(v_shifted)); // We have to adjust the index here the reason being that while the avx instruction // only uses the lower 4 bits for index and the 8th bit for overflow (set to 0) // std::simd::swizzle uses all bits 5-8 for overflow, so we need to mask out // bit 6, 5 and 7 to get the correct behaviour - let v0_idx_low = self.v0 & u8x32::splat(0b1000_1111); - let v0_swizzle_low = LOW_NIBBLE_MASK.swizzle_dyn(v0_idx_low); - let v0_idx_high = v0_shifted & u8x32::splat(0b0000_1111); - let v0_swizzle_high = HIGH_NIBBLE_MASK.swizzle_dyn(v0_idx_high); - let v_lo = v0_swizzle_low & v0_swizzle_high; + let v_idx_low = self.v & u8x64::splat(0b1000_1111); + let v_swizzle_low = LOW_NIBBLE_MASK.swizzle_dyn(v_idx_low); + let v_idx_high = v_shifted & u8x64::splat(0b0000_1111); + let v_swizzle_high = HIGH_NIBBLE_MASK.swizzle_dyn(v_idx_high); + let v = v_swizzle_low & v_swizzle_high; - let v1_32 = i32x8::from_array(std::mem::transmute(*self.v1.as_array())); - let v1_shifted: Simd = v1_32.shr(i32x8::splat(4)); - let v1_shifted = u8x32::from_array(std::mem::transmute(v1_shifted)); + let tmp: Mask = (v & structural_shufti_mask).simd_eq(u8x64::splat(0)); - // We have to adjust the index here the reason being that while the avx instruction - // only uses the lower 4 bits for index and the 8th bit for overflow (set to 0) - // std::simd::swizzle uses all bits 5-8 for overflow, so we need to mask out - // bit 6, 5 and 7 to get the correct behaviour - let v1_idx_low = self.v1 & u8x32::splat(0b1000_1111); - let v1_swizzle_low = LOW_NIBBLE_MASK.swizzle_dyn(v1_idx_low); - let v1_idx_high = v1_shifted & u8x32::splat(0b0000_1111); - 
let v1_swizzle_high = HIGH_NIBBLE_MASK.swizzle_dyn(v1_idx_high); - let v_hi = v1_swizzle_low & v1_swizzle_high; - - let tmp_lo = (v_lo & structural_shufti_mask).simd_eq(u8x32::splat(0)); - let tmp_hi = (v_hi & structural_shufti_mask).simd_eq(u8x32::splat(0)); - - let structural_res_0 = u64::from(tmp_lo.to_bitmask()); - let structural_res_1 = u64::from(tmp_hi.to_bitmask()); - *structurals = !(structural_res_0 | (structural_res_1 << 32)); - - let tmp_ws_lo = (v_lo & whitespace_shufti_mask).simd_eq(u8x32::splat(0)); - let tmp_ws_hi = (v_hi & whitespace_shufti_mask).simd_eq(u8x32::splat(0)); + *structurals = !tmp.to_bitmask(); - let ws_res_0 = u64::from(tmp_ws_lo.to_bitmask()); - let ws_res_1 = u64::from(tmp_ws_hi.to_bitmask()); + let tmp_ws = (v & whitespace_shufti_mask).simd_eq(u8x64::splat(0)); - *whitespace = !(ws_res_0 | (ws_res_1 << 32)); + *whitespace = !(tmp_ws.to_bitmask()); } // flatten out values in 'bits' assuming that they are are to have values of idx @@ -211,12 +184,12 @@ impl Stage1Parse for SimdInputPortable { #[allow(clippy::cast_sign_loss)] #[cfg_attr(not(feature = "no-inline"), inline)] - unsafe fn fill_s8(n: i8) -> u8x32 { - u8x32::splat(n as u8) + unsafe fn fill_s8(n: i8) -> u8x64 { + u8x64::splat(n as u8) } #[cfg_attr(not(feature = "no-inline"), inline)] - unsafe fn zero() -> u8x32 { - u8x32::splat(0) + unsafe fn zero() -> u8x64 { + u8x64::splat(0) } } diff --git a/src/stage2.rs b/src/stage2.rs index 55b88a7d..7f4c7919 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -276,7 +276,6 @@ impl<'de> Deserializer<'de> { unsafe { res.set_len(r_i); }; - dbg!(); return Err(Error::new_c( idx, c as char, From f9421c4fcb8fe8574be65646e2519dc226f35d66 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Thu, 19 Oct 2023 16:02:08 +0200 Subject: [PATCH 3/8] Move nibble code to comment in portable impl Signed-off-by: Heinz N. 
Gies --- Cargo.toml | 6 +- src/avx2/mod.rs | 2 - src/{ => impls}/avx2/deser.rs | 0 src/impls/avx2/mod.rs | 4 + src/{ => impls}/avx2/stage1.rs | 4 +- src/impls/mod.rs | 19 ++ src/{ => impls}/native/deser.rs | 4 +- src/{ => impls}/native/mod.rs | 6 +- src/{ => impls}/native/stage1.rs | 6 +- src/{ => impls}/neon/deser.rs | 0 src/impls/neon/mod.rs | 4 + src/{ => impls}/neon/stage1.rs | 4 +- src/{ => impls}/portable/deser.rs | 0 src/impls/portable/mod.rs | 4 + src/{ => impls}/portable/stage1.rs | 43 ++-- src/{ => impls}/simd128/deser.rs | 0 src/impls/simd128/mod.rs | 4 + src/{ => impls}/simd128/stage1.rs | 4 +- src/{ => impls}/sse42/deser.rs | 0 src/impls/sse42/mod.rs | 4 + src/{ => impls}/sse42/stage1.rs | 4 +- src/lib.rs | 314 ++++++++++++----------------- src/neon/mod.rs | 4 - src/portable/mod.rs | 2 - src/simd128/mod.rs | 2 - src/sse42/mod.rs | 2 - src/tests/impls.rs | 14 +- tree | 215 ++++++++++++++++++++ 28 files changed, 423 insertions(+), 252 deletions(-) delete mode 100644 src/avx2/mod.rs rename src/{ => impls}/avx2/deser.rs (100%) create mode 100644 src/impls/avx2/mod.rs rename src/{ => impls}/avx2/stage1.rs (99%) create mode 100644 src/impls/mod.rs rename src/{ => impls}/native/deser.rs (97%) rename src/{ => impls}/native/mod.rs (91%) rename src/{ => impls}/native/stage1.rs (99%) rename src/{ => impls}/neon/deser.rs (100%) create mode 100644 src/impls/neon/mod.rs rename src/{ => impls}/neon/stage1.rs (99%) rename src/{ => impls}/portable/deser.rs (100%) create mode 100644 src/impls/portable/mod.rs rename src/{ => impls}/portable/stage1.rs (93%) rename src/{ => impls}/simd128/deser.rs (100%) create mode 100644 src/impls/simd128/mod.rs rename src/{ => impls}/simd128/stage1.rs (99%) rename src/{ => impls}/sse42/deser.rs (100%) create mode 100644 src/impls/sse42/mod.rs rename src/{ => impls}/sse42/stage1.rs (99%) delete mode 100644 src/neon/mod.rs delete mode 100644 src/portable/mod.rs delete mode 100644 src/simd128/mod.rs delete mode 100644 src/sse42/mod.rs 
create mode 100644 tree diff --git a/Cargo.toml b/Cargo.toml index 6c6948ea..f1df7a30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,7 @@ name = "parse" harness = false [features] -default = ["swar-number-parsing", "serde_impl"] +default = ["swar-number-parsing", "serde_impl", "runtime-detection"] arraybackend = ["halfbrown/arraybackend"] @@ -108,6 +108,10 @@ docsrs = [] # portable simd support (as of rust 1.73 nightly only) portable = ["simdutf8/portable"] + +# use runtime detection of the CPU features where possible instead of enforcing an instruction set +runtime-detection = [] + [[example]] name = "perf" diff --git a/src/avx2/mod.rs b/src/avx2/mod.rs deleted file mode 100644 index 6db7ee75..00000000 --- a/src/avx2/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod deser; -pub mod stage1; diff --git a/src/avx2/deser.rs b/src/impls/avx2/deser.rs similarity index 100% rename from src/avx2/deser.rs rename to src/impls/avx2/deser.rs diff --git a/src/impls/avx2/mod.rs b/src/impls/avx2/mod.rs new file mode 100644 index 00000000..19effade --- /dev/null +++ b/src/impls/avx2/mod.rs @@ -0,0 +1,4 @@ +pub(crate) mod deser; +mod stage1; + +pub(crate) use stage1::SimdInput; diff --git a/src/avx2/stage1.rs b/src/impls/avx2/stage1.rs similarity index 99% rename from src/avx2/stage1.rs rename to src/impls/avx2/stage1.rs index 2a0995a4..bbac4bb7 100644 --- a/src/avx2/stage1.rs +++ b/src/impls/avx2/stage1.rs @@ -36,12 +36,12 @@ macro_rules! 
high_nibble_mask { } #[derive(Debug)] -pub(crate) struct SimdInputAVX { +pub(crate) struct SimdInput { v0: __m256i, v1: __m256i, } -impl Stage1Parse for SimdInputAVX { +impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp; type SimdRepresentation = __m256i; #[cfg_attr(not(feature = "no-inline"), inline)] diff --git a/src/impls/mod.rs b/src/impls/mod.rs new file mode 100644 index 00000000..0428044b --- /dev/null +++ b/src/impls/mod.rs @@ -0,0 +1,19 @@ +#[cfg(any(test, not(feature = "portable")))] +/// rust native implementation +pub(crate) mod native; + +#[cfg(feature = "portable")] +/// rust native implementation +pub(crate) mod portable; + +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +pub(crate) mod avx2; + +#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +pub(crate) mod sse42; + +#[cfg(target_arch = "aarch64")] +pub(crate) mod neon; + +#[cfg(target_feature = "simd128")] +pub(crate) mod simd128; diff --git a/src/native/deser.rs b/src/impls/native/deser.rs similarity index 97% rename from src/native/deser.rs rename to src/impls/native/deser.rs index 0e07b587..3e63a67c 100644 --- a/src/native/deser.rs +++ b/src/impls/native/deser.rs @@ -114,9 +114,7 @@ mod test { input2.append(vec![0; SIMDJSON_PADDING * 2].as_mut()); let mut buffer = vec![0; 1024]; - let r = unsafe { - Deserializer::parse_str_(input.as_mut_ptr(), &input2, buffer.as_mut_slice(), 0)? - }; + let r = unsafe { super::parse_str(input.as_mut_ptr(), &input2, buffer.as_mut_slice(), 0)? 
}; dbg!(r); Ok(String::from(r)) } diff --git a/src/native/mod.rs b/src/impls/native/mod.rs similarity index 91% rename from src/native/mod.rs rename to src/impls/native/mod.rs index 904d5887..80718c1e 100644 --- a/src/native/mod.rs +++ b/src/impls/native/mod.rs @@ -1,7 +1,9 @@ use simdutf8::basic::imp::ChunkedUtf8Validator; -pub mod deser; -pub mod stage1; +pub(crate) mod deser; +mod stage1; + +pub(crate) use stage1::SimdInput; /// This is a hack, since there is no native implementation of the chunked validator we pre-validate the entire /// input string in the case of a fallback and then always let the chunked validator return true. diff --git a/src/native/stage1.rs b/src/impls/native/stage1.rs similarity index 99% rename from src/native/stage1.rs rename to src/impls/native/stage1.rs index 2907d5c0..a19b4365 100644 --- a/src/native/stage1.rs +++ b/src/impls/native/stage1.rs @@ -287,18 +287,18 @@ fn u8x16_bitmask(a: v128) -> u16 { // } #[derive(Debug)] -pub(crate) struct NativeInput { +pub(crate) struct SimdInput { v0: v128, v1: v128, v2: v128, v3: v128, } -impl Stage1Parse for NativeInput { +impl Stage1Parse for SimdInput { type Utf8Validator = super::ChunkedUtf8ValidatorImp; type SimdRepresentation = v128; unsafe fn new(ptr: &[u8]) -> Self { - NativeInput { + SimdInput { v0: *(ptr.as_ptr().cast::()), v1: *(ptr.as_ptr().add(16).cast::()), v2: *(ptr.as_ptr().add(32).cast::()), diff --git a/src/neon/deser.rs b/src/impls/neon/deser.rs similarity index 100% rename from src/neon/deser.rs rename to src/impls/neon/deser.rs diff --git a/src/impls/neon/mod.rs b/src/impls/neon/mod.rs new file mode 100644 index 00000000..547212c8 --- /dev/null +++ b/src/impls/neon/mod.rs @@ -0,0 +1,4 @@ +mod deser; +mod stage1; + +pub(crate) use stage1::SimdInput; diff --git a/src/neon/stage1.rs b/src/impls/neon/stage1.rs similarity index 99% rename from src/neon/stage1.rs rename to src/impls/neon/stage1.rs index 0f459a62..ccb5c92d 100644 --- a/src/neon/stage1.rs +++ 
b/src/impls/neon/stage1.rs @@ -42,14 +42,14 @@ pub unsafe fn neon_movemask_bulk( //pub const SIMDINPUT_LENGTH: usize = 64; #[derive(Debug)] -pub(crate) struct SimdInputNEON { +pub(crate) struct SimdInput { v0: uint8x16_t, v1: uint8x16_t, v2: uint8x16_t, v3: uint8x16_t, } -impl Stage1Parse for SimdInputNEON { +impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp; type SimdRepresentation = int8x16_t; #[cfg_attr(not(feature = "no-inline"), inline)] diff --git a/src/portable/deser.rs b/src/impls/portable/deser.rs similarity index 100% rename from src/portable/deser.rs rename to src/impls/portable/deser.rs diff --git a/src/impls/portable/mod.rs b/src/impls/portable/mod.rs new file mode 100644 index 00000000..4ec8345c --- /dev/null +++ b/src/impls/portable/mod.rs @@ -0,0 +1,4 @@ +pub mod deser; +pub mod stage1; + +pub(crate) use stage1::SimdInput; diff --git a/src/portable/stage1.rs b/src/impls/portable/stage1.rs similarity index 93% rename from src/portable/stage1.rs rename to src/impls/portable/stage1.rs index 2244d86f..802308ee 100644 --- a/src/portable/stage1.rs +++ b/src/impls/portable/stage1.rs @@ -1,35 +1,12 @@ -use std::{ - ops::Shr, - simd::{prelude::*, ToBitMask}, -}; - -macro_rules! low_nibble_mask { - () => { - [ - 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, - 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 12, 1, 2, 9, 0, 0, - ] - }; -} - -macro_rules! 
high_nibble_mask { - () => { - [ - 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, - 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, - 0, 0, 0, 3, 2, 1, 0, 0, - ] - }; -} +use std::simd::{prelude::*, ToBitMask}; use crate::{static_cast_i32, Stage1Parse}; #[derive(Debug)] -pub(crate) struct SimdInputPortable { +pub(crate) struct SimdInput { v: u8x64, } -impl Stage1Parse for SimdInputPortable { +impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::portable::ChunkedUtf8ValidatorImp; type SimdRepresentation = u8x64; #[cfg_attr(not(feature = "no-inline"), inline)] @@ -87,8 +64,16 @@ impl Stage1Parse for SimdInputPortable { // * carriage return 0x0d // these go into the next 2 buckets of the comparison (8/16) - const LOW_NIBBLE_MASK: u8x64 = u8x64::from_array(low_nibble_mask!()); - const HIGH_NIBBLE_MASK: u8x64 = u8x64::from_array(high_nibble_mask!()); + const LOW_NIBBLE_MASK: u8x64 = u8x64::from_array([ + 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, + 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 0, 8, 12, 1, 2, 9, 0, 0, + ]); + const HIGH_NIBBLE_MASK: u8x64 = u8x64::from_array([ + 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, + 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0, 1, + 0, 0, 0, 3, 2, 1, 0, 0, + ]); let structural_shufti_mask: u8x64 = u8x64::splat(0b0000_0111); // 0x07 let whitespace_shufti_mask: u8x64 = u8x64::splat(0b0001_1000); // 0x18 @@ -96,7 +81,7 @@ impl Stage1Parse for SimdInputPortable { // FIXME: do we need this dance? 
let v32 = i32x16::from_array(std::mem::transmute(*self.v.as_array())); - let v_shifted = v32.shr(i32x16::splat(4)); + let v_shifted = v32 >> i32x16::splat(4); let v_shifted = u8x64::from_array(std::mem::transmute(v_shifted)); // We have to adjust the index here the reason being that while the avx instruction diff --git a/src/simd128/deser.rs b/src/impls/simd128/deser.rs similarity index 100% rename from src/simd128/deser.rs rename to src/impls/simd128/deser.rs diff --git a/src/impls/simd128/mod.rs b/src/impls/simd128/mod.rs new file mode 100644 index 00000000..547212c8 --- /dev/null +++ b/src/impls/simd128/mod.rs @@ -0,0 +1,4 @@ +mod deser; +mod stage1; + +pub(crate) use stage1::SimdInput; diff --git a/src/simd128/stage1.rs b/src/impls/simd128/stage1.rs similarity index 99% rename from src/simd128/stage1.rs rename to src/impls/simd128/stage1.rs index 9d66328e..b9aacb1f 100644 --- a/src/simd128/stage1.rs +++ b/src/impls/simd128/stage1.rs @@ -3,14 +3,14 @@ use std::arch::wasm32::*; use crate::Stage1Parse; #[derive(Debug)] -pub(crate) struct SimdInput128 { +pub(crate) struct SimdInput { v0: v128, v1: v128, v2: v128, v3: v128, } -impl Stage1Parse for SimdInput128 { +impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp; type SimdRepresentation = v128; diff --git a/src/sse42/deser.rs b/src/impls/sse42/deser.rs similarity index 100% rename from src/sse42/deser.rs rename to src/impls/sse42/deser.rs diff --git a/src/impls/sse42/mod.rs b/src/impls/sse42/mod.rs new file mode 100644 index 00000000..19effade --- /dev/null +++ b/src/impls/sse42/mod.rs @@ -0,0 +1,4 @@ +pub(crate) mod deser; +mod stage1; + +pub(crate) use stage1::SimdInput; diff --git a/src/sse42/stage1.rs b/src/impls/sse42/stage1.rs similarity index 99% rename from src/sse42/stage1.rs rename to src/impls/sse42/stage1.rs index 9a4476ba..5a257ea7 100644 --- a/src/sse42/stage1.rs +++ b/src/impls/sse42/stage1.rs @@ -32,14 +32,14 @@ macro_rules! 
high_nibble_mask { } #[derive(Debug)] -pub(crate) struct SimdInputSSE { +pub(crate) struct SimdInput { v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i, } -impl Stage1Parse for SimdInputSSE { +impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp; type SimdRepresentation = __m128i; #[target_feature(enable = "sse4.2")] diff --git a/src/lib.rs b/src/lib.rs index 0505b21d..1fbed6ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -120,13 +120,6 @@ //! let v: Value = simd_json::serde::from_slice(&mut d).unwrap(); //! ``` -/// rust native implementation -mod native; - -#[cfg(feature = "portable")] -/// rust native implementation -mod portable; - #[cfg(feature = "serde_impl")] extern crate serde as serde_ext; @@ -154,6 +147,8 @@ mod stringparse; use safer_unchecked::GetSaferUnchecked; +mod impls; + /// Reexport of Cow pub mod cow; @@ -162,26 +157,6 @@ pub const SIMDJSON_PADDING: usize = 32; // take upper limit mem::size_of::<__m25 /// It's 64 for all (Is this correct?) 
pub const SIMDINPUT_LENGTH: usize = 64; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -mod avx2; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -pub(crate) use crate::avx2::stage1::SimdInputAVX; - -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -mod sse42; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] -pub(crate) use crate::sse42::stage1::SimdInputSSE; - -#[cfg(target_arch = "aarch64")] -mod neon; -#[cfg(target_arch = "aarch64")] -pub(crate) use crate::neon::stage1::SimdInputNEON; - -#[cfg(target_feature = "simd128")] -mod simd128; -#[cfg(target_feature = "simd128")] -pub(crate) use crate::simd128::stage1::SimdInput128; - mod stage2; /// simd-json JSON-DOM value pub mod value; @@ -395,97 +370,175 @@ pub struct Deserializer<'de> { idx: usize, } +// architecture dependant parse_str + impl<'de> Deserializer<'de> { #[inline] - #[cfg(all( - any(target_arch = "x86_64", target_arch = "x86"), - not(feature = "avx2"), - not(feature = "sse42"), - ))] + #[cfg(not(any( + target_feature = "avx2", + target_feature = "sse4.2", + target_feature = "simd128", + target_arch = "aarch64", + )))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, ) -> Result<&'de str> { - if std::is_x86_feature_detected!("avx2") { - crate::avx2::deser::parse_str(input, data, buffer, idx) - } else if std::is_x86_feature_detected!("sse4.2") { - crate::sse42::deser::parse_str(input, data, buffer, idx) - } else { - #[cfg(feature = "portable")] - let r = crate::portable::deser::parse_str(input, data, buffer, idx); - #[cfg(not(feature = "portable"))] - let r = crate::native::deser::parse_str(input, data, buffer, idx); - - r + #[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + ))] + { + if std::is_x86_feature_detected!("avx2") { + return impls::avx2::deser::parse_str(input, data, buffer, idx); + } else if std::is_x86_feature_detected!("sse4.2") { + 
return impls::sse42::deser::parse_str(input, data, buffer, idx); + } } + + #[cfg(feature = "portable")] + let r = impls::portable::deser::parse_str(input, data, buffer, idx); + #[cfg(not(feature = "portable"))] + let r = impls::native::deser::parse_str(input, data, buffer, idx); + + r } - /// To allow inlining #[inline] - #[cfg(target_arch = "aarch64")] - pub(crate) fn parse_str_<'invoke>( + #[cfg(feature = "sse4.2")] + pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, ) -> std::result::Result, ErrorType> { - unsafe { crate::neon::deser::parse_str(input, data, buffer, idx) } + impls::sse42::deser::parse_str(input, data, buffer, idx) } - /// To allow inlining + #[inline] - #[cfg(target_feature = "simd128")] - pub(crate) fn parse_str_<'invoke>( + #[cfg(target_arch = "aarch64")] + pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, ) -> std::result::Result, ErrorType> { - unsafe { crate::simd128::deser::parse_str(input, data, buffer, idx) } + impls::neon::deser::parse_str(input, data, buffer, idx) } - - #[cfg(all( - not(target_feature = "simd128"), - not(target_arch = "aarch64"), - not(target_arch = "x86_64"), - not(target_arch = "x86") - ))] - pub(crate) fn parse_str_<'invoke>( + #[inline] + #[cfg(target_feature = "simd128")] + pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, ) -> std::result::Result, ErrorType> { - #[cfg(feature = "portable")] - let r = crate::portable::deser::parse_str(input, data, buffer, idx); - #[cfg(not(feature = "portable"))] - let r = crate::native::deser::parse_str(input, data, buffer, idx); - r + impls::simd128::deser::parse_str(input, data, buffer, idx) } #[inline] #[cfg(feature = "avx2")] - pub(crate) fn parse_str_<'invoke>( + pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: 
usize, ) -> std::result::Result, ErrorType> { - unsafe { crate::avx2::deser::parse_str(input, data, buffer, idx) } + impls::avx2::deser::parse_str(input, data, buffer, idx) } +} +/// architecture dependant `find_structural_bits` +impl<'de> Deserializer<'de> { + // This version is the runtime detection version, it is only enabled if the `runtime-detection` + // feature is enabled and we are not on neon or wasm platforms + // + // We do allow non x86 platforms for this as well as it provides a fallback with std::simd and + // rust native implementations #[inline] - #[cfg(feature = "sse4.2")] - pub(crate) fn parse_str_<'invoke>( - input: *mut u8, - data: &'invoke [u8], - buffer: &'invoke mut [u8], - idx: usize, - ) -> std::result::Result, ErrorType> { - unsafe { crate::sse42::deser::parse_str(input, data, buffer, idx) } + #[cfg(not(any( + target_feature = "avx2", + target_feature = "sse4.2", + target_feature = "simd128", + target_arch = "aarch64", + )))] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + #[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + ))] + { + if std::is_x86_feature_detected!("avx2") { + return Self::_find_structural_bits::( + input, + structural_indexes, + ); + } else if std::is_x86_feature_detected!("sse4.2") { + return Self::_find_structural_bits::( + input, + structural_indexes, + ); + } + } + + // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything + #[cfg(not(feature = "portable"))] + let r = { + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), + }; + #[cfg(not(feature = "portable"))] + Self::_find_structural_bits::(input, structural_indexes) + }; + #[cfg(feature = "portable")] + let r = + Self::_find_structural_bits::(input, structural_indexes); + r + } + + #[inline] + #[cfg(target_feature = "avx2")] + pub(crate) unsafe fn 
find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + Self::_find_structural_bits::(input, structural_indexes) + } + + #[inline] + #[cfg(target_feature = "sse4.2")] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + Self::_find_structural_bits::(input, structural_indexes) } + #[inline] + #[cfg(target_arch = "aarch64")] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + Self::_find_structural_bits::(input, structural_indexes) + } + + #[inline] + #[cfg(target_feature = "simd128")] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + Self::_find_structural_bits::(input, structural_indexes) + } +} +impl<'de> Deserializer<'de> { /// Extracts the tape from the Deserializer #[must_use] pub fn into_tape(self) -> Vec> { @@ -587,119 +640,6 @@ impl<'de> Deserializer<'de> { *self.tape.get_kinda_unchecked(self.idx) } - #[inline] - #[allow(clippy::cast_possible_truncation)] - #[cfg(not(any(feature = "avx2", feature = "sse42")))] - pub(crate) unsafe fn find_structural_bits( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - { - if std::is_x86_feature_detected!("avx2") { - Self::find_structural_bits_avx(input, structural_indexes) - } else if std::is_x86_feature_detected!("sse4.2") { - Self::find_structural_bits_sse(input, structural_indexes) - } else { - // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything - #[cfg(not(feature = "portable"))] - match core::str::from_utf8(input) { - Ok(_) => (), - Err(_) => return Err(ErrorType::InvalidUtf8), - }; - #[cfg(not(feature = "portable"))] - let r = Self::find_structural_bits_native(input, 
structural_indexes); - #[cfg(feature = "portable")] - let r = Self::find_structural_bits_portable(input, structural_indexes); - r - } - } - #[cfg(target_arch = "aarch64")] - { - return Self::_find_structural_bits::(input, structural_indexes); - } - - #[cfg(target_feature = "simd128")] - { - return Self::_find_structural_bits::(input, structural_indexes); - } - // If we're on a non supported platform fall back to the native ones - #[cfg(all( - not(target_feature = "simd128"), - not(target_arch = "aarch64"), - not(target_arch = "x86_64"), - not(target_arch = "x86") - ))] - { - // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything - #[cfg(not(feature = "portable"))] - match core::str::from_utf8(input) { - Ok(_) => (), - Err(_) => return Err(ErrorType::InvalidUtf8), - }; - #[cfg(not(feature = "portable"))] - let r = Self::find_structural_bits_native(input, structural_indexes); - #[cfg(feature = "portable")] - let r = Self::find_structural_bits_portable(input, structural_indexes); - r - } - } - - #[inline] - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - #[target_feature(enable = "avx2")] - pub(crate) unsafe fn find_structural_bits_avx( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::(input, structural_indexes) - } - - #[inline] - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - #[target_feature(enable = "sse4.2")] - pub(crate) unsafe fn find_structural_bits_sse( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::(input, structural_indexes) - } - - #[inline] - // #[cfg(not(feature = "portable"))] - pub(crate) unsafe fn find_structural_bits_native( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - use native::stage1::NativeInput; - - Self::_find_structural_bits::(input, structural_indexes) - } - - #[inline] - #[cfg(feature = 
"portable")] - pub(crate) unsafe fn find_structural_bits_portable( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - use portable::stage1::SimdInputPortable; - - Self::_find_structural_bits::(input, structural_indexes) - } - - #[allow(clippy::cast_possible_truncation)] - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - #[cfg(feature = "avx2")] - pub(crate) unsafe fn find_structural_bits( - input: &[u8], - structural_indexes: &mut Vec, - ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::<_, SimdInputAVX, ChunkedUtf8ValidatorImpAVX2>( - input, - structural_indexes, - ) - } - #[cfg_attr(not(feature = "no-inline"), inline(always))] #[allow(clippy::cast_possible_truncation)] pub(crate) unsafe fn _find_structural_bits( diff --git a/src/neon/mod.rs b/src/neon/mod.rs deleted file mode 100644 index 27be9a6e..00000000 --- a/src/neon/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod deser; -pub mod stage1; - -pub(crate) struct ChunkedUtf8ValidatorImp {} diff --git a/src/portable/mod.rs b/src/portable/mod.rs deleted file mode 100644 index fbcc5482..00000000 --- a/src/portable/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod deser; -pub(crate) mod stage1; diff --git a/src/simd128/mod.rs b/src/simd128/mod.rs deleted file mode 100644 index 6db7ee75..00000000 --- a/src/simd128/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod deser; -pub mod stage1; diff --git a/src/sse42/mod.rs b/src/sse42/mod.rs deleted file mode 100644 index 6db7ee75..00000000 --- a/src/sse42/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod deser; -pub mod stage1; diff --git a/src/tests/impls.rs b/src/tests/impls.rs index 0a66d183..2c3d5d34 100644 --- a/src/tests/impls.rs +++ b/src/tests/impls.rs @@ -1,4 +1,4 @@ -use crate::{native::stage1::NativeInput, Deserializer, Stage1Parse, SIMDJSON_PADDING}; +use crate::{impls, Deserializer, Stage1Parse, SIMDJSON_PADDING}; fn test_find_structural_bits(input_str: &str, expected: &[u32]) { let mut input 
= input_str.as_bytes().to_vec(); @@ -42,20 +42,20 @@ fn find_structural_bits_test_cases() { #[test] fn find_structural_bits_native() { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } #[cfg(feature = "portable")] #[test] fn find_structural_bits_portable() { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } #[cfg(target_feature = "avx2")] #[test] fn find_structural_bits_avx() { if std::is_x86_feature_detected!("avx2") { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } } @@ -63,18 +63,18 @@ fn find_structural_bits_avx() { #[test] fn find_structural_bits_sse() { if std::is_x86_feature_detected!("sse4.2") { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } } #[cfg(target_arch = "aarch64")] #[test] fn find_structural_bits_aarch64() { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } #[cfg(target_feature = "simd128")] #[test] fn find_structural_bits_simd128() { - find_structural_bits_test_cases::(); + find_structural_bits_test_cases::(); } diff --git a/tree b/tree new file mode 100644 index 00000000..9d3c4ef2 --- /dev/null +++ b/tree @@ -0,0 +1,215 @@ +simd-json v0.12.0 (/home/heinz/simd-json) +β”œβ”€β”€ ahash v0.8.3 +β”‚ β”œβ”€β”€ cfg-if v1.0.0 +β”‚ β”œβ”€β”€ getrandom v0.2.10 +β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 +β”‚ β”‚ └── libc v0.2.147 +β”‚ └── once_cell v1.18.0 +β”‚ [build-dependencies] +β”‚ └── version_check v0.9.4 +β”œβ”€β”€ alloc_counter v0.0.4 +β”‚ β”œβ”€β”€ alloc_counter_macro v0.0.2 (proc-macro) +β”‚ β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 +β”‚ β”‚ β”‚ └── unicode-ident v1.0.11 +β”‚ β”‚ β”œβ”€β”€ quote v1.0.33 +β”‚ β”‚ β”‚ └── proc-macro2 v1.0.66 (*) +β”‚ β”‚ └── syn v1.0.109 +β”‚ β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) +β”‚ β”‚ β”œβ”€β”€ quote v1.0.33 (*) +β”‚ β”‚ └── unicode-ident v1.0.11 +β”‚ └── pin-utils v0.1.0 +β”œβ”€β”€ beef v0.5.2 +β”œβ”€β”€ colored v2.0.4 +β”‚ β”œβ”€β”€ is-terminal v0.4.9 +β”‚ β”‚ └── rustix 
v0.38.8 +β”‚ β”‚ β”œβ”€β”€ bitflags v2.4.0 +β”‚ β”‚ └── linux-raw-sys v0.4.5 +β”‚ └── lazy_static v1.4.0 +β”œβ”€β”€ getopts v0.2.21 +β”‚ └── unicode-width v0.1.10 +β”œβ”€β”€ halfbrown v0.2.4 +β”‚ β”œβ”€β”€ arrayvec v0.7.4 +β”‚ β”œβ”€β”€ hashbrown v0.13.2 +β”‚ β”‚ └── ahash v0.8.3 (*) +β”‚ └── serde v1.0.185 +β”‚ └── serde_derive v1.0.185 (proc-macro) +β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) +β”‚ β”œβ”€β”€ quote v1.0.33 (*) +β”‚ └── syn v2.0.29 +β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) +β”‚ β”œβ”€β”€ quote v1.0.33 (*) +β”‚ └── unicode-ident v1.0.11 +β”œβ”€β”€ jemallocator v0.5.4 +β”‚ β”œβ”€β”€ jemalloc-sys v0.5.4+5.3.0-patched +β”‚ β”‚ └── libc v0.2.147 +β”‚ β”‚ [build-dependencies] +β”‚ β”‚ └── cc v1.0.83 +β”‚ β”‚ └── libc v0.2.147 +β”‚ └── libc v0.2.147 +β”œβ”€β”€ lexical-core v0.8.5 +β”‚ β”œβ”€β”€ lexical-parse-float v0.8.5 +β”‚ β”‚ β”œβ”€β”€ lexical-parse-integer v0.8.6 +β”‚ β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 +β”‚ β”‚ β”‚ β”‚ └── static_assertions v1.1.0 +β”‚ β”‚ β”‚ └── static_assertions v1.1.0 +β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) +β”‚ β”‚ └── static_assertions v1.1.0 +β”‚ β”œβ”€β”€ lexical-parse-integer v0.8.6 (*) +β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) +β”‚ β”œβ”€β”€ lexical-write-float v0.8.5 +β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) +β”‚ β”‚ β”œβ”€β”€ lexical-write-integer v0.8.5 +β”‚ β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) +β”‚ β”‚ β”‚ └── static_assertions v1.1.0 +β”‚ β”‚ └── static_assertions v1.1.0 +β”‚ └── lexical-write-integer v0.8.5 (*) +β”œβ”€β”€ once_cell v1.18.0 +β”œβ”€β”€ perfcnt v0.8.0 +β”‚ β”œβ”€β”€ bitflags v1.3.2 +β”‚ β”œβ”€β”€ libc v0.2.147 +β”‚ β”œβ”€β”€ mmap v0.1.1 +β”‚ β”‚ β”œβ”€β”€ libc v0.1.12 +β”‚ β”‚ └── tempdir v0.3.7 +β”‚ β”‚ β”œβ”€β”€ rand v0.4.6 +β”‚ β”‚ β”‚ └── libc v0.2.147 +β”‚ β”‚ └── remove_dir_all v0.5.3 +β”‚ β”œβ”€β”€ nom v4.2.3 +β”‚ β”‚ └── memchr v2.5.0 +β”‚ β”‚ [build-dependencies] +β”‚ β”‚ └── version_check v0.1.5 +β”‚ └── x86 v0.47.0 +β”‚ β”œβ”€β”€ bit_field v0.10.2 +β”‚ β”œβ”€β”€ bitflags v1.3.2 +β”‚ β”œβ”€β”€ 
phf v0.9.0 +β”‚ β”‚ └── phf_shared v0.9.0 +β”‚ β”‚ └── siphasher v0.3.10 +β”‚ └── raw-cpuid v10.7.0 +β”‚ └── bitflags v1.3.2 +β”‚ [build-dependencies] +β”‚ β”œβ”€β”€ csv v1.2.2 +β”‚ β”‚ β”œβ”€β”€ csv-core v0.1.10 +β”‚ β”‚ β”‚ └── memchr v2.5.0 +β”‚ β”‚ β”œβ”€β”€ itoa v1.0.9 +β”‚ β”‚ β”œβ”€β”€ ryu v1.0.15 +β”‚ β”‚ └── serde v1.0.185 +β”‚ β”œβ”€β”€ phf_codegen v0.9.0 +β”‚ β”‚ β”œβ”€β”€ phf_generator v0.9.1 +β”‚ β”‚ β”‚ β”œβ”€β”€ phf_shared v0.9.0 +β”‚ β”‚ β”‚ β”‚ └── siphasher v0.3.10 +β”‚ β”‚ β”‚ └── rand v0.8.5 +β”‚ β”‚ β”‚ β”œβ”€β”€ libc v0.2.147 +β”‚ β”‚ β”‚ β”œβ”€β”€ rand_chacha v0.3.1 +β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ ppv-lite86 v0.2.17 +β”‚ β”‚ β”‚ β”‚ └── rand_core v0.6.4 +β”‚ β”‚ β”‚ β”‚ └── getrandom v0.2.10 (*) +β”‚ β”‚ β”‚ └── rand_core v0.6.4 (*) +β”‚ β”‚ └── phf_shared v0.9.0 (*) +β”‚ └── serde_json v1.0.105 +β”‚ β”œβ”€β”€ itoa v1.0.9 +β”‚ β”œβ”€β”€ ryu v1.0.15 +β”‚ └── serde v1.0.185 (*) +β”œβ”€β”€ serde v1.0.185 (*) +β”œβ”€β”€ serde_json v1.0.105 (*) +β”œβ”€β”€ simdutf8 v0.1.4 (/home/heinz/simdutf8) +└── value-trait v0.6.1 + β”œβ”€β”€ float-cmp v0.9.0 + β”‚ └── num-traits v0.2.16 + β”‚ └── libm v0.2.7 + β”‚ [build-dependencies] + β”‚ └── autocfg v1.1.0 + β”œβ”€β”€ halfbrown v0.2.4 (*) + β”œβ”€β”€ itoa v1.0.9 + └── ryu v1.0.15 +[dev-dependencies] +β”œβ”€β”€ core_affinity v0.8.1 +β”‚ β”œβ”€β”€ libc v0.2.147 +β”‚ └── num_cpus v1.16.0 +β”‚ └── libc v0.2.147 +β”œβ”€β”€ criterion v0.5.1 +β”‚ β”œβ”€β”€ anes v0.1.6 +β”‚ β”œβ”€β”€ cast v0.3.0 +β”‚ β”œβ”€β”€ ciborium v0.2.1 +β”‚ β”‚ β”œβ”€β”€ ciborium-io v0.2.1 +β”‚ β”‚ β”œβ”€β”€ ciborium-ll v0.2.1 +β”‚ β”‚ β”‚ β”œβ”€β”€ ciborium-io v0.2.1 +β”‚ β”‚ β”‚ └── half v1.8.2 +β”‚ β”‚ └── serde v1.0.185 (*) +β”‚ β”œβ”€β”€ clap v4.3.23 +β”‚ β”‚ └── clap_builder v4.3.23 +β”‚ β”‚ β”œβ”€β”€ anstyle v1.0.1 +β”‚ β”‚ └── clap_lex v0.5.0 +β”‚ β”œβ”€β”€ criterion-plot v0.5.0 +β”‚ β”‚ β”œβ”€β”€ cast v0.3.0 +β”‚ β”‚ └── itertools v0.10.5 +β”‚ β”‚ └── either v1.9.0 +β”‚ β”œβ”€β”€ is-terminal v0.4.9 (*) +β”‚ β”œβ”€β”€ itertools v0.10.5 
(*) +β”‚ β”œβ”€β”€ num-traits v0.2.16 (*) +β”‚ β”œβ”€β”€ once_cell v1.18.0 +β”‚ β”œβ”€β”€ oorandom v11.1.3 +β”‚ β”œβ”€β”€ plotters v0.3.5 +β”‚ β”‚ β”œβ”€β”€ num-traits v0.2.16 (*) +β”‚ β”‚ β”œβ”€β”€ plotters-backend v0.3.5 +β”‚ β”‚ └── plotters-svg v0.3.5 +β”‚ β”‚ └── plotters-backend v0.3.5 +β”‚ β”œβ”€β”€ rayon v1.7.0 +β”‚ β”‚ β”œβ”€β”€ either v1.9.0 +β”‚ β”‚ └── rayon-core v1.11.0 +β”‚ β”‚ β”œβ”€β”€ crossbeam-channel v0.5.8 +β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 +β”‚ β”‚ β”‚ └── crossbeam-utils v0.8.16 +β”‚ β”‚ β”‚ └── cfg-if v1.0.0 +β”‚ β”‚ β”œβ”€β”€ crossbeam-deque v0.8.3 +β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 +β”‚ β”‚ β”‚ β”œβ”€β”€ crossbeam-epoch v0.9.15 +β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 +β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ crossbeam-utils v0.8.16 (*) +β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ memoffset v0.9.0 +β”‚ β”‚ β”‚ β”‚ β”‚ [build-dependencies] +β”‚ β”‚ β”‚ β”‚ β”‚ └── autocfg v1.1.0 +β”‚ β”‚ β”‚ β”‚ └── scopeguard v1.2.0 +β”‚ β”‚ β”‚ β”‚ [build-dependencies] +β”‚ β”‚ β”‚ β”‚ └── autocfg v1.1.0 +β”‚ β”‚ β”‚ └── crossbeam-utils v0.8.16 (*) +β”‚ β”‚ β”œβ”€β”€ crossbeam-utils v0.8.16 (*) +β”‚ β”‚ └── num_cpus v1.16.0 (*) +β”‚ β”œβ”€β”€ regex v1.9.3 +β”‚ β”‚ β”œβ”€β”€ regex-automata v0.3.6 +β”‚ β”‚ β”‚ └── regex-syntax v0.7.4 +β”‚ β”‚ └── regex-syntax v0.7.4 +β”‚ β”œβ”€β”€ serde v1.0.185 (*) +β”‚ β”œβ”€β”€ serde_derive v1.0.185 (proc-macro) (*) +β”‚ β”œβ”€β”€ serde_json v1.0.105 (*) +β”‚ β”œβ”€β”€ tinytemplate v1.2.1 +β”‚ β”‚ β”œβ”€β”€ serde v1.0.185 (*) +β”‚ β”‚ └── serde_json v1.0.105 (*) +β”‚ └── walkdir v2.3.3 +β”‚ └── same-file v1.0.6 +β”œβ”€β”€ float-cmp v0.9.0 (*) +β”œβ”€β”€ getopts v0.2.21 (*) +└── proptest v1.2.0 + β”œβ”€β”€ bit-set v0.5.3 + β”‚ └── bit-vec v0.6.3 + β”œβ”€β”€ bitflags v1.3.2 + β”œβ”€β”€ byteorder v1.4.3 + β”œβ”€β”€ lazy_static v1.4.0 + β”œβ”€β”€ num-traits v0.2.16 (*) + β”œβ”€β”€ rand v0.8.5 + β”‚ β”œβ”€β”€ libc v0.2.147 + β”‚ β”œβ”€β”€ rand_chacha v0.3.1 (*) + β”‚ └── rand_core v0.6.4 (*) + β”œβ”€β”€ rand_chacha v0.3.1 (*) + β”œβ”€β”€ rand_xorshift 
v0.3.0 + β”‚ └── rand_core v0.6.4 (*) + β”œβ”€β”€ regex-syntax v0.6.29 + β”œβ”€β”€ rusty-fork v0.3.0 + β”‚ β”œβ”€β”€ fnv v1.0.7 + β”‚ β”œβ”€β”€ quick-error v1.2.3 + β”‚ β”œβ”€β”€ tempfile v3.8.0 + β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 + β”‚ β”‚ β”œβ”€β”€ fastrand v2.0.0 + β”‚ β”‚ └── rustix v0.38.8 (*) + β”‚ └── wait-timeout v0.2.0 + β”‚ └── libc v0.2.147 + β”œβ”€β”€ tempfile v3.8.0 (*) + └── unarray v0.1.4 From d59cafcda3c1e2016983b0695060ebc50ceca9d6 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Fri, 20 Oct 2023 10:34:12 +0200 Subject: [PATCH 4/8] Port simdutf8's runtime feature detection code Signed-off-by: Heinz N. Gies --- src/impls/avx2/deser.rs | 15 ++-- src/impls/native/deser.rs | 9 +- src/impls/portable/deser.rs | 5 +- src/impls/sse42/deser.rs | 15 ++-- src/lib.rs | 166 ++++++++++++++++++++++++++++-------- 5 files changed, 158 insertions(+), 52 deletions(-) diff --git a/src/impls/avx2/deser.rs b/src/impls/avx2/deser.rs index edcabc87..f1841cd9 100644 --- a/src/impls/avx2/deser.rs +++ b/src/impls/avx2/deser.rs @@ -9,11 +9,12 @@ use std::arch::x86_64::{ _mm256_storeu_si256, }; -pub use crate::error::{Error, ErrorType}; -use crate::safer_unchecked::GetSaferUnchecked; -use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; -use crate::Deserializer; -pub use crate::Result; +use crate::{ + error::ErrorType, + safer_unchecked::GetSaferUnchecked, + stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, + Deserializer, Result, SillyWrapper, +}; #[target_feature(enable = "avx2")] #[allow( @@ -24,12 +25,14 @@ pub use crate::Result; )] #[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) unsafe fn parse_str<'invoke, 'de>( - input: *mut u8, + input: SillyWrapper<'de>, data: &'invoke [u8], buffer: &'invoke mut [u8], mut idx: usize, ) -> Result<&'de str> { use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + + let input = input.input; // Add 1 to skip the initial " idx += 1; //let mut read: usize = 0; diff --git a/src/impls/native/deser.rs 
b/src/impls/native/deser.rs index 3e63a67c..d0bc5b35 100644 --- a/src/impls/native/deser.rs +++ b/src/impls/native/deser.rs @@ -1,18 +1,19 @@ use crate::{ safer_unchecked::GetSaferUnchecked, stringparse::{get_unicode_codepoint, ESCAPE_MAP}, - Deserializer, ErrorType, Result, + Deserializer, ErrorType, Result, SillyWrapper, }; #[allow(clippy::cast_possible_truncation)] pub(crate) unsafe fn parse_str<'invoke, 'de>( - input: *mut u8, + input: SillyWrapper<'de>, data: &'invoke [u8], _buffer: &'invoke mut [u8], idx: usize, ) -> Result<&'de str> { use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + let input = input.input; // skip leading `"` let src: &[u8] = data.get_kinda_unchecked(idx + 1..); let input = input.add(idx + 1); @@ -114,7 +115,9 @@ mod test { input2.append(vec![0; SIMDJSON_PADDING * 2].as_mut()); let mut buffer = vec![0; 1024]; - let r = unsafe { super::parse_str(input.as_mut_ptr(), &input2, buffer.as_mut_slice(), 0)? }; + let r = unsafe { + super::parse_str(input.as_mut_ptr().into(), &input2, buffer.as_mut_slice(), 0)? 
+ }; dbg!(r); Ok(String::from(r)) } diff --git a/src/impls/portable/deser.rs b/src/impls/portable/deser.rs index d1aab295..75b6b52a 100644 --- a/src/impls/portable/deser.rs +++ b/src/impls/portable/deser.rs @@ -3,16 +3,17 @@ use std::simd::{u8x32, SimdPartialEq, ToBitMask}; use crate::{ safer_unchecked::GetSaferUnchecked, stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, - Deserializer, ErrorType, Result, + Deserializer, ErrorType, Result, SillyWrapper, }; #[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) unsafe fn parse_str<'invoke, 'de>( - input: *mut u8, + input: SillyWrapper<'de>, data: &'invoke [u8], buffer: &'invoke mut [u8], mut idx: usize, ) -> Result<&'de str> { + let input = input.input; use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; const SLASH: u8x32 = u8x32::from_array([b'\\'; 32]); diff --git a/src/impls/sse42/deser.rs b/src/impls/sse42/deser.rs index 0539b35d..00d801f4 100644 --- a/src/impls/sse42/deser.rs +++ b/src/impls/sse42/deser.rs @@ -4,26 +4,27 @@ use std::arch::x86 as arch; #[cfg(target_arch = "x86_64")] use std::arch::x86_64 as arch; +use crate::{ + error::ErrorType, + safer_unchecked::GetSaferUnchecked, + stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, + Deserializer, Result, SillyWrapper, +}; use arch::{ __m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8, _mm_storeu_si128, }; -pub use crate::error::{Error, ErrorType}; -use crate::safer_unchecked::GetSaferUnchecked; -use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP}; -use crate::Deserializer; -pub use crate::Result; - #[target_feature(enable = "sse4.2")] #[allow(clippy::if_not_else, clippy::cast_possible_wrap)] #[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) unsafe fn parse_str<'invoke, 'de>( - input: *mut u8, + input: SillyWrapper<'de>, data: &'invoke [u8], buffer: &'invoke mut [u8], mut idx: usize, ) -> Result<&'de str> { use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + let input = input.input; // Add 1 
to skip the initial " idx += 1; diff --git a/src/lib.rs b/src/lib.rs index 1fbed6ec..e05e6b41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -372,6 +372,35 @@ pub struct Deserializer<'de> { // architecture dependant parse_str +#[derive(Debug, Clone, Copy)] +pub(crate) struct SillyWrapper<'de> { + input: *mut u8, + _marker: std::marker::PhantomData<&'de ()>, +} + +impl<'de> From<*mut u8> for SillyWrapper<'de> { + #[inline] + fn from(input: *mut u8) -> Self { + Self { + input, + _marker: std::marker::PhantomData, + } + } +} + +// The runtime detection code is inspired from simdutf8's implementation +type FnRaw = *mut (); +type ParseStrFn = for<'invoke, 'de> unsafe fn( + SillyWrapper<'de>, + &'invoke [u8], + &'invoke mut [u8], + usize, +) -> std::result::Result<&'de str, error::Error>; +type FindStructuralBitsFn = unsafe fn( + input: &[u8], + structural_indexes: &mut Vec, +) -> std::result::Result<(), ErrorType>; + impl<'de> Deserializer<'de> { #[inline] #[cfg(not(any( @@ -385,25 +414,65 @@ impl<'de> Deserializer<'de> { data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, - ) -> Result<&'de str> { + ) -> Result<&'de str> + where + 'de: 'invoke, + { + let input: SillyWrapper<'de> = SillyWrapper::from(input); #[cfg(all( feature = "runtime-detection", any(target_arch = "x86_64", target_arch = "x86"), ))] { - if std::is_x86_feature_detected!("avx2") { - return impls::avx2::deser::parse_str(input, data, buffer, idx); - } else if std::is_x86_feature_detected!("sse4.2") { - return impls::sse42::deser::parse_str(input, data, buffer, idx); + use std::sync::atomic::{AtomicPtr, Ordering}; + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + #[inline] + fn get_fastest_available_implementation() -> ParseStrFn { + if std::is_x86_feature_detected!("avx2") { + impls::avx2::deser::parse_str + } else if std::is_x86_feature_detected!("sse4.2") { + impls::sse42::deser::parse_str + } else { + #[cfg(feature = "portable")] + let r = impls::portable::deser::parse_str; 
+ #[cfg(not(feature = "portable"))] + let r = impls::native::deser::parse_str; + r + } } - } - #[cfg(feature = "portable")] - let r = impls::portable::deser::parse_str(input, data, buffer, idx); - #[cfg(not(feature = "portable"))] - let r = impls::native::deser::parse_str(input, data, buffer, idx); + #[inline] + unsafe fn get_fastest<'invoke, 'de>( + input: SillyWrapper<'de>, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> core::result::Result<&'de str, error::Error> + where + 'de: 'invoke, + { + let fun = get_fastest_available_implementation(); + FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input, data, buffer, idx) + } + + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input, data, buffer, idx) + } + #[cfg(not(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + )))] + { + #[cfg(feature = "portable")] + let r = impls::portable::deser::parse_str(input, data, buffer, idx); + #[cfg(not(feature = "portable"))] + let r = impls::native::deser::parse_str(input, data, buffer, idx); - r + r + } } #[inline] @@ -473,33 +542,62 @@ impl<'de> Deserializer<'de> { any(target_arch = "x86_64", target_arch = "x86"), ))] { - if std::is_x86_feature_detected!("avx2") { - return Self::_find_structural_bits::( - input, - structural_indexes, - ); - } else if std::is_x86_feature_detected!("sse4.2") { - return Self::_find_structural_bits::( - input, - structural_indexes, - ); + use std::sync::atomic::{AtomicPtr, Ordering}; + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + #[inline] + fn get_fastest_available_implementation() -> FindStructuralBitsFn { + if std::is_x86_feature_detected!("avx2") { + Deserializer::_find_structural_bits:: + } else if std::is_x86_feature_detected!("sse4.2") { + Deserializer::_find_structural_bits:: + } else { + #[cfg(feature = "portable")] + let r = Deserializer::_find_structural_bits::; + #[cfg(not(feature = "portable"))] + let r = 
Deserializer::_find_structural_bits::; + r + } } + + #[inline] + unsafe fn get_fastest( + input: &[u8], + structural_indexes: &mut Vec, + ) -> core::result::Result<(), error::ErrorType> { + let fun = get_fastest_available_implementation(); + FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input, structural_indexes) + } + + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input, structural_indexes) } - // This is a horrible hack to allow ChunkedUtf8ValidatorImpNative to not do anything - #[cfg(not(feature = "portable"))] - let r = { - match core::str::from_utf8(input) { - Ok(_) => (), - Err(_) => return Err(ErrorType::InvalidUtf8), - }; + #[cfg(not(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + )))] + { #[cfg(not(feature = "portable"))] - Self::_find_structural_bits::(input, structural_indexes) - }; - #[cfg(feature = "portable")] - let r = - Self::_find_structural_bits::(input, structural_indexes); - r + let r = { + // This is a nasty hack, we don't have a chunked implementation for native rust + // so we validate UTF8 ahead of time + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), + }; + #[cfg(not(feature = "portable"))] + Self::_find_structural_bits::(input, structural_indexes) + }; + #[cfg(feature = "portable")] + let r = Self::_find_structural_bits::( + input, + structural_indexes, + ); + r + } } #[inline] From 9f5fae44d9d835a5f16ff5d73037201d0a87b539 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Fri, 20 Oct 2023 10:36:36 +0200 Subject: [PATCH 5/8] Disable portable simd until simdutf8 is updated Signed-off-by: Heinz N. 
Gies --- Cargo.toml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f1df7a30..0b4a1da0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,15 +15,12 @@ rust-version = "1.61" getrandom = { version = "0.2", features = ["js"] } [dependencies] +simdutf8 = { version = "0.1.4", features = ["public_imp", "aarch64_neon"] } + lexical-core = { version = "0.8", features = ["format"] } beef = { version = "0.5", optional = true } halfbrown = "0.2" value-trait = { version = "0.6.1" } -simdutf8 = { version = "0.1.4", features = [ - "public_imp", - "aarch64_neon", -], path = "../simdutf8" } - # ahash known key once_cell = { version = "1.17", optional = true } ahash = { version = "0.8", optional = true } @@ -106,7 +103,7 @@ perf = ["perfcnt", "getopts", "colored", "serde_json"] docsrs = [] # portable simd support (as of rust 1.73 nightly only) -portable = ["simdutf8/portable"] +# portable = ["simdutf8/portable"] # use runtime detection of the CPU features where possible instead of enforcing an instruction set From 3d8b54bc3e95db21285c3a19ab95bab82ffe679f Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Fri, 20 Oct 2023 10:40:09 +0200 Subject: [PATCH 6/8] cleanup & soundness Signed-off-by: Heinz N. 
Gies --- README.md | 16 +- src/charutils.rs | 4 +- src/impls/avx2/deser.rs | 1 - src/impls/avx2/mod.rs | 4 +- src/impls/avx2/stage1.rs | 13 +- src/impls/mod.rs | 6 +- src/impls/native/mod.rs | 4 +- src/impls/native/stage1.rs | 59 +++--- src/impls/neon/deser.rs | 11 +- src/impls/neon/mod.rs | 2 + src/impls/neon/stage1.rs | 29 ++- src/impls/portable/mod.rs | 2 + src/impls/portable/stage1.rs | 17 +- src/impls/simd128/deser.rs | 25 ++- src/impls/simd128/mod.rs | 2 + src/impls/simd128/stage1.rs | 66 ++++--- src/impls/sse42/mod.rs | 4 +- src/impls/sse42/stage1.rs | 10 +- src/lib.rs | 313 +++++++++++++++++--------------- src/macros.rs | 14 +- src/numberparse.rs | 4 +- src/numberparse/approx.rs | 4 +- src/numberparse/correct.rs | 2 +- src/safer_unchecked.rs | 4 +- src/serde.rs | 32 ++-- src/serde/de.rs | 2 +- src/stage2.rs | 64 +++---- src/stringparse.rs | 4 +- src/tests/serde.rs | 4 +- src/value.rs | 15 +- src/value/borrowed.rs | 15 +- src/value/borrowed/serialize.rs | 8 +- src/value/owned.rs | 15 +- src/value/owned/serialize.rs | 8 +- tree | 215 ---------------------- 35 files changed, 399 insertions(+), 599 deletions(-) delete mode 100644 tree diff --git a/README.md b/README.md index ab197782..65fcf7fb 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,25 @@ To be able to take advantage of `simd-json` your system needs to be SIMD capable. On `x86` it will select the best SIMD featureset (`avx2`, or `sse4.2`) during runtime. If `simd-json` is compiled with SIMD support, it will disable runtime detection. -`simd-json` supports AVX2, SSE4.2 and NEON. +`simd-json` supports AVX2, SSE4.2, NEON and simd128 (wasm) natively; it also includes an unoptimized fallback implementation using native Rust for other platforms, however this is a last resort measure and nothing we'd recommend relying on.
+ + ### allocator For best performance we highly suggest using [mimalloc](https://crates.io/crates/mimalloc) or [jemalloc](https://crates.io/crates/jemalloc) instead of the system allocator used by default. Another recent allocator that works well ( but we have yet to test in production a setting ) is [snmalloc](https://github.com/microsoft/snmalloc). -## `serde` +### `runtime-detection` + +This feature allows selecting the optimal algorithm based on available features during runtime; it has no effect on non-x86/x86_64 platforms. When neither `AVX2` nor `SSE4.2` is supported it will fall back to a native Rust implementation. + +### `portable` + +**Currently disabled** + +An implementation of the algorithm using `std::simd` and up to 512 bit wide registers; currently disabled due to dependencies and highly experimental. + +### `serde_impl` `simd-json` is compatible with serde and `serde-json`. The Value types provided implement serializers and deserializers. In addition to that `simd-json` implements the `Deserializer` trait for the parser so it can deserialize anything that implements the serde `Deserialize` trait. Note, that serde provides both a `Deserializer` and a `Deserialize` trait.
diff --git a/src/charutils.rs b/src/charutils.rs index ed1c389e..0b753d6d 100644 --- a/src/charutils.rs +++ b/src/charutils.rs @@ -22,12 +22,12 @@ const STRUCTURAL_OR_WHITESPACE: [u32; 256] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_not_structural_or_whitespace(c: u8) -> u32 { unsafe { *STRUCTURAL_OR_WHITESPACE_NEGATED.get_kinda_unchecked(c as usize) } } -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_structural_or_whitespace(c: u8) -> u32 { unsafe { *STRUCTURAL_OR_WHITESPACE.get_kinda_unchecked(c as usize) } } diff --git a/src/impls/avx2/deser.rs b/src/impls/avx2/deser.rs index f1841cd9..9ccccae5 100644 --- a/src/impls/avx2/deser.rs +++ b/src/impls/avx2/deser.rs @@ -20,7 +20,6 @@ use crate::{ #[allow( clippy::if_not_else, clippy::cast_possible_wrap, - clippy::if_not_else, clippy::too_many_lines )] #[cfg_attr(not(feature = "no-inline"), inline)] diff --git a/src/impls/avx2/mod.rs b/src/impls/avx2/mod.rs index 19effade..2e259cd2 100644 --- a/src/impls/avx2/mod.rs +++ b/src/impls/avx2/mod.rs @@ -1,4 +1,6 @@ -pub(crate) mod deser; +#![allow(unused_imports, dead_code)] +mod deser; mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; diff --git a/src/impls/avx2/stage1.rs b/src/impls/avx2/stage1.rs index bbac4bb7..1529b0e0 100644 --- a/src/impls/avx2/stage1.rs +++ b/src/impls/avx2/stage1.rs @@ -15,8 +15,6 @@ use std::arch::x86_64::{ _mm_clmulepi64_si128, _mm_cvtsi128_si64, _mm_set1_epi8, _mm_set_epi64x, }; -use std::mem; - macro_rules! 
low_nibble_mask { () => { _mm256_setr_epi8( @@ -45,6 +43,7 @@ impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp; type SimdRepresentation = __m256i; #[cfg_attr(not(feature = "no-inline"), inline)] + // _mm256_loadu_si256 does not need allignment #[allow(clippy::cast_ptr_alignment)] #[target_feature(enable = "avx2")] unsafe fn new(ptr: &[u8]) -> Self { @@ -171,11 +170,7 @@ impl Stage1Parse for SimdInput { // needs to be large enough to handle this //TODO: usize was u32 here does this matter? #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow( - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::uninit_vec - )] + #[allow(clippy::cast_possible_wrap, clippy::cast_ptr_alignment)] #[target_feature(enable = "avx2")] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; @@ -198,7 +193,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros() as i32; @@ -228,6 +223,8 @@ impl Stage1Parse for SimdInput { ); l += 8; } + // We have written all the data + base.set_len(final_len); } #[cfg_attr(not(feature = "no-inline"), inline)] diff --git a/src/impls/mod.rs b/src/impls/mod.rs index 0428044b..8f4b08e4 100644 --- a/src/impls/mod.rs +++ b/src/impls/mod.rs @@ -1,4 +1,8 @@ -#[cfg(any(test, not(feature = "portable")))] +#[cfg(all( + any(test, not(feature = "portable")), + not(target_arch = "aarch64"), + not(target_feature = "simd128") +))] /// rust native implementation pub(crate) mod native; diff --git a/src/impls/native/mod.rs b/src/impls/native/mod.rs index 80718c1e..8a2dd9dd 100644 --- a/src/impls/native/mod.rs +++ b/src/impls/native/mod.rs @@ -1,8 +1,10 @@ +#![allow(unused_imports, dead_code)] use 
simdutf8::basic::imp::ChunkedUtf8Validator; -pub(crate) mod deser; +mod deser; mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; /// This is a hack, since there is no native implementation of the chunked validator we pre-validate the entire diff --git a/src/impls/native/stage1.rs b/src/impls/native/stage1.rs index a19b4365..8bdcf788 100644 --- a/src/impls/native/stage1.rs +++ b/src/impls/native/stage1.rs @@ -2,14 +2,13 @@ use crate::{static_cast_i32, Stage1Parse}; -#[allow(non_camel_case_types)] -type v128 = [u8; 16]; +type V128 = [u8; 16]; -fn u8x16_splat(n: u8) -> v128 { +fn u8x16_splat(n: u8) -> V128 { [n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n] } -fn v128_and(a: v128, b: v128) -> v128 { +fn v128_and(a: V128, b: V128) -> V128 { [ a[0] & b[0], a[1] & b[1], @@ -30,7 +29,7 @@ fn v128_and(a: v128, b: v128) -> v128 { ] } -fn u8x16_shr(a: v128, n: i32) -> v128 { +fn u8x16_shr(a: V128, n: i32) -> V128 { [ a[0] >> n, a[1] >> n, @@ -51,7 +50,7 @@ fn u8x16_shr(a: v128, n: i32) -> v128 { ] } -fn u8x16_swizzle(a: v128, s: v128) -> [u8; 16] { +fn u8x16_swizzle(a: V128, s: V128) -> [u8; 16] { [ if s[0] > 0x0f { 0 @@ -197,7 +196,7 @@ fn bool_to_u8(b: bool) -> u8 { 0x00 } } -fn u8x16_le(a: v128, b: v128) -> v128 { +fn u8x16_le(a: V128, b: V128) -> V128 { [ bool_to_u8(a[0] <= b[0]), bool_to_u8(a[1] <= b[1]), @@ -218,7 +217,7 @@ fn u8x16_le(a: v128, b: v128) -> v128 { ] } -fn u8x16_eq(a: v128, b: v128) -> v128 { +fn u8x16_eq(a: V128, b: V128) -> V128 { [ bool_to_u8(a[0] == b[0]), bool_to_u8(a[1] == b[1]), @@ -239,7 +238,7 @@ fn u8x16_eq(a: v128, b: v128) -> v128 { ] } -fn u8x16_bitmask(a: v128) -> u16 { +fn u8x16_bitmask(a: V128) -> u16 { (a[0] & 0b1000_0000 != 0) as u16 | (((a[1] & 0b1000_0000 != 0) as u16) << 1) | (((a[2] & 0b1000_0000 != 0) as u16) << 2) @@ -261,7 +260,7 @@ fn u8x16_bitmask(a: v128) -> u16 { // unsafe { mem::transmute(a) } // } -// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// #[cfg_attr(not(feature = 
"no-inline"), inline)] // pub unsafe fn neon_movemask_bulk( // p0: uint8x16_t, // p1: uint8x16_t, @@ -288,21 +287,21 @@ fn u8x16_bitmask(a: v128) -> u16 { #[derive(Debug)] pub(crate) struct SimdInput { - v0: v128, - v1: v128, - v2: v128, - v3: v128, + v0: V128, + v1: V128, + v2: V128, + v3: V128, } impl Stage1Parse for SimdInput { type Utf8Validator = super::ChunkedUtf8ValidatorImp; - type SimdRepresentation = v128; + type SimdRepresentation = V128; unsafe fn new(ptr: &[u8]) -> Self { SimdInput { - v0: *(ptr.as_ptr().cast::()), - v1: *(ptr.as_ptr().add(16).cast::()), - v2: *(ptr.as_ptr().add(32).cast::()), - v3: *(ptr.as_ptr().add(48).cast::()), + v0: *(ptr.as_ptr().cast::()), + v1: *(ptr.as_ptr().add(16).cast::()), + v2: *(ptr.as_ptr().add(32).cast::()), + v3: *(ptr.as_ptr().add(48).cast::()), } } @@ -331,7 +330,7 @@ impl Stage1Parse for SimdInput { res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48) } - unsafe fn unsigned_lteq_against_input(&self, maxval: v128) -> u64 { + unsafe fn unsigned_lteq_against_input(&self, maxval: V128) -> u64 { let cmp_res_0 = u8x16_le(self.v0, maxval); let res_0 = u8x16_bitmask(cmp_res_0) as u64; let cmp_res_1 = u8x16_le(self.v1, maxval); @@ -360,8 +359,8 @@ impl Stage1Parse for SimdInput { // * horizontal tab 0x09 // * carriage return 0x0d // these go into the next 2 buckets of the comparison (8/16) - const LOW_NIBBLE_MASK: v128 = [16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0]; - const HIGH_NIBBLE_MASK: v128 = [8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0]; + const LOW_NIBBLE_MASK: V128 = [16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0]; + const HIGH_NIBBLE_MASK: V128 = [8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0]; let structural_shufti_mask = u8x16_splat(0x7); let whitespace_shufti_mask = u8x16_splat(0x18); @@ -425,12 +424,8 @@ impl Stage1Parse for SimdInput { *whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48)); } - #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow( - 
clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::uninit_vec - )] + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_possible_wrap, clippy::cast_ptr_alignment)] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); @@ -448,7 +443,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros() as i32; @@ -469,13 +464,15 @@ impl Stage1Parse for SimdInput { std::ptr::write(base.as_mut_ptr().add(l).cast::<[i32; 4]>(), v); l += 4; } + // We have written all the data + base.set_len(final_len); } - unsafe fn fill_s8(n: i8) -> v128 { + unsafe fn fill_s8(n: i8) -> V128 { u8x16_splat(n as u8) } - unsafe fn zero() -> v128 { + unsafe fn zero() -> V128 { u8x16_splat(0) } } diff --git a/src/impls/neon/deser.rs b/src/impls/neon/deser.rs index 04e9d449..7956cb3a 100644 --- a/src/impls/neon/deser.rs +++ b/src/impls/neon/deser.rs @@ -10,7 +10,7 @@ use std::arch::aarch64::{ vreinterpretq_u32_u8, }; -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] fn find_bs_bits_and_quote_bits(v0: uint8x16_t, v1: uint8x16_t) -> (u32, u32) { unsafe { let quote_mask = vmovq_n_u8(b'"'); @@ -39,13 +39,8 @@ fn find_bs_bits_and_quote_bits(v0: uint8x16_t, v1: uint8x16_t) -> (u32, u32) { } } -#[allow( - clippy::if_not_else, - clippy::if_not_else, - clippy::cast_ptr_alignment, - clippy::too_many_lines -)] -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[allow(clippy::if_not_else, clippy::too_many_lines)] +#[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) fn parse_str<'invoke, 'de>( input: *mut u8, data: &'invoke [u8], diff --git a/src/impls/neon/mod.rs b/src/impls/neon/mod.rs index 547212c8..2e259cd2 100644 
--- a/src/impls/neon/mod.rs +++ b/src/impls/neon/mod.rs @@ -1,4 +1,6 @@ +#![allow(unused_imports, dead_code)] mod deser; mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; diff --git a/src/impls/neon/stage1.rs b/src/impls/neon/stage1.rs index ccb5c92d..73a95e64 100644 --- a/src/impls/neon/stage1.rs +++ b/src/impls/neon/stage1.rs @@ -7,7 +7,7 @@ use std::arch::aarch64::{ use std::mem; // NEON-SPECIFIC -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) unsafe fn bit_mask() -> uint8x16_t { std::mem::transmute([ 0x01u8, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, @@ -15,7 +15,7 @@ pub(crate) unsafe fn bit_mask() -> uint8x16_t { ]) } -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub unsafe fn neon_movemask_bulk( p0: uint8x16_t, p1: uint8x16_t, @@ -53,7 +53,6 @@ impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp; type SimdRepresentation = int8x16_t; #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_ptr_alignment)] unsafe fn new(ptr: &[u8]) -> Self { Self { v0: vld1q_u8(ptr.as_ptr().cast::()), @@ -63,7 +62,7 @@ impl Stage1Parse for SimdInput { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn compute_quote_mask(mut quote_bits: u64) -> u64 { quote_bits ^= quote_bits << 1; quote_bits ^= quote_bits << 2; @@ -75,7 +74,7 @@ impl Stage1Parse for SimdInput { } /// a straightforward comparison of a mask against input - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { let mask: uint8x16_t = vmovq_n_u8(m); let cmp_res_0: uint8x16_t = vceqq_u8(self.v0, mask); @@ -87,7 +86,7 @@ impl Stage1Parse for SimdInput { } // find all values 
less than or equal than the content of maxval (using unsigned arithmetic) - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn unsigned_lteq_against_input(&self, maxval: int8x16_t) -> u64 { let maxval = vreinterpretq_u8_s8(maxval); let cmp_res_0: uint8x16_t = vcleq_u8(self.v0, maxval); @@ -97,7 +96,7 @@ impl Stage1Parse for SimdInput { neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64) { // do a 'shufti' to detect structural JSON characters @@ -170,12 +169,8 @@ impl Stage1Parse for SimdInput { // will potentially store extra values beyond end of valid bits, so base_ptr // needs to be large enough to handle this //TODO: usize was u32 here does this matter? - #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow( - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::uninit_vec - )] + #[cfg_attr(not(feature = "no-inline"), inline)] + #[allow(clippy::cast_possible_wrap, clippy::cast_ptr_alignment)] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); @@ -193,7 +188,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros() as i32; @@ -210,14 +205,16 @@ impl Stage1Parse for SimdInput { std::ptr::write(base.as_mut_ptr().add(l).cast::(), v); l += 4; } + // We have written all the data + base.set_len(final_len); } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn fill_s8(n: i8) 
-> int8x16_t { vdupq_n_s8(n) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn zero() -> int8x16_t { vdupq_n_s8(0) } diff --git a/src/impls/portable/mod.rs b/src/impls/portable/mod.rs index 4ec8345c..515c63c7 100644 --- a/src/impls/portable/mod.rs +++ b/src/impls/portable/mod.rs @@ -1,4 +1,6 @@ +#![allow(unused_imports, dead_code)] pub mod deser; pub mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; diff --git a/src/impls/portable/stage1.rs b/src/impls/portable/stage1.rs index 802308ee..4ea0ac8d 100644 --- a/src/impls/portable/stage1.rs +++ b/src/impls/portable/stage1.rs @@ -10,7 +10,6 @@ impl Stage1Parse for SimdInput { type Utf8Validator = simdutf8::basic::imp::portable::ChunkedUtf8ValidatorImp; type SimdRepresentation = u8x64; #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_ptr_alignment)] unsafe fn new(ptr: &[u8]) -> Self { Self { v: u8x64::from_array(*ptr.as_ptr().cast::<[u8; 64]>()), @@ -18,7 +17,6 @@ impl Stage1Parse for SimdInput { } #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_sign_loss)] unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { let mut quote_mask: u64 = quote_bits ^ (quote_bits << 1); quote_mask = quote_mask ^ (quote_mask << 2); @@ -31,7 +29,6 @@ impl Stage1Parse for SimdInput { /// a straightforward comparison of a mask against input #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { let mask = u8x64::splat(m); self.v.simd_eq(mask).to_bitmask() @@ -39,13 +36,11 @@ impl Stage1Parse for SimdInput { // find all values less than or equal than the content of maxval (using unsigned arithmetic) #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_sign_loss)] unsafe fn unsigned_lteq_against_input(&self, maxval: u8x64) -> u64 { self.v.simd_le(maxval).to_bitmask() } 
#[cfg_attr(not(feature = "no-inline"), inline)] - #[allow(clippy::cast_sign_loss, clippy::cast_lossless)] unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64) { // do a 'shufti' to detect structural JSON characters // they are @@ -110,11 +105,7 @@ impl Stage1Parse for SimdInput { // needs to be large enough to handle this //TODO: usize was u32 here does this matter? #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow( - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::uninit_vec - )] + #[allow(clippy::cast_possible_wrap, clippy::cast_ptr_alignment)] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); @@ -136,7 +127,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros() as i32; @@ -165,11 +156,13 @@ impl Stage1Parse for SimdInput { // l += 8; } + // We have written all the data + base.set_len(final_len); } - #[allow(clippy::cast_sign_loss)] #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn fill_s8(n: i8) -> u8x64 { + #[allow(clippy::cast_sign_loss)] u8x64::splat(n as u8) } diff --git a/src/impls/simd128/deser.rs b/src/impls/simd128/deser.rs index a2a5225f..d68bc3a0 100644 --- a/src/impls/simd128/deser.rs +++ b/src/impls/simd128/deser.rs @@ -1,30 +1,27 @@ use std::arch::wasm32::{u8x16_bitmask, u8x16_eq, u8x16_splat, v128, v128_load, v128_store}; -pub use crate::{ - error::{Error, ErrorType}, - Result, -}; use crate::{ + error::ErrorType, safer_unchecked::GetSaferUnchecked, stringparse::{handle_unicode_codepoint, ESCAPE_MAP}, - Deserializer, + Deserializer, Result, SillyWrapper, }; #[target_feature(enable = "simd128")] #[allow( clippy::if_not_else, - clippy::cast_ptr_alignment, 
clippy::cast_possible_wrap, clippy::too_many_lines )] #[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) fn parse_str<'invoke, 'de>( - input: *mut u8, + input: SillyWrapper<'de>, data: &'invoke [u8], buffer: &'invoke mut [u8], mut idx: usize, ) -> Result<&'de str> { use ErrorType::{InvalidEscape, InvalidUnicodeCodepoint}; + let input = input.input; // Add 1 to skip the initial " idx += 1; @@ -36,7 +33,11 @@ pub(crate) fn parse_str<'invoke, 'de>( let mut src_i = 0; let mut len = src_i; loop { - let v = unsafe { v128_load(src.as_ptr().add(src_i).cast::()) }; + let v = unsafe { + // v128_load requires no allignment + #[allow(clippy::cast_ptr_alignment)] + v128_load(src.as_ptr().add(src_i).cast::()) + }; let bs_bits = u8x16_bitmask(u8x16_eq(v, u8x16_splat(b'\\'))); let quote_bits = u8x16_bitmask(u8x16_eq(v, u8x16_splat(b'"'))); @@ -81,9 +82,15 @@ pub(crate) fn parse_str<'invoke, 'de>( // To be more conform with upstream loop { - let v = unsafe { v128_load(src.as_ptr().add(src_i).cast::()) }; + let v = unsafe { + // v128_load requires no allignment + #[allow(clippy::cast_ptr_alignment)] + v128_load(src.as_ptr().add(src_i).cast::()) + }; unsafe { + // v128_store requires no allignment + #[allow(clippy::cast_ptr_alignment)] v128_store(buffer.as_mut_ptr().add(dst_i).cast::(), v); }; diff --git a/src/impls/simd128/mod.rs b/src/impls/simd128/mod.rs index 547212c8..2e259cd2 100644 --- a/src/impls/simd128/mod.rs +++ b/src/impls/simd128/mod.rs @@ -1,4 +1,6 @@ +#![allow(unused_imports, dead_code)] mod deser; mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; diff --git a/src/impls/simd128/stage1.rs b/src/impls/simd128/stage1.rs index b9aacb1f..056fb28d 100644 --- a/src/impls/simd128/stage1.rs +++ b/src/impls/simd128/stage1.rs @@ -1,6 +1,8 @@ -use std::arch::wasm32::*; - use crate::Stage1Parse; +use std::arch::wasm32::{ + i8x16_splat, u32x4, u32x4_add, u32x4_splat, u8x16, u8x16_bitmask, u8x16_eq, u8x16_le, + u8x16_shr, u8x16_splat, 
u8x16_swizzle, v128, v128_and, v128_load, v128_store, +}; #[derive(Debug)] pub(crate) struct SimdInput { @@ -25,7 +27,7 @@ impl Stage1Parse for SimdInput { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn compute_quote_mask(mut quote_bits: u64) -> u64 { #[allow(clippy::cast_sign_loss)] let b = -1_i64 as u64; @@ -40,35 +42,35 @@ impl Stage1Parse for SimdInput { } /// a straightforward comparison of a mask against input - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn cmp_mask_against_input(&self, m: u8) -> u64 { let mask = u8x16_splat(m); let cmp_res_0 = u8x16_eq(self.v0, mask); - let res_0 = u8x16_bitmask(cmp_res_0) as u64; + let res_0 = u64::from(u8x16_bitmask(cmp_res_0)); let cmp_res_1 = u8x16_eq(self.v1, mask); - let res_1 = u8x16_bitmask(cmp_res_1) as u64; + let res_1 = u64::from(u8x16_bitmask(cmp_res_1)); let cmp_res_2 = u8x16_eq(self.v2, mask); - let res_2 = u8x16_bitmask(cmp_res_2) as u64; + let res_2 = u64::from(u8x16_bitmask(cmp_res_2)); let cmp_res_3 = u8x16_eq(self.v3, mask); - let res_3 = u8x16_bitmask(cmp_res_3) as u64; + let res_3 = u64::from(u8x16_bitmask(cmp_res_3)); res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48) } // find all values less than or equal than the content of maxval (using unsigned arithmetic) - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn unsigned_lteq_against_input(&self, maxval: v128) -> u64 { let cmp_res_0 = u8x16_le(self.v0, maxval); - let res_0 = u8x16_bitmask(cmp_res_0) as u64; + let res_0 = u64::from(u8x16_bitmask(cmp_res_0)); let cmp_res_1 = u8x16_le(self.v1, maxval); - let res_1 = u8x16_bitmask(cmp_res_1) as u64; + let res_1 = u64::from(u8x16_bitmask(cmp_res_1)); let cmp_res_2 = u8x16_le(self.v2, maxval); - let res_2 = u8x16_bitmask(cmp_res_2) as u64; + let res_2 = u64::from(u8x16_bitmask(cmp_res_2)); let 
cmp_res_3 = u8x16_le(self.v3, maxval); - let res_3 = u8x16_bitmask(cmp_res_3) as u64; + let res_3 = u64::from(u8x16_bitmask(cmp_res_3)); res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] unsafe fn find_whitespace_and_structurals(&self, whitespace: &mut u64, structurals: &mut u64) { // do a 'shufti' to detect structural JSON characters @@ -129,10 +131,10 @@ impl Stage1Parse for SimdInput { let tmp_v2 = u8x16_eq(v128_and(v_v2, structural_shufti_mask), zero_mask); let tmp_v3 = u8x16_eq(v128_and(v_v3, structural_shufti_mask), zero_mask); - let structural_res_0 = u8x16_bitmask(tmp_v0) as u64; - let structural_res_1 = u8x16_bitmask(tmp_v1) as u64; - let structural_res_2 = u8x16_bitmask(tmp_v2) as u64; - let structural_res_3 = u8x16_bitmask(tmp_v3) as u64; + let structural_res_0 = u64::from(u8x16_bitmask(tmp_v0)); + let structural_res_1 = u64::from(u8x16_bitmask(tmp_v1)); + let structural_res_2 = u64::from(u8x16_bitmask(tmp_v2)); + let structural_res_3 = u64::from(u8x16_bitmask(tmp_v3)); *structurals = !(structural_res_0 | (structural_res_1 << 16) @@ -144,10 +146,10 @@ impl Stage1Parse for SimdInput { let tmp_ws_v2 = u8x16_eq(v128_and(v_v2, whitespace_shufti_mask), zero_mask); let tmp_ws_v3 = u8x16_eq(v128_and(v_v3, whitespace_shufti_mask), zero_mask); - let ws_res_0 = u8x16_bitmask(tmp_ws_v0) as u64; - let ws_res_1 = u8x16_bitmask(tmp_ws_v1) as u64; - let ws_res_2 = u8x16_bitmask(tmp_ws_v2) as u64; - let ws_res_3 = u8x16_bitmask(tmp_ws_v3) as u64; + let ws_res_0 = u64::from(u8x16_bitmask(tmp_ws_v0)); + let ws_res_1 = u64::from(u8x16_bitmask(tmp_ws_v1)); + let ws_res_2 = u64::from(u8x16_bitmask(tmp_ws_v2)); + let ws_res_3 = u64::from(u8x16_bitmask(tmp_ws_v3)); *whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48)); } @@ -158,7 +160,7 @@ impl Stage1Parse for SimdInput { // will potentially store extra 
values beyond end of valid bits, so base_ptr // needs to be large enough to handle this //TODO: usize was u32 here does this matter? - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); @@ -171,31 +173,35 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { - let v0 = bits.trailing_zeros() as u32; + let v0 = bits.trailing_zeros(); bits &= bits.wrapping_sub(1); - let v1 = bits.trailing_zeros() as u32; + let v1 = bits.trailing_zeros(); bits &= bits.wrapping_sub(1); - let v2 = bits.trailing_zeros() as u32; + let v2 = bits.trailing_zeros(); bits &= bits.wrapping_sub(1); - let v3 = bits.trailing_zeros() as u32; + let v3 = bits.trailing_zeros(); bits &= bits.wrapping_sub(1); let v = u32x4(v0, v1, v2, v3); let v = u32x4_add(idx_64_v, v); + // v128_store requires no alignment + #[allow(clippy::cast_ptr_alignment)] v128_store(base.as_mut_ptr().add(l).cast::(), v); l += 4; } + // We have written all the data + base.set_len(final_len); } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn fill_s8(n: i8) -> v128 { i8x16_splat(n) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn zero() -> v128 { i8x16_splat(0) } diff --git a/src/impls/sse42/mod.rs b/src/impls/sse42/mod.rs index 19effade..2e259cd2 100644 --- a/src/impls/sse42/mod.rs +++ b/src/impls/sse42/mod.rs @@ -1,4 +1,6 @@ -pub(crate) mod deser; +#![allow(unused_imports, dead_code)] mod deser; mod stage1; +pub(crate) use deser::parse_str; pub(crate) use stage1::SimdInput; diff --git a/src/impls/sse42/stage1.rs
b/src/impls/sse42/stage1.rs index 5a257ea7..53fb21c8 100644 --- a/src/impls/sse42/stage1.rs +++ b/src/impls/sse42/stage1.rs @@ -232,11 +232,7 @@ impl Stage1Parse for SimdInput { //TODO: usize was u32 here does this matter? #[target_feature(enable = "sse4.2")] #[cfg_attr(not(feature = "no-inline"), inline)] - #[allow( - clippy::cast_possible_wrap, - clippy::cast_ptr_alignment, - clippy::uninit_vec - )] + #[allow(clippy::cast_possible_wrap, clippy::cast_ptr_alignment)] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); @@ -254,7 +250,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros() as i32; @@ -271,6 +267,8 @@ impl Stage1Parse for SimdInput { _mm_storeu_si128(base.as_mut_ptr().add(l).cast::(), v); l += 4; } + // We have written all the data + base.set_len(final_len); } #[target_feature(enable = "sse4.2")] diff --git a/src/lib.rs b/src/lib.rs index e05e6b41..1d7c3bad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,13 +9,7 @@ clippy::pedantic, missing_docs )] -// We might want to revisit inline_always -#![allow( - clippy::module_name_repetitions, - clippy::inline_always, - clippy::arc_with_non_send_sync, - renamed_and_removed_lints -)] +#![allow(clippy::module_name_repetitions, renamed_and_removed_lints)] //! simd-json is a rust port of the simdjson c++ library. It follows //! most of the design closely with a few exceptions to make it better @@ -245,8 +239,7 @@ pub(crate) trait Stage1Parse { // Note that we don't do any error checking to see if we have backslash // sequences outside quotes; these // backslash sequences (of any length) will be detected elsewhere. 
- #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow(overflowing_literals, clippy::cast_sign_loss)] + #[cfg_attr(not(feature = "no-inline"), inline)] fn find_quote_mask_and_bits( &self, odd_ends: u64, @@ -284,7 +277,7 @@ pub(crate) trait Stage1Parse { // indicate whether we end an iteration on an odd-length sequence of // backslashes, which modifies our subsequent search for odd-length // sequences of backslashes in an obvious way. - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn find_odd_backslash_sequences(&self, prev_iter_ends_odd_backslash: &mut u64) -> u64 { const EVEN_BITS: u64 = 0x5555_5555_5555_5555; const ODD_BITS: u64 = !EVEN_BITS; @@ -322,7 +315,7 @@ pub(crate) trait Stage1Parse { // iteration ended on a whitespace or a structural character (which means that // the next iteration // will have a pseudo-structural character at its start) - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn finalize_structurals( mut structurals: u64, whitespace: u64, @@ -388,22 +381,86 @@ impl<'de> From<*mut u8> for SillyWrapper<'de> { } } -// The runtime detection code is inspired from simdutf8's implementation +#[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), +))] // The runtime detection code is inspired from simdutf8's implementation type FnRaw = *mut (); +#[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), +))] type ParseStrFn = for<'invoke, 'de> unsafe fn( SillyWrapper<'de>, &'invoke [u8], &'invoke mut [u8], usize, ) -> std::result::Result<&'de str, error::Error>; +#[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), +))] type FindStructuralBitsFn = unsafe fn( input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType>; impl<'de> Deserializer<'de> { + #[inline] + #[cfg(all( + 
feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + ))] + pub(crate) unsafe fn parse_str_<'invoke>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> Result<&'de str> + where + 'de: 'invoke, + { + use std::sync::atomic::{AtomicPtr, Ordering}; + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + #[inline] + fn get_fastest_available_implementation() -> ParseStrFn { + if std::is_x86_feature_detected!("avx2") { + impls::avx2::parse_str + } else if std::is_x86_feature_detected!("sse4.2") { + impls::sse42::parse_str + } else { + #[cfg(feature = "portable")] + let r = impls::portable::parse_str; + #[cfg(not(feature = "portable"))] + let r = impls::native::parse_str; + r + } + } + + #[inline] + unsafe fn get_fastest<'invoke, 'de>( + input: SillyWrapper<'de>, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> core::result::Result<&'de str, error::Error> + where + 'de: 'invoke, + { + let fun = get_fastest_available_implementation(); + FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input, data, buffer, idx) + } + + let input: SillyWrapper<'de> = SillyWrapper::from(input); + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input, data, buffer, idx) + } #[inline] #[cfg(not(any( + feature = "runtime-detection", target_feature = "avx2", target_feature = "sse4.2", target_feature = "simd128", @@ -419,115 +476,112 @@ impl<'de> Deserializer<'de> { 'de: 'invoke, { let input: SillyWrapper<'de> = SillyWrapper::from(input); - #[cfg(all( - feature = "runtime-detection", - any(target_arch = "x86_64", target_arch = "x86"), - ))] - { - use std::sync::atomic::{AtomicPtr, Ordering}; - - static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); - - #[inline] - fn get_fastest_available_implementation() -> ParseStrFn { - if std::is_x86_feature_detected!("avx2") { - impls::avx2::deser::parse_str - } else if std::is_x86_feature_detected!("sse4.2") { - 
impls::sse42::deser::parse_str - } else { - #[cfg(feature = "portable")] - let r = impls::portable::deser::parse_str; - #[cfg(not(feature = "portable"))] - let r = impls::native::deser::parse_str; - r - } - } - #[inline] - unsafe fn get_fastest<'invoke, 'de>( - input: SillyWrapper<'de>, - data: &'invoke [u8], - buffer: &'invoke mut [u8], - idx: usize, - ) -> core::result::Result<&'de str, error::Error> - where - 'de: 'invoke, - { - let fun = get_fastest_available_implementation(); - FN.store(fun as FnRaw, Ordering::Relaxed); - (fun)(input, data, buffer, idx) - } - - let fun = FN.load(Ordering::Relaxed); - mem::transmute::(fun)(input, data, buffer, idx) - } - #[cfg(not(all( - feature = "runtime-detection", - any(target_arch = "x86_64", target_arch = "x86"), - )))] - { - #[cfg(feature = "portable")] - let r = impls::portable::deser::parse_str(input, data, buffer, idx); - #[cfg(not(feature = "portable"))] - let r = impls::native::deser::parse_str(input, data, buffer, idx); - - r - } + #[cfg(feature = "portable")] + let r = impls::portable::parse_str(input, data, buffer, idx); + #[cfg(not(feature = "portable"))] + let r = impls::native::parse_str(input, data, buffer, idx); + r } #[inline] - #[cfg(feature = "sse4.2")] + #[cfg(all(target_feature = "avx2", not(feature = "runtime-detection")))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, - ) -> std::result::Result, ErrorType> { - impls::sse42::deser::parse_str(input, data, buffer, idx) + ) -> Result<&'de str> { + let input: SillyWrapper<'de> = SillyWrapper::from(input); + impls::avx2::parse_str(input, data, buffer, idx) } #[inline] - #[cfg(target_arch = "aarch64")] + #[cfg(all( + target_feature = "sse4.2", + not(feature = "runtime-detection"), + not(target_feature = "avx2") + ))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, - ) -> std::result::Result, ErrorType> { - 
impls::neon::deser::parse_str(input, data, buffer, idx) + ) -> Result<&'de str> { + let input: SillyWrapper<'de> = SillyWrapper::from(input); + impls::sse42::parse_str(input, data, buffer, idx) } + #[inline] - #[cfg(target_feature = "simd128")] + #[cfg(target_arch = "aarch64")] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, - ) -> std::result::Result, ErrorType> { - impls::simd128::deser::parse_str(input, data, buffer, idx) + ) -> Result<&'de str> { + impls::neon::parse_str(input, data, buffer, idx) } - #[inline] - #[cfg(feature = "avx2")] + #[cfg(target_feature = "simd128")] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], buffer: &'invoke mut [u8], idx: usize, - ) -> std::result::Result, ErrorType> { - impls::avx2::deser::parse_str(input, data, buffer, idx) + ) -> Result<&'de str> { + let input: SillyWrapper<'de> = SillyWrapper::from(input); + impls::simd128::parse_str(input, data, buffer, idx) } } /// architecture dependant `find_structural_bits` impl<'de> Deserializer<'de> { - // This version is the runtime detection version, it is only enabled if the `runtime-detection` - // feature is enabled and we are not on neon or wasm platforms - // - // We do allow non x86 platforms for this as well as it provides a fallback with std::simd and - // rust native implementations + #[inline] + #[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + ))] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + use std::sync::atomic::{AtomicPtr, Ordering}; + + static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); + + #[inline] + fn get_fastest_available_implementation() -> FindStructuralBitsFn { + if std::is_x86_feature_detected!("avx2") { + Deserializer::_find_structural_bits:: + } else if std::is_x86_feature_detected!("sse4.2") { + 
Deserializer::_find_structural_bits:: + } else { + #[cfg(feature = "portable")] + let r = Deserializer::_find_structural_bits::; + #[cfg(not(feature = "portable"))] + let r = Deserializer::_find_structural_bits::; + r + } + } + + #[inline] + unsafe fn get_fastest( + input: &[u8], + structural_indexes: &mut Vec, + ) -> core::result::Result<(), error::ErrorType> { + let fun = get_fastest_available_implementation(); + FN.store(fun as FnRaw, Ordering::Relaxed); + (fun)(input, structural_indexes) + } + + let fun = FN.load(Ordering::Relaxed); + mem::transmute::(fun)(input, structural_indexes) + } + #[inline] #[cfg(not(any( + feature = "runtime-detection", target_feature = "avx2", target_feature = "sse4.2", target_feature = "simd128", @@ -537,71 +591,25 @@ impl<'de> Deserializer<'de> { input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - #[cfg(all( - feature = "runtime-detection", - any(target_arch = "x86_64", target_arch = "x86"), - ))] - { - use std::sync::atomic::{AtomicPtr, Ordering}; - - static FN: AtomicPtr<()> = AtomicPtr::new(get_fastest as FnRaw); - - #[inline] - fn get_fastest_available_implementation() -> FindStructuralBitsFn { - if std::is_x86_feature_detected!("avx2") { - Deserializer::_find_structural_bits:: - } else if std::is_x86_feature_detected!("sse4.2") { - Deserializer::_find_structural_bits:: - } else { - #[cfg(feature = "portable")] - let r = Deserializer::_find_structural_bits::; - #[cfg(not(feature = "portable"))] - let r = Deserializer::_find_structural_bits::; - r - } - } - - #[inline] - unsafe fn get_fastest( - input: &[u8], - structural_indexes: &mut Vec, - ) -> core::result::Result<(), error::ErrorType> { - let fun = get_fastest_available_implementation(); - FN.store(fun as FnRaw, Ordering::Relaxed); - (fun)(input, structural_indexes) - } - - let fun = FN.load(Ordering::Relaxed); - mem::transmute::(fun)(input, structural_indexes) - } - - #[cfg(not(all( - feature = "runtime-detection", - any(target_arch = 
"x86_64", target_arch = "x86"), - )))] - { - #[cfg(not(feature = "portable"))] - let r = { - // This is a nasty hack, we don't have a chunked implementation for native rust - // so we validate UTF8 ahead of time - match core::str::from_utf8(input) { - Ok(_) => (), - Err(_) => return Err(ErrorType::InvalidUtf8), - }; - #[cfg(not(feature = "portable"))] - Self::_find_structural_bits::(input, structural_indexes) + #[cfg(not(feature = "portable"))] + let r = { + // This is a nasty hack, we don't have a chunked implementation for native rust + // so we validate UTF8 ahead of time + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), }; - #[cfg(feature = "portable")] - let r = Self::_find_structural_bits::( - input, - structural_indexes, - ); - r - } + #[cfg(not(feature = "portable"))] + Self::_find_structural_bits::(input, structural_indexes) + }; + #[cfg(feature = "portable")] + let r = + Self::_find_structural_bits::(input, structural_indexes); + r } #[inline] - #[cfg(target_feature = "avx2")] + #[cfg(all(target_feature = "avx2", not(feature = "runtime-detection")))] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, @@ -610,7 +618,11 @@ impl<'de> Deserializer<'de> { } #[inline] - #[cfg(target_feature = "sse4.2")] + #[cfg(all( + target_feature = "sse4.2", + not(feature = "runtime-detection"), + not(target_feature = "avx2") + ))] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, @@ -643,12 +655,12 @@ impl<'de> Deserializer<'de> { self.tape } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn error(error: ErrorType) -> Error { Error::new(0, None, error) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn error_c(idx: usize, c: char, error: ErrorType) -> Error { Error::new(idx, Some(c), error) } @@ -720,7 +732,7 @@ impl<'de> 
Deserializer<'de> { } #[cfg(feature = "serde_impl")] - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn skip(&mut self) { self.idx += 1; } @@ -732,13 +744,13 @@ impl<'de> Deserializer<'de> { /// /// This function is not safe to use, it is meant for internal use /// where it's know the tape isn't finished. - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] pub unsafe fn next_(&mut self) -> Node<'de> { self.idx += 1; *self.tape.get_kinda_unchecked(self.idx) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_possible_truncation)] pub(crate) unsafe fn _find_structural_bits( input: &[u8], @@ -807,7 +819,6 @@ impl<'de> Deserializer<'de> { // take the previous iterations structural bits, not our current iteration, // and flatten - #[allow(clippy::cast_possible_truncation)] S::flatten_bits(structural_indexes, idx as u32, structurals); let mut whitespace: u64 = 0; diff --git a/src/macros.rs b/src/macros.rs index 506bbaca..8e856b3d 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1195,7 +1195,7 @@ macro_rules! json_unexpected { #[macro_export] macro_rules! likely { ($e:expr) => { - std::intrinsics::likely($e) + ::std::intrinsics::likely($e) }; } @@ -1204,7 +1204,7 @@ macro_rules! likely { #[macro_export] macro_rules! unlikely { ($e:expr) => {{ - std::intrinsics::unlikely($e) + ::std::intrinsics::unlikely($e) }}; } @@ -1230,7 +1230,7 @@ macro_rules! unlikely { #[macro_export] macro_rules! static_cast_i8 { ($v:expr) => { - mem::transmute::<_, i8>($v) + ::std::mem::transmute::<_, i8>($v) }; } @@ -1246,7 +1246,7 @@ macro_rules! static_cast_i32 { #[macro_export] macro_rules! static_cast_u32 { ($v:expr) => { - std::mem::transmute::<_, u32>($v) + ::std::mem::transmute::<_, u32>($v) }; } @@ -1254,7 +1254,7 @@ macro_rules!
static_cast_i64 { ($v:expr) => { - mem::transmute::<_, i64>($v) + ::std::mem::transmute::<_, i64>($v) }; } @@ -1262,7 +1262,7 @@ macro_rules! static_cast_i64 { #[macro_export] macro_rules! static_cast_i128 { ($v:expr) => { - mem::transmute::<_, i128>($v) + ::std::mem::transmute::<_, i128>($v) }; } @@ -1270,7 +1270,7 @@ macro_rules! static_cast_i128 { #[macro_export] macro_rules! static_cast_u64 { ($v:expr) => { - mem::transmute::<_, u64>($v) + ::std::mem::transmute::<_, u64>($v) }; } diff --git a/src/numberparse.rs b/src/numberparse.rs index 85fec590..1cbbcf69 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -21,7 +21,7 @@ use arch::{ _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, }; -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_integer(c: u8) -> bool { c.is_ascii_digit() } @@ -49,7 +49,7 @@ const STRUCTURAL_OR_WHITESPACE_OR_EXPONENT_OR_DECIMAL_NEGATED: [bool; 256] = [ true, true, true, true, true, true, true, ]; -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] fn is_not_structural_or_whitespace_or_exponent_or_decimal(c: u8) -> bool { unsafe { *STRUCTURAL_OR_WHITESPACE_OR_EXPONENT_OR_DECIMAL_NEGATED.get_kinda_unchecked(c as usize) diff --git a/src/numberparse/approx.rs b/src/numberparse/approx.rs index 7cec9b5a..57bc9b7b 100644 --- a/src/numberparse/approx.rs +++ b/src/numberparse/approx.rs @@ -6,7 +6,7 @@ use crate::charutils::is_structural_or_whitespace; use crate::safer_unchecked::GetSaferUnchecked; use crate::unlikely; use crate::StaticNode; -use crate::{mem, static_cast_i64, Deserializer, ErrorType, Result}; +use crate::{static_cast_i64, Deserializer, ErrorType, Result}; const POWER_OF_TEN: [f64; 632] = [ 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, @@ -351,7 +351,7 @@ impl<'de> Deserializer<'de> { // parse the number at buf + offset // define 
JSON_TEST_NUMBERS for unit testing - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow( clippy::cast_possible_truncation, clippy::cast_sign_loss, diff --git a/src/numberparse/correct.rs b/src/numberparse/correct.rs index 54169343..9d731316 100644 --- a/src/numberparse/correct.rs +++ b/src/numberparse/correct.rs @@ -11,7 +11,7 @@ use crate::error::Error; use crate::safer_unchecked::GetSaferUnchecked; use crate::unlikely; use crate::StaticNode; -use crate::{mem, static_cast_i64, Deserializer, ErrorType, Result}; +use crate::{static_cast_i64, Deserializer, ErrorType, Result}; macro_rules! get { ($buf:ident, $idx:expr) => { diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs index fe9dced4..d10bda4b 100644 --- a/src/safer_unchecked.rs +++ b/src/safer_unchecked.rs @@ -14,7 +14,7 @@ pub trait GetSaferUnchecked { } impl GetSaferUnchecked for [T] { - #[inline(always)] + #[inline] unsafe fn get_kinda_unchecked(&self, index: I) -> &>::Output where I: SliceIndex<[T]>, @@ -26,7 +26,7 @@ impl GetSaferUnchecked for [T] { } } - #[inline(always)] + #[inline] unsafe fn get_kinda_unchecked_mut(&mut self, index: I) -> &mut >::Output where I: SliceIndex<[T]>, diff --git a/src/serde.rs b/src/serde.rs index 748f5a97..3dafa8b9 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -54,7 +54,7 @@ impl std::error::Error for SerdeConversionError {} /// # Errors /// /// Will return `Err` if `s` is invalid JSON. -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn from_slice<'a, T>(s: &'a mut [u8]) -> Result where T: Deserialize<'a>, @@ -75,7 +75,7 @@ where /// This function mutates the string passed into it, it's a convinience wrapper around `from_slice`, /// holding the same guarantees as `str::as_bytes_mut` in that after the call &str might include /// invalid utf8 bytes. 
-#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub unsafe fn from_str<'a, T>(s: &'a mut str) -> Result where T: Deserialize<'a>, @@ -91,7 +91,7 @@ where /// /// Will return `Err` if an IO error is encountered while reading /// rdr or if the readers content is invalid JSON. -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn from_reader(mut rdr: R) -> Result where R: io::Read, @@ -119,7 +119,7 @@ impl serde_ext::ser::Error for Error { // Functions purely used by serde impl<'de> Deserializer<'de> { - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn next(&mut self) -> Result> { self.idx += 1; self.tape @@ -128,7 +128,7 @@ impl<'de> Deserializer<'de> { .ok_or_else(|| Self::error(ErrorType::Syntax)) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn peek(&self) -> Result { self.tape .get(self.idx + 1) @@ -136,7 +136,7 @@ impl<'de> Deserializer<'de> { .ok_or_else(|| Self::error(ErrorType::Eof)) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_u8(&mut self) -> Result { match unsafe { self.next_() } { @@ -147,7 +147,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_u16(&mut self) -> Result { match unsafe { self.next_() } { @@ -158,7 +158,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_u32(&mut self) -> Result { match unsafe { self.next_() } { @@ -169,7 +169,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + 
#[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_u64(&mut self) -> Result { match unsafe { self.next_() } { @@ -180,7 +180,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_u128(&mut self) -> Result { match unsafe { self.next_() } { @@ -191,7 +191,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_i8(&mut self) -> Result { match unsafe { self.next_() } { @@ -202,7 +202,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_i16(&mut self) -> Result { match unsafe { self.next_() } { @@ -213,7 +213,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_i32(&mut self) -> Result { match unsafe { self.next_() } { @@ -224,7 +224,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_i64(&mut self) -> Result { match unsafe { self.next_() } { @@ -235,7 +235,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_sign_loss)] fn parse_i128(&mut self) -> Result { match unsafe { self.next_() } { @@ -246,7 +246,7 @@ impl<'de> Deserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] #[allow(clippy::cast_possible_wrap, clippy::cast_precision_loss)] fn parse_double(&mut self) -> Result { 
match unsafe { self.next_() } { diff --git a/src/serde/de.rs b/src/serde/de.rs index 9f838ef4..8716101e 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -13,7 +13,7 @@ where // Look at the input data to decide what Serde data model type to // deserialize as. Not all data formats are able to support this operation. // Formats that support `deserialize_any` are known as self-describing. - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, diff --git a/src/stage2.rs b/src/stage2.rs index 7f4c7919..a2c65856 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -5,8 +5,7 @@ use crate::value::tape::Node; use crate::{Deserializer, Error, ErrorType, InternalError, Result}; use value_trait::StaticNode; -#[cfg_attr(not(feature = "no-inline"), inline(always))] -#[allow(clippy::cast_ptr_alignment)] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_valid_true_atom(loc: &[u8]) -> bool { debug_assert!(loc.len() >= 8, "loc too short for a u64 read"); @@ -18,8 +17,6 @@ pub fn is_valid_true_atom(loc: &[u8]) -> bool { const TV: u64 = 0x00_00_00_00_65_75_72_74; const MASK4: u64 = 0x00_00_00_00_ff_ff_ff_ff; - // TODO: does this has the same effect as: - // std::memcpy(&locval, loc, sizeof(uint64_t)); let locval: u64 = loc.as_ptr().cast::().read_unaligned(); error = (locval & MASK4) ^ TV; @@ -34,50 +31,48 @@ macro_rules! 
get { }}; } -#[cfg_attr(not(feature = "no-inline"), inline(always))] -#[allow(clippy::cast_ptr_alignment, unused_unsafe)] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_valid_false_atom(loc: &[u8]) -> bool { + const FV: u64 = 0x00_00_00_65_73_6c_61_66; + const MASK5: u64 = 0x00_00_00_ff_ff_ff_ff_ff; + debug_assert!(loc.len() >= 8, "loc too short for a u64 read"); // TODO: this is ugly and probably copies data every time let mut error: u64; - unsafe { - //let fv: u64 = *(b"false ".as_ptr() as *const u64); - // this is the same: + //let fv: u64 = *(b"false ".as_ptr() as *const u64); + // this is the same: - const FV: u64 = 0x00_00_00_65_73_6c_61_66; - const MASK5: u64 = 0x00_00_00_ff_ff_ff_ff_ff; + let locval: u64 = unsafe { loc.as_ptr().cast::().read_unaligned() }; - let locval: u64 = loc.as_ptr().cast::().read_unaligned(); + // FIXME the original code looks like this: + // error = ((locval & mask5) ^ fv) as u32; + // but that fails on falsy as the u32 conversion + // will mask the error on the y so we re-write it + // it would be interesting what the consequences are + error = (locval & MASK5) ^ FV; + error |= u64::from(is_not_structural_or_whitespace(*get!(loc, 5))); - // FIXME the original code looks like this: - // error = ((locval & mask5) ^ fv) as u32; - // but that fails on falsy as the u32 conversion - // will mask the error on the y so we re-write it - // it would be interesting what the consequences are - error = (locval & MASK5) ^ FV; - error |= u64::from(is_not_structural_or_whitespace(*get!(loc, 5))); - } error == 0 } -#[cfg_attr(not(feature = "no-inline"), inline(always))] -#[allow(clippy::cast_ptr_alignment, unused_unsafe)] +#[cfg_attr(not(feature = "no-inline"), inline)] pub fn is_valid_null_atom(loc: &[u8]) -> bool { + //let nv: u64 = *(b"null ".as_ptr() as *const u64); + // this is the same: + const NV: u64 = 0x00_00_00_00_6c_6c_75_6e; + const MASK4: u64 = 0x00_00_00_00_ff_ff_ff_ff; + debug_assert!(loc.len() >= 8, "loc too short for a 
u64 read"); // TODO is this expensive? let mut error: u64; - unsafe { - //let nv: u64 = *(b"null ".as_ptr() as *const u64); - // this is the same: - const NV: u64 = 0x00_00_00_00_6c_6c_75_6e; - const MASK4: u64 = 0x00_00_00_00_ff_ff_ff_ff; - let locval: u64 = loc.as_ptr().cast::().read_unaligned(); - error = (locval & MASK4) ^ NV; - error |= u64::from(is_not_structural_or_whitespace(*get!(loc, 4))); - } + let locval: u64 = unsafe { loc.as_ptr().cast::().read_unaligned() }; + + error = (locval & MASK4) ^ NV; + error |= u64::from(is_not_structural_or_whitespace(*get!(loc, 4))); + error == 0 } @@ -96,12 +91,7 @@ enum StackState { impl<'de> Deserializer<'de> { #[inline] - #[allow( - clippy::cognitive_complexity, - clippy::too_many_lines, - unused_unsafe, - clippy::uninit_vec - )] + #[allow(clippy::cognitive_complexity, clippy::too_many_lines, unused_unsafe)] pub(crate) fn build_tape( input: &'de mut [u8], input2: &[u8], diff --git a/src/stringparse.rs b/src/stringparse.rs index 8344d0f7..4fb5bf78 100644 --- a/src/stringparse.rs +++ b/src/stringparse.rs @@ -27,7 +27,7 @@ const LOW_SURROGATES: Range = 0xdc00..0xe000; /// handle a unicode codepoint /// write appropriate values into dest -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) fn handle_unicode_codepoint( src_ptr: &[u8], dst_ptr: &mut [u8], @@ -43,7 +43,7 @@ pub(crate) fn handle_unicode_codepoint( /// dest will advance a variable amount (return via pointer) /// return true if the unicode codepoint was valid /// We work in little-endian then swap at write time -#[cfg_attr(not(feature = "no-inline"), inline(always))] +#[cfg_attr(not(feature = "no-inline"), inline)] pub(crate) fn get_unicode_codepoint(mut src_ptr: &[u8]) -> Result<(u32, usize), ErrorType> { // hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the // conversion isn't valid; we defer the check for this to inside the diff --git a/src/tests/serde.rs 
b/src/tests/serde.rs index 132e13b7..3b77602e 100644 --- a/src/tests/serde.rs +++ b/src/tests/serde.rs @@ -922,7 +922,7 @@ proptest! { .. ProptestConfig::default() })] #[test] - #[allow(clippy::should_panic_without_expect)] + // #[allow(clippy::should_panic_without_expect)] #[should_panic] fn prop_junk(d in arb_junk()) { let mut d1 = d.clone(); @@ -947,7 +947,7 @@ proptest! { })] #[test] - #[allow(clippy::should_panic_without_expect)] + // #[allow(clippy::should_panic_without_expect)] #[should_panic] fn prop_string(d in "\\PC*") { let mut d1 = d.clone(); diff --git a/src/value.rs b/src/value.rs index 65b18f4d..6361944a 100644 --- a/src/value.rs +++ b/src/value.rs @@ -64,7 +64,6 @@ pub use self::owned::{ to_value as to_owned_value, to_value_with_buffers as to_owned_value_with_buffers, Value as OwnedValue, }; -use crate::safer_unchecked::GetSaferUnchecked; use crate::{Deserializer, Result}; use halfbrown::HashMap; use std::hash::Hash; @@ -123,7 +122,7 @@ where } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] pub fn parse(&mut self) -> Value { match unsafe { self.de.next_() } { Node::Static(s) => Value::from(s), @@ -133,23 +132,23 @@ where } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow(clippy::uninit_vec)] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_array(&mut self, len: usize) -> Value { // Rust doesn't optimize the normal loop away here // so we write our own avoiding the length // checks during push - let mut res = Vec::with_capacity(len); + let mut res: Vec = Vec::with_capacity(len); + let res_ptr = res.as_mut_ptr(); unsafe { - res.set_len(len); for i in 0..len { - std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); + res_ptr.add(i).write(self.parse()); } + res.set_len(len); } Value::from(res) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_map(&mut self, len: usize) -> Value { let 
mut res: HashMap = HashMap::with_capacity_and_hasher(len, ObjectHasher::default()); diff --git a/src/value/borrowed.rs b/src/value/borrowed.rs index 6874f0d4..5f4dc285 100644 --- a/src/value/borrowed.rs +++ b/src/value/borrowed.rs @@ -26,7 +26,6 @@ mod serialize; use super::ObjectHasher; use crate::cow::Cow; -use crate::safer_unchecked::GetSaferUnchecked; use crate::{prelude::*, Buffers}; use crate::{Deserializer, Node, Result, StaticNode}; use halfbrown::HashMap; @@ -411,7 +410,7 @@ impl<'de> BorrowDeserializer<'de> { Self(de) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] pub fn parse(&mut self) -> Value<'de> { match unsafe { self.0.next_() } { Node::Static(s) => Value::Static(s), @@ -421,23 +420,23 @@ impl<'de> BorrowDeserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow(clippy::uninit_vec)] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_array(&mut self, len: usize) -> Value<'de> { // Rust doesn't optimize the normal loop away here // so we write our own avoiding the length // checks during push - let mut res = Vec::with_capacity(len); + let mut res: Vec> = Vec::with_capacity(len); + let res_ptr = res.as_mut_ptr(); unsafe { - res.set_len(len); for i in 0..len { - std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); + res_ptr.add(i).write(self.parse()); } + res.set_len(len); } Value::Array(res) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_map(&mut self, len: usize) -> Value<'de> { let mut res = Object::with_capacity_and_hasher(len, ObjectHasher::default()); diff --git a/src/value/borrowed/serialize.rs b/src/value/borrowed/serialize.rs index 520d1687..09cacc38 100644 --- a/src/value/borrowed/serialize.rs +++ b/src/value/borrowed/serialize.rs @@ -53,7 +53,7 @@ impl<'value> Writable for Value<'value> { trait Generator: BaseGenerator { type T: Write; - #[inline(always)] + 
#[inline] fn write_object(&mut self, object: &Object) -> io::Result<()> { if object.is_empty() { self.write(b"{}") @@ -87,7 +87,7 @@ trait Generator: BaseGenerator { } } - #[inline(always)] + #[inline] fn write_json(&mut self, json: &Value) -> io::Result<()> { match *json { Value::Static(StaticNode::Null) => self.write(b"null"), @@ -138,7 +138,7 @@ trait Generator: BaseGenerator { trait FastGenerator: BaseGenerator { type T: Write; - #[inline(always)] + #[inline] fn write_object(&mut self, object: &Object) -> io::Result<()> { if object.is_empty() { self.write(b"{}") @@ -167,7 +167,7 @@ trait FastGenerator: BaseGenerator { } } - #[inline(always)] + #[inline] fn write_json(&mut self, json: &Value) -> io::Result<()> { match *json { Value::Static(StaticNode::Null) => self.write(b"null"), diff --git a/src/value/owned.rs b/src/value/owned.rs index 19f2cef9..cee78862 100644 --- a/src/value/owned.rs +++ b/src/value/owned.rs @@ -24,7 +24,6 @@ mod from; mod serialize; use super::ObjectHasher; -use crate::safer_unchecked::GetSaferUnchecked; use crate::{prelude::*, Buffers}; use crate::{Deserializer, Node, Result, StaticNode}; use halfbrown::HashMap; @@ -330,7 +329,7 @@ impl<'de> OwnedDeserializer<'de> { pub fn from_deserializer(de: Deserializer<'de>) -> Self { Self { de } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] pub fn parse(&mut self) -> Value { match unsafe { self.de.next_() } { Node::Static(s) => Value::Static(s), @@ -340,23 +339,23 @@ impl<'de> OwnedDeserializer<'de> { } } - #[cfg_attr(not(feature = "no-inline"), inline(always))] - #[allow(clippy::uninit_vec)] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_array(&mut self, len: usize) -> Value { // Rust doesn't optimize the normal loop away here // so we write our own avoiding the length // checks during push - let mut res = Vec::with_capacity(len); + let mut res: Vec = Vec::with_capacity(len); + let res_ptr = res.as_mut_ptr(); unsafe { - 
res.set_len(len); for i in 0..len { - std::ptr::write(res.get_kinda_unchecked_mut(i), self.parse()); + res_ptr.add(i).write(self.parse()); } + res.set_len(len); } Value::Array(res) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] + #[cfg_attr(not(feature = "no-inline"), inline)] fn parse_map(&mut self, len: usize) -> Value { let mut res = Object::with_capacity_and_hasher(len, ObjectHasher::default()); diff --git a/src/value/owned/serialize.rs b/src/value/owned/serialize.rs index 33457a87..665aaff3 100644 --- a/src/value/owned/serialize.rs +++ b/src/value/owned/serialize.rs @@ -52,7 +52,7 @@ impl Writable for Value { trait Generator: BaseGenerator { type T: Write; - #[inline(always)] + #[inline] fn write_object(&mut self, object: &Object) -> io::Result<()> { if object.is_empty() { self.write(b"{}") @@ -86,7 +86,7 @@ trait Generator: BaseGenerator { } } - #[inline(always)] + #[inline] fn write_json(&mut self, json: &Value) -> io::Result<()> { match *json { Value::Static(StaticNode::Null) => self.write(b"null"), @@ -139,7 +139,7 @@ trait Generator: BaseGenerator { trait FastGenerator: BaseGenerator { type T: Write; - #[inline(always)] + #[inline] fn write_object(&mut self, object: &Object) -> io::Result<()> { if object.is_empty() { self.write(b"{}") @@ -168,7 +168,7 @@ trait FastGenerator: BaseGenerator { } } - #[inline(always)] + #[inline] fn write_json(&mut self, json: &Value) -> io::Result<()> { match *json { Value::Static(StaticNode::Null) => self.write(b"null"), diff --git a/tree b/tree deleted file mode 100644 index 9d3c4ef2..00000000 --- a/tree +++ /dev/null @@ -1,215 +0,0 @@ -simd-json v0.12.0 (/home/heinz/simd-json) -β”œβ”€β”€ ahash v0.8.3 -β”‚ β”œβ”€β”€ cfg-if v1.0.0 -β”‚ β”œβ”€β”€ getrandom v0.2.10 -β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 -β”‚ β”‚ └── libc v0.2.147 -β”‚ └── once_cell v1.18.0 -β”‚ [build-dependencies] -β”‚ └── version_check v0.9.4 -β”œβ”€β”€ alloc_counter v0.0.4 -β”‚ β”œβ”€β”€ alloc_counter_macro v0.0.2 (proc-macro) -β”‚ β”‚ β”œβ”€β”€ 
proc-macro2 v1.0.66 -β”‚ β”‚ β”‚ └── unicode-ident v1.0.11 -β”‚ β”‚ β”œβ”€β”€ quote v1.0.33 -β”‚ β”‚ β”‚ └── proc-macro2 v1.0.66 (*) -β”‚ β”‚ └── syn v1.0.109 -β”‚ β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) -β”‚ β”‚ β”œβ”€β”€ quote v1.0.33 (*) -β”‚ β”‚ └── unicode-ident v1.0.11 -β”‚ └── pin-utils v0.1.0 -β”œβ”€β”€ beef v0.5.2 -β”œβ”€β”€ colored v2.0.4 -β”‚ β”œβ”€β”€ is-terminal v0.4.9 -β”‚ β”‚ └── rustix v0.38.8 -β”‚ β”‚ β”œβ”€β”€ bitflags v2.4.0 -β”‚ β”‚ └── linux-raw-sys v0.4.5 -β”‚ └── lazy_static v1.4.0 -β”œβ”€β”€ getopts v0.2.21 -β”‚ └── unicode-width v0.1.10 -β”œβ”€β”€ halfbrown v0.2.4 -β”‚ β”œβ”€β”€ arrayvec v0.7.4 -β”‚ β”œβ”€β”€ hashbrown v0.13.2 -β”‚ β”‚ └── ahash v0.8.3 (*) -β”‚ └── serde v1.0.185 -β”‚ └── serde_derive v1.0.185 (proc-macro) -β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) -β”‚ β”œβ”€β”€ quote v1.0.33 (*) -β”‚ └── syn v2.0.29 -β”‚ β”œβ”€β”€ proc-macro2 v1.0.66 (*) -β”‚ β”œβ”€β”€ quote v1.0.33 (*) -β”‚ └── unicode-ident v1.0.11 -β”œβ”€β”€ jemallocator v0.5.4 -β”‚ β”œβ”€β”€ jemalloc-sys v0.5.4+5.3.0-patched -β”‚ β”‚ └── libc v0.2.147 -β”‚ β”‚ [build-dependencies] -β”‚ β”‚ └── cc v1.0.83 -β”‚ β”‚ └── libc v0.2.147 -β”‚ └── libc v0.2.147 -β”œβ”€β”€ lexical-core v0.8.5 -β”‚ β”œβ”€β”€ lexical-parse-float v0.8.5 -β”‚ β”‚ β”œβ”€β”€ lexical-parse-integer v0.8.6 -β”‚ β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 -β”‚ β”‚ β”‚ β”‚ └── static_assertions v1.1.0 -β”‚ β”‚ β”‚ └── static_assertions v1.1.0 -β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) -β”‚ β”‚ └── static_assertions v1.1.0 -β”‚ β”œβ”€β”€ lexical-parse-integer v0.8.6 (*) -β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) -β”‚ β”œβ”€β”€ lexical-write-float v0.8.5 -β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) -β”‚ β”‚ β”œβ”€β”€ lexical-write-integer v0.8.5 -β”‚ β”‚ β”‚ β”œβ”€β”€ lexical-util v0.8.5 (*) -β”‚ β”‚ β”‚ └── static_assertions v1.1.0 -β”‚ β”‚ └── static_assertions v1.1.0 -β”‚ └── lexical-write-integer v0.8.5 (*) -β”œβ”€β”€ once_cell v1.18.0 -β”œβ”€β”€ perfcnt v0.8.0 -β”‚ β”œβ”€β”€ bitflags v1.3.2 -β”‚ β”œβ”€β”€ libc v0.2.147 
-β”‚ β”œβ”€β”€ mmap v0.1.1 -β”‚ β”‚ β”œβ”€β”€ libc v0.1.12 -β”‚ β”‚ └── tempdir v0.3.7 -β”‚ β”‚ β”œβ”€β”€ rand v0.4.6 -β”‚ β”‚ β”‚ └── libc v0.2.147 -β”‚ β”‚ └── remove_dir_all v0.5.3 -β”‚ β”œβ”€β”€ nom v4.2.3 -β”‚ β”‚ └── memchr v2.5.0 -β”‚ β”‚ [build-dependencies] -β”‚ β”‚ └── version_check v0.1.5 -β”‚ └── x86 v0.47.0 -β”‚ β”œβ”€β”€ bit_field v0.10.2 -β”‚ β”œβ”€β”€ bitflags v1.3.2 -β”‚ β”œβ”€β”€ phf v0.9.0 -β”‚ β”‚ └── phf_shared v0.9.0 -β”‚ β”‚ └── siphasher v0.3.10 -β”‚ └── raw-cpuid v10.7.0 -β”‚ └── bitflags v1.3.2 -β”‚ [build-dependencies] -β”‚ β”œβ”€β”€ csv v1.2.2 -β”‚ β”‚ β”œβ”€β”€ csv-core v0.1.10 -β”‚ β”‚ β”‚ └── memchr v2.5.0 -β”‚ β”‚ β”œβ”€β”€ itoa v1.0.9 -β”‚ β”‚ β”œβ”€β”€ ryu v1.0.15 -β”‚ β”‚ └── serde v1.0.185 -β”‚ β”œβ”€β”€ phf_codegen v0.9.0 -β”‚ β”‚ β”œβ”€β”€ phf_generator v0.9.1 -β”‚ β”‚ β”‚ β”œβ”€β”€ phf_shared v0.9.0 -β”‚ β”‚ β”‚ β”‚ └── siphasher v0.3.10 -β”‚ β”‚ β”‚ └── rand v0.8.5 -β”‚ β”‚ β”‚ β”œβ”€β”€ libc v0.2.147 -β”‚ β”‚ β”‚ β”œβ”€β”€ rand_chacha v0.3.1 -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ ppv-lite86 v0.2.17 -β”‚ β”‚ β”‚ β”‚ └── rand_core v0.6.4 -β”‚ β”‚ β”‚ β”‚ └── getrandom v0.2.10 (*) -β”‚ β”‚ β”‚ └── rand_core v0.6.4 (*) -β”‚ β”‚ └── phf_shared v0.9.0 (*) -β”‚ └── serde_json v1.0.105 -β”‚ β”œβ”€β”€ itoa v1.0.9 -β”‚ β”œβ”€β”€ ryu v1.0.15 -β”‚ └── serde v1.0.185 (*) -β”œβ”€β”€ serde v1.0.185 (*) -β”œβ”€β”€ serde_json v1.0.105 (*) -β”œβ”€β”€ simdutf8 v0.1.4 (/home/heinz/simdutf8) -└── value-trait v0.6.1 - β”œβ”€β”€ float-cmp v0.9.0 - β”‚ └── num-traits v0.2.16 - β”‚ └── libm v0.2.7 - β”‚ [build-dependencies] - β”‚ └── autocfg v1.1.0 - β”œβ”€β”€ halfbrown v0.2.4 (*) - β”œβ”€β”€ itoa v1.0.9 - └── ryu v1.0.15 -[dev-dependencies] -β”œβ”€β”€ core_affinity v0.8.1 -β”‚ β”œβ”€β”€ libc v0.2.147 -β”‚ └── num_cpus v1.16.0 -β”‚ └── libc v0.2.147 -β”œβ”€β”€ criterion v0.5.1 -β”‚ β”œβ”€β”€ anes v0.1.6 -β”‚ β”œβ”€β”€ cast v0.3.0 -β”‚ β”œβ”€β”€ ciborium v0.2.1 -β”‚ β”‚ β”œβ”€β”€ ciborium-io v0.2.1 -β”‚ β”‚ β”œβ”€β”€ ciborium-ll v0.2.1 -β”‚ β”‚ β”‚ β”œβ”€β”€ 
ciborium-io v0.2.1 -β”‚ β”‚ β”‚ └── half v1.8.2 -β”‚ β”‚ └── serde v1.0.185 (*) -β”‚ β”œβ”€β”€ clap v4.3.23 -β”‚ β”‚ └── clap_builder v4.3.23 -β”‚ β”‚ β”œβ”€β”€ anstyle v1.0.1 -β”‚ β”‚ └── clap_lex v0.5.0 -β”‚ β”œβ”€β”€ criterion-plot v0.5.0 -β”‚ β”‚ β”œβ”€β”€ cast v0.3.0 -β”‚ β”‚ └── itertools v0.10.5 -β”‚ β”‚ └── either v1.9.0 -β”‚ β”œβ”€β”€ is-terminal v0.4.9 (*) -β”‚ β”œβ”€β”€ itertools v0.10.5 (*) -β”‚ β”œβ”€β”€ num-traits v0.2.16 (*) -β”‚ β”œβ”€β”€ once_cell v1.18.0 -β”‚ β”œβ”€β”€ oorandom v11.1.3 -β”‚ β”œβ”€β”€ plotters v0.3.5 -β”‚ β”‚ β”œβ”€β”€ num-traits v0.2.16 (*) -β”‚ β”‚ β”œβ”€β”€ plotters-backend v0.3.5 -β”‚ β”‚ └── plotters-svg v0.3.5 -β”‚ β”‚ └── plotters-backend v0.3.5 -β”‚ β”œβ”€β”€ rayon v1.7.0 -β”‚ β”‚ β”œβ”€β”€ either v1.9.0 -β”‚ β”‚ └── rayon-core v1.11.0 -β”‚ β”‚ β”œβ”€β”€ crossbeam-channel v0.5.8 -β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 -β”‚ β”‚ β”‚ └── crossbeam-utils v0.8.16 -β”‚ β”‚ β”‚ └── cfg-if v1.0.0 -β”‚ β”‚ β”œβ”€β”€ crossbeam-deque v0.8.3 -β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 -β”‚ β”‚ β”‚ β”œβ”€β”€ crossbeam-epoch v0.9.15 -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ crossbeam-utils v0.8.16 (*) -β”‚ β”‚ β”‚ β”‚ β”œβ”€β”€ memoffset v0.9.0 -β”‚ β”‚ β”‚ β”‚ β”‚ [build-dependencies] -β”‚ β”‚ β”‚ β”‚ β”‚ └── autocfg v1.1.0 -β”‚ β”‚ β”‚ β”‚ └── scopeguard v1.2.0 -β”‚ β”‚ β”‚ β”‚ [build-dependencies] -β”‚ β”‚ β”‚ β”‚ └── autocfg v1.1.0 -β”‚ β”‚ β”‚ └── crossbeam-utils v0.8.16 (*) -β”‚ β”‚ β”œβ”€β”€ crossbeam-utils v0.8.16 (*) -β”‚ β”‚ └── num_cpus v1.16.0 (*) -β”‚ β”œβ”€β”€ regex v1.9.3 -β”‚ β”‚ β”œβ”€β”€ regex-automata v0.3.6 -β”‚ β”‚ β”‚ └── regex-syntax v0.7.4 -β”‚ β”‚ └── regex-syntax v0.7.4 -β”‚ β”œβ”€β”€ serde v1.0.185 (*) -β”‚ β”œβ”€β”€ serde_derive v1.0.185 (proc-macro) (*) -β”‚ β”œβ”€β”€ serde_json v1.0.105 (*) -β”‚ β”œβ”€β”€ tinytemplate v1.2.1 -β”‚ β”‚ β”œβ”€β”€ serde v1.0.185 (*) -β”‚ β”‚ └── serde_json v1.0.105 (*) -β”‚ └── walkdir v2.3.3 -β”‚ └── same-file v1.0.6 -β”œβ”€β”€ float-cmp v0.9.0 (*) -β”œβ”€β”€ 
getopts v0.2.21 (*) -└── proptest v1.2.0 - β”œβ”€β”€ bit-set v0.5.3 - β”‚ └── bit-vec v0.6.3 - β”œβ”€β”€ bitflags v1.3.2 - β”œβ”€β”€ byteorder v1.4.3 - β”œβ”€β”€ lazy_static v1.4.0 - β”œβ”€β”€ num-traits v0.2.16 (*) - β”œβ”€β”€ rand v0.8.5 - β”‚ β”œβ”€β”€ libc v0.2.147 - β”‚ β”œβ”€β”€ rand_chacha v0.3.1 (*) - β”‚ └── rand_core v0.6.4 (*) - β”œβ”€β”€ rand_chacha v0.3.1 (*) - β”œβ”€β”€ rand_xorshift v0.3.0 - β”‚ └── rand_core v0.6.4 (*) - β”œβ”€β”€ regex-syntax v0.6.29 - β”œβ”€β”€ rusty-fork v0.3.0 - β”‚ β”œβ”€β”€ fnv v1.0.7 - β”‚ β”œβ”€β”€ quick-error v1.2.3 - β”‚ β”œβ”€β”€ tempfile v3.8.0 - β”‚ β”‚ β”œβ”€β”€ cfg-if v1.0.0 - β”‚ β”‚ β”œβ”€β”€ fastrand v2.0.0 - β”‚ β”‚ └── rustix v0.38.8 (*) - β”‚ └── wait-timeout v0.2.0 - β”‚ └── libc v0.2.147 - β”œβ”€β”€ tempfile v3.8.0 (*) - └── unarray v0.1.4 From 52b94bd7fbccf3bc02277755fd408bf63af09543 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Fri, 20 Oct 2023 13:36:30 +0200 Subject: [PATCH 7/8] Add algorithm function Signed-off-by: Heinz N. 
Gies --- src/impls/simd128/stage1.rs | 2 +- src/lib.rs | 99 +++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/impls/simd128/stage1.rs b/src/impls/simd128/stage1.rs index 056fb28d..48af72c6 100644 --- a/src/impls/simd128/stage1.rs +++ b/src/impls/simd128/stage1.rs @@ -173,7 +173,7 @@ impl Stage1Parse for SimdInput { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - let fial_len = l + cnt; + let final_len = l + cnt; while bits != 0 { let v0 = bits.trailing_zeros(); diff --git a/src/lib.rs b/src/lib.rs index 1d7c3bad..670abb52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -405,6 +405,105 @@ type FindStructuralBitsFn = unsafe fn( structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType>; +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +/// Supported implementations +pub enum Implementation { + /// Rust native implementation + Native, + /// Rust native implementation with using std::simd + StdSimd, + /// SSE4.2 implementation + SSE42, + /// AVX2 implementation + AVX2, + /// ARM NEON implementation + NEON, + /// WEBASM SIMD128 implementation + SIMD128, +} + +impl std::fmt::Display for Implementation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Implementation::Native => write!(f, "Rust Native"), + Implementation::StdSimd => write!(f, "std::simd"), + Implementation::SSE42 => write!(f, "SSE42"), + Implementation::AVX2 => write!(f, "AVX2"), + Implementation::NEON => write!(f, "NEON"), + Implementation::SIMD128 => write!(f, "SIMD128"), + } + } +} + +impl<'de> Deserializer<'de> { + /// returns the algorithm / architecture used by the deserializer + #[cfg(all( + feature = "runtime-detection", + any(target_arch = "x86_64", target_arch = "x86"), + ))] + #[must_use] + pub fn algorithm() -> Implementation { + if std::is_x86_feature_detected!("avx2") { + Implementation::AVX2 + } else if 
std::is_x86_feature_detected!("sse4.2") { + Implementation::SSE42 + } else { + #[cfg(feature = "portable")] + let r = Implementation::StdSimd; + #[cfg(not(feature = "portable"))] + let r = Implementation::Native; + r + } + } + #[cfg(not(any( + feature = "runtime-detection", + target_feature = "avx2", + target_feature = "sse4.2", + target_feature = "simd128", + target_arch = "aarch64", + )))] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + #[cfg(feature = "portable")] + let r = Implementation::StdSimd; + #[cfg(not(feature = "portable"))] + let r = Implementation::Native; + r + } + + #[cfg(all(target_feature = "avx2", not(feature = "runtime-detection")))] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + Implementation::AVX2 + } + + #[cfg(all( + target_feature = "sse4.2", + not(feature = "runtime-detection"), + not(target_feature = "avx2") + ))] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + Implementation::SSE42 + } + + #[cfg(target_arch = "aarch64")] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + Implementation::NEON + } + #[cfg(target_feature = "simd128")] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + Implementation::SIMD128 + } +} + impl<'de> Deserializer<'de> { #[inline] #[cfg(all( From 008db067f12ff10f747f1e99f176b827ce09ce52 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Fri, 20 Oct 2023 14:32:22 +0200 Subject: [PATCH 8/8] feature flag improvements Signed-off-by: Heinz N. 
Gies --- README.md | 8 ++++ examples/perf.rs | 24 ++++++++--- src/lib.rs | 110 ++++++++++++++++++++++++++++++----------------- 3 files changed, 97 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 65fcf7fb..121a3eec 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,12 @@ To be able to take advantage of `simd-json` your system needs to be SIMD capable `simd-json` supports AVX2, SSE4.2 and NEON and simd128 (wasm) natively, it also includes a unoptimized fallback implementation using native rust for other platforms, however this is a last resport measure and nothing we'd recommend relying on. +### Performance characteristics + +- Native CPU compilation results in the best performance. +- CPU detection for AVX2 and SSE4.2 is the second fastest (on x86_* only). +- portable std::simd is the next fastest implementation when compiled with a native cpu target. +- std::simd or the rust native implementation is the least performant. ### allocator @@ -36,6 +42,8 @@ For best performance we highly suggest using [mimalloc](https://crates.io/crates This feature allowa selecting the optimal algorithn based on availalbe features during runeimte, it has no effect on non x86 or x86_64 platforms. When neither `AVX2` nor `SSE4.2` is spported it will fallback to a native rust implementaiton. +note that an application compiled with `runtime-detection` will not run as fast as an application compiled for a specific CPU, the reason being that rust can't optimize as far to the instruction set when it uses the generic instruction set, also non simd parts of the code won't be optimized for the given instruction set either.
+ ### `portable` **Currently disabled** diff --git a/examples/perf.rs b/examples/perf.rs index 2aae1158..b5786c6a 100644 --- a/examples/perf.rs +++ b/examples/perf.rs @@ -8,14 +8,24 @@ mod int { use perfcnt::linux::{HardwareEventType, PerfCounterBuilderLinux}; use perfcnt::{AbstractPerfCounter, PerfCounter}; use serde::{Deserialize, Serialize}; + use simd_json::{Deserializer, Implementation}; use std::io::BufReader; #[derive(Default, Serialize, Deserialize)] struct Stats { + algo: String, best: Stat, total: Stat, iters: u64, } + impl Stats { + fn new(algo: Implementation) -> Self { + Stats { + algo: algo.to_string(), + ..Default::default() + } + } + } #[derive(Default, Serialize, Deserialize)] struct Stat { @@ -96,7 +106,7 @@ mod int { let branch_instructions = self.total.branch_instructions / self.iters; println!( - "{:20} {:10} {:10} {:10} {:10} {:10} {:10.3} {:10.3}", + "{:20} {:10} {:10} {:10} {:10} {:10} {:10.3} {:10.3} {:21}", name, cycles, instructions, @@ -104,7 +114,8 @@ mod int { cache_misses, cache_references, ((self.best.cycles as f64) / bytes as f64), - ((cycles as f64) / bytes as f64) + ((cycles as f64) / bytes as f64), + self.algo ); } pub fn print_diff(&self, baseline: &Stats, name: &str, bytes: usize) { @@ -135,7 +146,7 @@ mod int { } println!( - "{:20} {:>10} {:>10} {:>10} {:>10} {:>10} {:10} {:10}", + "{:20} {:>10} {:>10} {:>10} {:>10} {:>10} {:10} {:10} {:21}", format!("{}(+/-)", name), d((1.0 - cycles_b as f64 / cycles as f64) * 100.0), d((1.0 - instructions_b as f64 / instructions as f64) * 100.0), @@ -144,6 +155,7 @@ mod int { d((1.0 - cache_references_b as f64 / cache_references as f64) * 100.0), d((1.0 - best_cycles_per_byte_b as f64 / best_cycles_per_byte as f64) * 100.0), d((1.0 - cycles_per_byte_b as f64 / cycles_per_byte as f64) * 100.0), + baseline.algo ); } } @@ -166,7 +178,7 @@ mod int { for mut bytes in &mut data_entries[..WARMUP as usize] { simd_json::to_borrowed_value(&mut bytes).unwrap(); } - let mut stats = Stats::default(); + 
let mut stats = Stats::new(Deserializer::algorithm()); for mut bytes in &mut data_entries[WARMUP as usize..] { // Set up counters let pc = stats.start(); @@ -219,8 +231,8 @@ fn main() { let matches = opts.parse(&args[1..]).unwrap(); println!( - "{:^20} {:^10} {:^21} {:^21} {:^21}", - " ", "", "Instructions", "Cache.", "Cycle/byte" + "{:^20} {:^10} {:^21} {:^21} {:^21} {:21}", + " ", "", "Instructions", "Cache.", "Cycle/byte", "Algorithm" ); println!( "{:^20} {:^10} {:^10} {:^10} {:^10} {:^10} {:^10} {:^10}", diff --git a/src/lib.rs b/src/lib.rs index 670abb52..0ae519fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -457,6 +457,7 @@ impl<'de> Deserializer<'de> { } #[cfg(not(any( feature = "runtime-detection", + feature = "portable", target_feature = "avx2", target_feature = "sse4.2", target_feature = "simd128", @@ -465,14 +466,20 @@ impl<'de> Deserializer<'de> { /// returns the algorithm / architecture used by the deserializer #[must_use] pub fn algorithm() -> Implementation { - #[cfg(feature = "portable")] - let r = Implementation::StdSimd; - #[cfg(not(feature = "portable"))] - let r = Implementation::Native; - r + Implementation::Native + } + #[cfg(all(feature = "portable", not(feature = "runtime-detection")))] + /// returns the algorithm / architecture used by the deserializer + #[must_use] + pub fn algorithm() -> Implementation { + Implementation::StdSimd } - #[cfg(all(target_feature = "avx2", not(feature = "runtime-detection")))] + #[cfg(all( + target_feature = "avx2", + not(feature = "portable"), + not(feature = "runtime-detection"), + ))] /// returns the algorithm / architecture used by the deserializer #[must_use] pub fn algorithm() -> Implementation { @@ -481,8 +488,9 @@ impl<'de> Deserializer<'de> { #[cfg(all( target_feature = "sse4.2", + not(target_feature = "avx2"), not(feature = "runtime-detection"), - not(target_feature = "avx2") + not(feature = "portable"), ))] /// returns the algorithm / architecture used by the deserializer #[must_use] @@ -490,13 
+498,14 @@ impl<'de> Deserializer<'de> { Implementation::SSE42 } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", not(feature = "portable")))] /// returns the algorithm / architecture used by the deserializer #[must_use] pub fn algorithm() -> Implementation { Implementation::NEON } - #[cfg(target_feature = "simd128")] + + #[cfg(all(target_feature = "simd128", not(feature = "portable")))] /// returns the algorithm / architecture used by the deserializer #[must_use] pub fn algorithm() -> Implementation { @@ -560,6 +569,7 @@ impl<'de> Deserializer<'de> { #[inline] #[cfg(not(any( feature = "runtime-detection", + feature = "portable", target_feature = "avx2", target_feature = "sse4.2", target_feature = "simd128", @@ -575,16 +585,29 @@ impl<'de> Deserializer<'de> { 'de: 'invoke, { let input: SillyWrapper<'de> = SillyWrapper::from(input); - - #[cfg(feature = "portable")] - let r = impls::portable::parse_str(input, data, buffer, idx); - #[cfg(not(feature = "portable"))] - let r = impls::native::parse_str(input, data, buffer, idx); - r + impls::native::parse_str(input, data, buffer, idx) + } + #[inline] + #[cfg(all(feature = "portable", not(feature = "runtime-detection")))] + pub(crate) unsafe fn parse_str_<'invoke>( + input: *mut u8, + data: &'invoke [u8], + buffer: &'invoke mut [u8], + idx: usize, + ) -> Result<&'de str> + where + 'de: 'invoke, + { + let input: SillyWrapper<'de> = SillyWrapper::from(input); + impls::portable::parse_str(input, data, buffer, idx) } #[inline] - #[cfg(all(target_feature = "avx2", not(feature = "runtime-detection")))] + #[cfg(all( + target_feature = "avx2", + not(feature = "portable"), + not(feature = "runtime-detection"), + ))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], @@ -598,8 +621,9 @@ impl<'de> Deserializer<'de> { #[inline] #[cfg(all( target_feature = "sse4.2", + not(target_feature = "avx2"), not(feature = "runtime-detection"), - not(target_feature = "avx2") + not(feature = 
"portable"), ))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, @@ -612,7 +636,7 @@ impl<'de> Deserializer<'de> { } #[inline] - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", not(feature = "portable")))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], @@ -622,7 +646,7 @@ impl<'de> Deserializer<'de> { impls::neon::parse_str(input, data, buffer, idx) } #[inline] - #[cfg(target_feature = "simd128")] + #[cfg(all(target_feature = "simd128", not(feature = "portable")))] pub(crate) unsafe fn parse_str_<'invoke>( input: *mut u8, data: &'invoke [u8], @@ -678,50 +702,58 @@ impl<'de> Deserializer<'de> { mem::transmute::(fun)(input, structural_indexes) } - #[inline] #[cfg(not(any( feature = "runtime-detection", + feature = "portable", target_feature = "avx2", target_feature = "sse4.2", target_feature = "simd128", target_arch = "aarch64", )))] + #[inline] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - #[cfg(not(feature = "portable"))] - let r = { - // This is a nasty hack, we don't have a chunked implementation for native rust - // so we validate UTF8 ahead of time - match core::str::from_utf8(input) { - Ok(_) => (), - Err(_) => return Err(ErrorType::InvalidUtf8), - }; - #[cfg(not(feature = "portable"))] - Self::_find_structural_bits::(input, structural_indexes) + // This is a nasty hack, we don't have a chunked implementation for native rust + // so we validate UTF8 ahead of time + match core::str::from_utf8(input) { + Ok(_) => (), + Err(_) => return Err(ErrorType::InvalidUtf8), }; - #[cfg(feature = "portable")] - let r = - Self::_find_structural_bits::(input, structural_indexes); - r + #[cfg(not(feature = "portable"))] + Self::_find_structural_bits::(input, structural_indexes) } + #[cfg(all(feature = "portable", not(feature = "runtime-detection")))] #[inline] - #[cfg(all(target_feature = "avx2", not(feature = 
"runtime-detection")))] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, ) -> std::result::Result<(), ErrorType> { - Self::_find_structural_bits::(input, structural_indexes) + Self::_find_structural_bits::(input, structural_indexes) } + #[cfg(all( + target_feature = "avx2", + not(feature = "portable"), + not(feature = "runtime-detection"), + ))] #[inline] + pub(crate) unsafe fn find_structural_bits( + input: &[u8], + structural_indexes: &mut Vec, + ) -> std::result::Result<(), ErrorType> { + Self::_find_structural_bits::(input, structural_indexes) + } + #[cfg(all( target_feature = "sse4.2", + not(target_feature = "avx2"), not(feature = "runtime-detection"), - not(target_feature = "avx2") + not(feature = "portable"), ))] + #[inline] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, @@ -729,8 +761,8 @@ impl<'de> Deserializer<'de> { Self::_find_structural_bits::(input, structural_indexes) } + #[cfg(all(target_arch = "aarch64", not(feature = "portable")))] #[inline] - #[cfg(target_arch = "aarch64")] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec, @@ -738,8 +770,8 @@ impl<'de> Deserializer<'de> { Self::_find_structural_bits::(input, structural_indexes) } + #[cfg(all(target_feature = "simd128", not(feature = "portable")))] #[inline] - #[cfg(target_feature = "simd128")] pub(crate) unsafe fn find_structural_bits( input: &[u8], structural_indexes: &mut Vec,