From 538c91a9135ad487e947d1d229b8ce3e3e6855eb Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Wed, 14 Sep 2022 10:17:34 -0500 Subject: [PATCH 1/5] x86 arch properly feature flagged --- src/numberparse.rs | 16 ++++++++++------ src/sse42/deser.rs | 15 ++++++++------- src/sse42/stage1.rs | 26 +++++++++++++++----------- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index 272eb91a..097a1195 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -5,16 +5,20 @@ use crate::StaticNode; use crate::{mem, static_cast_i64, Deserializer, ErrorType, Result}; #[cfg(all(target_arch = "x86", feature = "swar-number-parsing"))] -use std::arch::x86::{ - __m128i, _mm_cvtsi128_si32, _mm_loadu_si128, _mm_madd_epi16, _mm_maddubs_epi16, - _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, -}; +use std::arch::x86 as arch; + + + #[cfg(all(target_arch = "x86_64", feature = "swar-number-parsing"))] -use std::arch::x86_64::{ +use std::arch::x86_64 as arch; + +#[cfg(feature = "swar-number-parsing")] +use arch::{ __m128i, _mm_cvtsi128_si32, _mm_loadu_si128, _mm_madd_epi16, _mm_maddubs_epi16, _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, }; + const POWER_OF_TEN: [f64; 632] = [ 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, @@ -150,7 +154,7 @@ fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { chars .get_kinda_unchecked(0..16) .as_ptr() - .cast::(), + .cast::(), ), ascii0, ); diff --git a/src/sse42/deser.rs b/src/sse42/deser.rs index 7399ac97..3054d200 100644 --- a/src/sse42/deser.rs +++ b/src/sse42/deser.rs @@ -1,9 +1,10 @@ #[cfg(target_arch = "x86")] -use std::arch::x86::{ - __m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8, _mm_storeu_si128, -}; +use std::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::{ +use std::arch::x86_64 as arch; + +use arch::{ __m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_set1_epi8, _mm_storeu_si128, }; @@ -45,7 +46,7 @@ impl<'de> Deserializer<'de> { let mut len = src_i; loop { let v: __m128i = unsafe { - _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) + _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) }; // store to dest unconditionally - we can overwrite the bits we don't like @@ -99,7 +100,7 @@ impl<'de> Deserializer<'de> { // To be more conform with upstream loop { let v: __m128i = unsafe { - _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) + _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) }; unsafe { @@ -107,7 +108,7 @@ impl<'de> Deserializer<'de> { buffer .as_mut_ptr() .add(dst_i) - .cast::(), + .cast::(), v, ); }; diff --git a/src/sse42/stage1.rs b/src/sse42/stage1.rs index 09fc78eb..94fb514e 100644 --- a/src/sse42/stage1.rs +++ b/src/sse42/stage1.rs @@ -1,12 +1,19 @@ use crate::{static_cast_i32, static_cast_u32, Stage1Parse}; #[cfg(target_arch = "x86")] -use std::arch::x86::{ +use std::arch::x86 as arch; + +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64 as arch; + +#[cfg(target_arch = "x86")] +use arch::{ __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_cmpgt_epi8, _mm_loadu_si128, _mm_max_epu8, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128, _mm_testz_si128, }; + #[cfg(target_arch = "x86_64")] -use std::arch::x86_64::{ +use arch::{ __m128i, _mm_add_epi32, _mm_and_si128, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_max_epu8, _mm_movemask_epi8, _mm_set1_epi8, _mm_set_epi32, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8, _mm_srli_epi32, _mm_storeu_si128, @@ -43,10 +50,10 @@ impl SimdInput { pub(crate) fn new(ptr: &[u8]) -> Self { unsafe { Self { - v0: _mm_loadu_si128(ptr.as_ptr().cast::()), - v1: _mm_loadu_si128(ptr.as_ptr().add(16).cast::()), - v2: _mm_loadu_si128(ptr.as_ptr().add(32).cast::()), - v3: _mm_loadu_si128(ptr.as_ptr().add(48).cast::()), + v0: _mm_loadu_si128(ptr.as_ptr().cast::()), + v1: _mm_loadu_si128(ptr.as_ptr().add(16).cast::()), + v2: _mm_loadu_si128(ptr.as_ptr().add(32).cast::()), + v3: _mm_loadu_si128(ptr.as_ptr().add(48).cast::()), } } } @@ -57,10 +64,7 @@ impl Stage1Parse<__m128i> for SimdInput { #[cfg(target_feature = "pclmulqdq")] #[allow(clippy::cast_sign_loss)] fn compute_quote_mask(quote_bits: u64) -> u64 { - #[cfg(target_arch = "x86")] - use std::arch::x86::{_mm_clmulepi64_si128, _mm_cvtsi128_si64, _mm_set_epi64x}; - #[cfg(target_arch = "x86_64")] - use std::arch::x86_64::{_mm_clmulepi64_si128, _mm_cvtsi128_si64, _mm_set_epi64x}; + use arch::{_mm_clmulepi64_si128, _mm_cvtsi128_si64, _mm_set_epi64x}; unsafe { _mm_cvtsi128_si64(_mm_clmulepi64_si128( @@ -278,7 +282,7 @@ impl Stage1Parse<__m128i> for SimdInput { _mm_storeu_si128( base.as_mut_ptr() .add(l) - .cast::(), + .cast::(), v, ); } From ab771a37265dd94fa2e63bb1f101e36abeb61082 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Wed, 14 Sep 2022 10:21:38 -0500 Subject: [PATCH 2/5] x86 arch properly feature flagged --- src/numberparse.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index 097a1195..34df3f41 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -12,7 +12,10 @@ use std::arch::x86 as arch; #[cfg(all(target_arch = "x86_64", feature = "swar-number-parsing"))] use std::arch::x86_64 as arch; -#[cfg(feature = "swar-number-parsing")] +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + feature = "swar-number-parsing" +))] use arch::{ __m128i, _mm_cvtsi128_si32, _mm_loadu_si128, _mm_madd_epi16, _mm_maddubs_epi16, _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, From 1e1f3376aaf10478dfa93e9c4c0ca13906add727 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Thu, 6 Oct 2022 11:15:57 -0500 Subject: [PATCH 3/5] chore: run "cargo fmt" --- src/numberparse.rs | 3 --- src/sse42/deser.rs | 18 +++++------------- src/sse42/stage1.rs | 7 +------ 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index 34df3f41..a8a6c940 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -7,8 +7,6 @@ use crate::{mem, static_cast_i64, Deserializer, ErrorType, Result}; #[cfg(all(target_arch = "x86", feature = "swar-number-parsing"))] use std::arch::x86 as arch; - - #[cfg(all(target_arch = "x86_64", feature = "swar-number-parsing"))] use std::arch::x86_64 as arch; @@ -21,7 +19,6 @@ use arch::{ _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, }; - const POWER_OF_TEN: [f64; 632] = [ 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, diff --git a/src/sse42/deser.rs b/src/sse42/deser.rs index 3054d200..4fa7034d 100644 --- a/src/sse42/deser.rs +++ b/src/sse42/deser.rs @@ -45,9 +45,8 @@ impl<'de> Deserializer<'de> { let mut src_i: usize = 0; let mut len = src_i; loop { - let v: __m128i = unsafe { - _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) - }; + let v: __m128i = + unsafe { _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) }; // store to dest unconditionally - we can overwrite the bits we don't like // later @@ -99,18 +98,11 @@ impl<'de> Deserializer<'de> { // To be more conform with upstream loop { - let v: __m128i = unsafe { - _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) - }; + let v: __m128i = + unsafe { _mm_loadu_si128(src.as_ptr().add(src_i).cast::()) }; unsafe { - _mm_storeu_si128( - buffer - .as_mut_ptr() - .add(dst_i) - .cast::(), - v, - ); + _mm_storeu_si128(buffer.as_mut_ptr().add(dst_i).cast::(), v); }; // store to dest unconditionally - we can overwrite the bits we don't like diff --git a/src/sse42/stage1.rs b/src/sse42/stage1.rs index 94fb514e..c939dba8 100644 --- a/src/sse42/stage1.rs +++ b/src/sse42/stage1.rs @@ -279,12 +279,7 @@ impl Stage1Parse<__m128i> for SimdInput { let v: __m128i = _mm_set_epi32(v3, v2, v1, v0); let v: __m128i = _mm_add_epi32(idx_64_v, v); - _mm_storeu_si128( - base.as_mut_ptr() - .add(l) - .cast::(), - v, - ); + _mm_storeu_si128(base.as_mut_ptr().add(l).cast::(), v); } l += 4; } From baaa70db5f9e8287aca29da32214deb27bece6c7 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Wed, 14 Sep 2022 10:17:34 -0500 Subject: [PATCH 4/5] x86 arch properly feature flagged --- src/numberparse.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/numberparse.rs b/src/numberparse.rs index a8a6c940..e7d75954 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -19,6 +19,7 @@ use arch::{ _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, }; + const POWER_OF_TEN: [f64; 632] = [ 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, From c36a4b0bbe0e5bc77ba147686ccd14c638a8dbdf Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Thu, 6 Oct 2022 11:20:18 -0500 Subject: [PATCH 5/5] cargo fmt --- src/numberparse.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index e7d75954..a8a6c940 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -19,7 +19,6 @@ use arch::{ _mm_packus_epi32, _mm_set1_epi8, _mm_setr_epi16, _mm_setr_epi8, _mm_sub_epi8, }; - const POWER_OF_TEN: [f64; 632] = [ 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,