Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize find_first_of for 4 and 8 byte elements #4587

Merged
merged 23 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
28fbeee
Vectorize `find_first_of` for 4 and 8 byte elements
AlexGuteniev Apr 13, 2024
721bfed
Format
AlexGuteniev Apr 13, 2024
12e08b4
fix x86 build
AlexGuteniev Apr 13, 2024
7edceb7
We don't actually need dependent `false`
AlexGuteniev Apr 14, 2024
3963bea
Namespace and some renames avoid wrapping
AlexGuteniev Apr 14, 2024
810c695
Swap _Tmp1 and _Tmp2
AlexGuteniev Apr 14, 2024
df433ba
Format
AlexGuteniev Apr 14, 2024
c5e6c9e
Swap other _Tmp1 and _Tmp2 too
AlexGuteniev Apr 14, 2024
6781576
-newline
AlexGuteniev Apr 14, 2024
4240874
Don't have _Needle_length_el == 1 code path
AlexGuteniev Apr 14, 2024
5122201
spelling
AlexGuteniev Apr 14, 2024
0028fa3
missing include
AlexGuteniev Apr 14, 2024
710a7b4
unreachable
AlexGuteniev Apr 14, 2024
09a2973
Drop unnecessary `typename` when `using`.
StephanTLavavej Apr 14, 2024
39bcff8
Add `noexcept`.
StephanTLavavej Apr 14, 2024
42a5675
`__48_impl` => `__4_8_impl`
StephanTLavavej Apr 14, 2024
a528021
more ARM64EC guards
AlexGuteniev Apr 15, 2024
1d71a47
Use uppercase `_Ugly` names.
StephanTLavavej Apr 15, 2024
cbd0d6e
After checking `_Amount == 8`, directly say `8`.
StephanTLavavej Apr 15, 2024
6cb45cb
Mark `_Val` as `const`.
StephanTLavavej Apr 15, 2024
b447a9b
Remove `const` from `__m256i` return type.
StephanTLavavej Apr 15, 2024
1cae60f
`!_mm256_testz_si256(ARGS)` => `_mm256_testz_si256(ARGS) == 0`
StephanTLavavej Apr 15, 2024
74990a6
Revert "`!_mm256_testz_si256(ARGS)` => `_mm256_testz_si256(ARGS) == 0`"
StephanTLavavej Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions benchmarks/src/find_first_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,14 @@ void bm(benchmark::State& state) {
}
}

// Argument sets shared by the benchmark registrations below; each Args({a, b}) adds one run.
// NOTE(review): the two values are presumably (haystack length, needle length) - confirm against bm().
// The expansion already ends in ';', so every `BENCHMARK(...)->ARGS;` line carries a redundant second ';'.
#define ARGS \
Args({2, 3}) \
->Args({7, 4}) \
->Args({9, 3}) \
->Args({22, 5}) \
->Args({58, 2}) \
->Args({102, 4}) \
->Args({325, 1}) \
->Args({1011, 11}) \
->Args({3056, 7});

BENCHMARK(bm<uint8_t>)->ARGS;
BENCHMARK(bm<uint16_t>)->ARGS;
// Adds the shared list of nine argument pairs to a benchmark registration.
//
// `bm` is any pointer-like handle whose `Args({a, b})` returns something that can be chained with
// `->Args(...)` again. The pairs are added in two separate chains, both rooted at `bm`, in ascending
// order of the first value.
void common_args(auto bm) {
    // First chain: first values 2 through 58.
    bm->Args({2, 3})
        ->Args({7, 4})
        ->Args({9, 3})
        ->Args({22, 5})
        ->Args({58, 2});

    // Second chain: first values 102 through 3056.
    bm->Args({102, 4})
        ->Args({325, 1})
        ->Args({1011, 11})
        ->Args({3056, 7});
}

// Register bm for 1-, 2-, 4- and 8-byte unsigned element types.
// Every instantiation receives the same argument pairs through common_args.
BENCHMARK(bm<uint8_t>)->Apply(common_args);
BENCHMARK(bm<uint16_t>)->Apply(common_args);
BENCHMARK(bm<uint32_t>)->Apply(common_args);
BENCHMARK(bm<uint64_t>)->Apply(common_args);

BENCHMARK_MAIN();
14 changes: 11 additions & 3 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ const void* __stdcall __std_find_first_of_trivial_1(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_find_first_of_trivial_2(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
// 4- and 8-byte element variants, with the same untyped-pointer signature as the _1 and _2 variants.
// _Find_first_of_vectorized selects them when sizeof(_Ty1) is 4 or 8 and casts the result back to _Ty1*.
// NOTE(review): presumably [_First1, _Last1) is the searched range and [_First2, _Last2) the set of
// values to look for, mirroring std::find_first_of - confirm against the implementation.
const void* __stdcall __std_find_first_of_trivial_4(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_find_first_of_trivial_8(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;

__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept;
Expand Down Expand Up @@ -202,6 +206,12 @@ _Ty1* _Find_first_of_vectorized(
} else if constexpr (sizeof(_Ty1) == 2) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_2(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 4) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_4(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 8) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_8(_First1, _Last1, _First2, _Last2)));
} else {
static_assert(_Always_false<_Ty1>, "Unexpected size");
}
Expand Down Expand Up @@ -230,9 +240,7 @@ _INLINE_VAR constexpr ptrdiff_t _Threshold_find_first_of = 16;

// Can we activate the vector algorithms for find_first_of?
// NOTE(review): two initializers appear below because this is a diff view. The multi-line form
// (memcmp-safe AND sizeof <= 2) looks like the removed definition and the single-line form
// (memcmp-safe only) its replacement - confirm against the real header. Dropping the size cap matches
// the sizeof == 4 and sizeof == 8 branches in _Find_first_of_vectorized.
template <class _It1, class _It2, class _Pr>
constexpr bool _Vector_alg_in_find_first_of_is_safe =
_Equal_memcmp_is_safe<_It1, _It2, _Pr> // can replace value comparison with bitwise comparison
&& sizeof(_Iter_value_t<_It1>) <= 2; // pcmpestri compatible size
constexpr bool _Vector_alg_in_find_first_of_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>;

// Can we activate the vector algorithms for replace?
template <class _Iter, class _Ty1>
Expand Down
Loading
Loading