Skip to content

Commit

Permalink
Vectorize basic_string::find_last_of (#4934)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <[email protected]>
Co-authored-by: Casey Carter <[email protected]>
  • Loading branch information
3 people authored Oct 12, 2024
1 parent caba83c commit 23dc7e3
Show file tree
Hide file tree
Showing 4 changed files with 310 additions and 89 deletions.
41 changes: 30 additions & 11 deletions benchmarks/src/find_first_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,38 @@
#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>

using namespace std;

enum class AlgType : bool { std_func, str_member };
enum class AlgType { std_func, str_member_first, str_member_last };

template <AlgType Alg, class T, T Start = T{'a'}>
template <AlgType Alg, class T, T Start = T{'!'}>
void bm(benchmark::State& state) {
const size_t Pos = static_cast<size_t>(state.range(0));
const size_t NSize = static_cast<size_t>(state.range(1));
const size_t HSize = Pos * 2;
const size_t Which = 0;

using container = conditional_t<Alg == AlgType::str_member, basic_string<T>, vector<T>>;
using container = conditional_t<Alg == AlgType::std_func, vector<T>, basic_string<T>>;

container h(HSize, T{'.'});
constexpr T HaystackFiller{' '};
static_assert(HaystackFiller < Start, "The following iota() should not produce the haystack filler.");

container h(HSize, HaystackFiller);
container n(NSize, T{0});

if (NSize - 1 > static_cast<size_t>(numeric_limits<T>::max()) - static_cast<size_t>(Start)) {
puts("ERROR: The following iota() would overflow.");
abort();
}

iota(n.begin(), n.end(), Start);

if (Pos >= HSize || Which >= NSize) {
Expand All @@ -37,26 +48,34 @@ void bm(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(h);
benchmark::DoNotOptimize(n);
if constexpr (Alg == AlgType::str_member) {
benchmark::DoNotOptimize(h.find_first_of(n.data(), 0, n.size()));
if constexpr (Alg == AlgType::str_member_first) {
benchmark::DoNotOptimize(h.find_first_of(n));
} else if constexpr (Alg == AlgType::str_member_last) {
benchmark::DoNotOptimize(h.find_last_of(n));
} else {
benchmark::DoNotOptimize(find_first_of(h.begin(), h.end(), n.begin(), n.end()));
}
}
}

void common_args(auto bm) {
bm->Args({2, 3})->Args({7, 4})->Args({9, 3})->Args({22, 5})->Args({58, 2});
bm->Args({102, 4})->Args({325, 1})->Args({1011, 11})->Args({1502, 23})->Args({3056, 7});
bm->Args({2, 3})->Args({7, 4})->Args({9, 3})->Args({22, 5})->Args({58, 2})->Args({102, 4});
bm->Args({325, 1})->Args({400, 50})->Args({1011, 11})->Args({1502, 23})->Args({3056, 7});
}

BENCHMARK(bm<AlgType::std_func, uint8_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::std_func, uint16_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::std_func, uint32_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::std_func, uint64_t>)->Apply(common_args);

BENCHMARK(bm<AlgType::str_member, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member, wchar_t, L'\x03B1'>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first, wchar_t, L'\x03B1'>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first, char32_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first, char32_t, U'\x03B1'>)->Apply(common_args);

BENCHMARK(bm<AlgType::str_member_last, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last, wchar_t, L'\x03B1'>)->Apply(common_args);

BENCHMARK_MAIN();
116 changes: 92 additions & 24 deletions stl/inc/__msvc_string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,42 @@ _STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new

#if _USE_STD_VECTOR_ALGORITHMS
extern "C" {
// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
// won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by
// those pointers. The optimizer also assumes in that case that a pointer parameter is not returned to the caller via
// the return value, so functions using "noalias" must usually return void. This attribute is valuable because these
// functions are in native code objects that the compiler cannot analyze. In the absence of the noalias attribute, the
// compiler has to assume that the denoted arrays are "globally address taken", and that any later calls to
// unanalyzable routines may modify those arrays.

__declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_1(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
__declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_2(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;

} // extern "C"

_STD_BEGIN

template <class _Ty1, class _Ty2>
size_t _Find_last_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
if constexpr (sizeof(_Ty1) == 1) {
return ::__std_find_last_of_trivial_pos_1(_Haystack, _Haystack_length, _Needle, _Needle_length);
} else if constexpr (sizeof(_Ty1) == 2) {
return ::__std_find_last_of_trivial_pos_2(_Haystack, _Haystack_length, _Needle, _Needle_length);
} else {
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
}
}

_STD_END

#endif // _USE_STD_VECTOR_ALGORITHMS

_STD_BEGIN
#ifdef __clang__
#define _HAS_MEMCPY_MEMMOVE_INTRINSICS 1
Expand Down Expand Up @@ -731,10 +767,14 @@ constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t
const bool _Try_vectorize = _Hay_size - _Start_at > _Threshold_find_first_of;

// Additional condition for when the vectorization outperforms the table lookup
const bool _Use_bitmap = !_Try_vectorize || (sizeof(_Elem) > 1 && sizeof(_Elem) * _Needle_size > 16);
#else
constexpr size_t _Find_first_of_bitmap_threshold = sizeof(_Elem) == 1 ? 48
: sizeof(_Elem) == 8 ? 8
: 16;

const bool _Use_bitmap = !_Try_vectorize || _Needle_size > _Find_first_of_bitmap_threshold;
#else // ^^^ _USE_STD_VECTOR_ALGORITHMS / !_USE_STD_VECTOR_ALGORITHMS vvv
const bool _Use_bitmap = true;
#endif // _USE_STD_VECTOR_ALGORITHMS
#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^

if (_Use_bitmap) {
_String_bitmap<_Elem> _Matches;
Expand Down Expand Up @@ -776,42 +816,70 @@ constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t
return static_cast<size_t>(-1); // no match
}

template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
template <class _Traits>
constexpr size_t _Traits_find_last_of(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits> _Haystack,
const size_t _Hay_size, const size_t _Start_at, _In_reads_(_Needle_size) const _Traits_ptr_t<_Traits> _Needle,
const size_t _Needle_size) noexcept {
// in [_Haystack, _Haystack + _Hay_size), look for last of [_Needle, _Needle + _Needle_size), before _Start_at
if (_Needle_size != 0 && _Hay_size != 0) { // worth searching, do it
if constexpr (_Special) {
_String_bitmap<typename _Traits::char_type> _Matches;
if (!_Matches._Mark(_Needle, _Needle + _Needle_size)) { // couldn't put one of the characters into the
// bitmap, fall back to the serial algorithm
return _Traits_find_last_of<_Traits, false>(_Haystack, _Hay_size, _Start_at, _Needle, _Needle_size);
if (_Needle_size == 0 || _Hay_size == 0) { // not worth searching
return static_cast<size_t>(-1);
}

const auto _Hay_start = (_STD min)(_Start_at, _Hay_size - 1);

if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
if (!_STD _Is_constant_evaluated()) {
using _Elem = typename _Traits::char_type;

bool _Use_bitmap = true;
#if _USE_STD_VECTOR_ALGORITHMS
bool _Try_vectorize = false;

if constexpr (sizeof(_Elem) <= 2) {
_Try_vectorize = _Hay_start + 1 > _Threshold_find_first_of;
// Additional condition for when the vectorization outperforms the table lookup
constexpr size_t _Find_last_of_bitmap_threshold = sizeof(_Elem) == 1 ? 48 : 8;

_Use_bitmap = !_Try_vectorize || _Needle_size > _Find_last_of_bitmap_threshold;
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - 1);; --_Match_try) {
if (_Matches._Match(*_Match_try)) {
return static_cast<size_t>(_Match_try - _Haystack); // found a match
}
if (_Use_bitmap) {
_String_bitmap<_Elem> _Matches;
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
for (auto _Match_try = _Haystack + _Hay_start;; --_Match_try) {
if (_Matches._Match(*_Match_try)) {
return static_cast<size_t>(_Match_try - _Haystack); // found a match
}

if (_Match_try == _Haystack) {
break; // at beginning, no more chance for match
if (_Match_try == _Haystack) {
return static_cast<size_t>(-1); // at beginning, no more chance for match
}
}
}

// couldn't put one of the characters into the bitmap, fall back to vectorized or serial algorithms
}
} else {
for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - 1);; --_Match_try) {
if (_Traits::find(_Needle, _Needle_size, *_Match_try)) {
return static_cast<size_t>(_Match_try - _Haystack); // found a match
}

if (_Match_try == _Haystack) {
break; // at beginning, no more chance for match
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (sizeof(_Elem) <= 2) {
if (_Try_vectorize) {
return _STD _Find_last_of_pos_vectorized(_Haystack, _Hay_start + 1, _Needle, _Needle_size);
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
}
}

return static_cast<size_t>(-1); // no match
for (auto _Match_try = _Haystack + _Hay_start;; --_Match_try) {
if (_Traits::find(_Needle, _Needle_size, *_Match_try)) {
return static_cast<size_t>(_Match_try - _Haystack); // found a match
}

if (_Match_try == _Haystack) {
return static_cast<size_t>(-1); // at beginning, no more chance for match
}
}
}

template <class _Traits, bool _Special = _Is_implementation_handled_char_traits<_Traits>>
Expand Down
Loading

0 comments on commit 23dc7e3

Please sign in to comment.