Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize find_end #4943

Merged
merged 13 commits into from
Oct 24, 2024
93 changes: 78 additions & 15 deletions benchmarks/src/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <vector>
using namespace std::string_view_literals;

const char src_haystack[] =
constexpr std::string_view common_src_data =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
Expand Down Expand Up @@ -43,13 +43,42 @@ const char src_haystack[] =
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

constexpr std::array patterns = {
"aliquet"sv,
"aliquet malesuada"sv,
// Builds a Size-element character array in which every element is '*'.
// When Last_is_different is true, the final element is replaced by '!'.
//
// The array is value-initialized and filled with a plain loop so the function can be used in
// constant expressions without depending on C++20 relaxations (uninitialized locals in constexpr
// functions, constexpr std::array::fill).
template <size_t Size, bool Last_is_different>
constexpr auto make_fill_pattern_array() {
    // back() on an empty array is undefined; reject that combination at compile time.
    static_assert(!Last_is_different || Size > 0, "a distinct last element requires a non-empty array");

    std::array<char, Size> result{};
    for (char& ch : result) {
        ch = '*';
    }

    if constexpr (Last_is_different) {
        result.back() = '!';
    }

    return result;
}

// Backing storage for one fill pattern: one array object per <Size, Last_is_different> combination.
template <size_t Size, bool Last_is_different>
constexpr std::array<char, Size> fill_pattern_array = make_fill_pattern_array<Size, Last_is_different>();

// View over the matching fill_pattern_array.
// Constructed from data() and size(): std::string_view is not implicitly convertible from
// std::array, so copy-initializing the view from the array object itself is ill-formed.
template <size_t Size, bool Last_is_different>
constexpr std::string_view fill_pattern_view(
    fill_pattern_array<Size, Last_is_different>.data(), fill_pattern_array<Size, Last_is_different>.size());

// One benchmark case: the text to search in, paired with the text to search for.
// Both members are non-owning views.
struct data_and_pattern {
std::string_view data; // searched text; the benchmarks copy it into their haystack
std::string_view pattern; // sought text; the benchmarks copy it into their needle
};

// Table of benchmark cases. Each benchmark picks its case by index, using the benchmark
// argument (state.range()) as the subscript.
// Cases 0-3 search the shared text for a phrase taken from it.
// Cases 4-5 search 3000 fill characters for a pattern of the same fill character whose last
// element differs, so the haystack contains no occurrence of the pattern.
constexpr data_and_pattern patterns[] = {
/* 0. Small, closer to end */ {common_src_data, "aliquet"sv},
/* 1. Large, closer to end */ {common_src_data, "aliquet malesuada"sv},
/* 2. Small, closer to begin */ {common_src_data, "pulvinar"sv},
/* 3. Large, closer to begin */ {common_src_data, "dapibus elit interdum"sv},

/* 4. Small, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<7, true>},
/* 5. Large, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<20, true>},
};

void c_strstr(benchmark::State& state) {
const auto& src_needle = patterns[static_cast<size_t>(state.range())];
const auto& src_haystack = patterns[static_cast<size_t>(state.range())].data;
const auto& src_needle = patterns[static_cast<size_t>(state.range())].pattern;

const std::string haystack(std::begin(src_haystack), std::end(src_haystack));
const std::string needle(std::begin(src_needle), std::end(src_needle));
Expand All @@ -64,7 +93,8 @@ void c_strstr(benchmark::State& state) {

template <class T>
void classic_search(benchmark::State& state) {
const auto& src_needle = patterns[static_cast<size_t>(state.range())];
const auto& src_haystack = patterns[static_cast<size_t>(state.range())].data;
const auto& src_needle = patterns[static_cast<size_t>(state.range())].pattern;

const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
Expand All @@ -79,7 +109,8 @@ void classic_search(benchmark::State& state) {

template <class T>
void ranges_search(benchmark::State& state) {
const auto& src_needle = patterns[static_cast<size_t>(state.range())];
const auto& src_haystack = patterns[static_cast<size_t>(state.range())].data;
const auto& src_needle = patterns[static_cast<size_t>(state.range())].pattern;

const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
Expand All @@ -94,7 +125,8 @@ void ranges_search(benchmark::State& state) {

template <class T>
void search_default_searcher(benchmark::State& state) {
const auto& src_needle = patterns[static_cast<size_t>(state.range())];
const auto& src_haystack = patterns[static_cast<size_t>(state.range())].data;
const auto& src_needle = patterns[static_cast<size_t>(state.range())].pattern;

const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));
Expand All @@ -107,26 +139,57 @@ void search_default_searcher(benchmark::State& state) {
}
}

// Benchmarks the iterator-pair overload of std::find_end on std::vector<T>.
// state.range() selects an entry of `patterns`; the entry's data is copied into the haystack
// and its pattern into the needle, converting each character to T.
template <class T>
void classic_find_end(benchmark::State& state) {
    const auto& [text, sought] = patterns[static_cast<size_t>(state.range())];

    const std::vector<T> haystack(text.begin(), text.end());
    const std::vector<T> needle(sought.begin(), sought.end());

    for (auto _ : state) {
        // Hide both inputs from the optimizer on every iteration so the search is not folded away.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto last_match = std::find_end(haystack.begin(), haystack.end(), needle.begin(), needle.end());
        benchmark::DoNotOptimize(last_match);
    }
}

// Benchmarks std::ranges::find_end (range overload) on std::vector<T>.
// state.range() selects an entry of `patterns`; the entry's data is copied into the haystack
// and its pattern into the needle, converting each character to T.
template <class T>
void ranges_find_end(benchmark::State& state) {
    const data_and_pattern& test_case = patterns[static_cast<size_t>(state.range())];

    const std::vector<T> haystack(test_case.data.begin(), test_case.data.end());
    const std::vector<T> needle(test_case.pattern.begin(), test_case.pattern.end());

    for (auto _ : state) {
        // Hide both inputs from the optimizer on every iteration so the search is not folded away.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto last_match = std::ranges::find_end(haystack, needle);
        benchmark::DoNotOptimize(last_match);
    }
}

// Registers one benchmark argument per entry of `patterns`: every index from 0 through the last.
// DenseRange with step 1 is used so that each index is visited; `patterns` is a built-in array,
// so its length comes from std::size rather than a member function.
void common_args(auto bm) {
    bm->DenseRange(0, std::size(patterns) - 1, 1);
}

// Benchmark registrations. Every benchmark takes its arguments from common_args.
BENCHMARK(c_strstr)->Apply(common_args);

// The search benchmarks are instantiated for 1-, 2-, 4- and 8-byte unsigned element types.
BENCHMARK(classic_search<std::uint8_t>)->Apply(common_args);
BENCHMARK(classic_search<std::uint16_t>)->Apply(common_args);
BENCHMARK(classic_search<std::uint32_t>)->Apply(common_args);
BENCHMARK(classic_search<std::uint64_t>)->Apply(common_args);

BENCHMARK(ranges_search<std::uint8_t>)->Apply(common_args);
BENCHMARK(ranges_search<std::uint16_t>)->Apply(common_args);
BENCHMARK(ranges_search<std::uint32_t>)->Apply(common_args);
BENCHMARK(ranges_search<std::uint64_t>)->Apply(common_args);

BENCHMARK(search_default_searcher<std::uint8_t>)->Apply(common_args);
BENCHMARK(search_default_searcher<std::uint16_t>)->Apply(common_args);
BENCHMARK(search_default_searcher<std::uint32_t>)->Apply(common_args);
BENCHMARK(search_default_searcher<std::uint64_t>)->Apply(common_args);

// The find_end benchmarks are instantiated for 1- and 2-byte element types only.
// NOTE(review): presumably because only those element sizes have a vectorized find_end path - confirm.
BENCHMARK(classic_find_end<std::uint8_t>)->Apply(common_args);
BENCHMARK(classic_find_end<std::uint16_t>)->Apply(common_args);

BENCHMARK(ranges_find_end<std::uint8_t>)->Apply(common_args);
BENCHMARK(ranges_find_end<std::uint16_t>)->Apply(common_args);

BENCHMARK_MAIN();
66 changes: 66 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void*
const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;

// Separately compiled find_end helpers. _Find_end_vectorized calls the _1 variant for 1-byte
// elements and the _2 variant for 2-byte elements.
// [_First1, _Last1) is the range that is searched; _First2 points at the sought sequence and
// _Count2 is its length in elements (callers pass the iterator distance, cast to size_t).
// The ranges::find_end caller treats a result equal to _Last1 as "no match".
const void* __stdcall __std_find_end_1(
const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept;
const void* __stdcall __std_find_end_2(
const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept;

__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept;
Expand Down Expand Up @@ -189,6 +194,19 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val
}
}

// Typed front end for the __std_find_end_N helpers: selects the helper that matches the element
// size and converts the untyped result back to a pointer into the first range.
// Both element types must have the same size; only sizes 1 and 2 are handled.
template <class _Ty1, class _Ty2>
_Ty1* _Find_end_vectorized(
    _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept {
    _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
    if constexpr (sizeof(_Ty1) == 1) {
        const void* const _Found = ::__std_find_end_1(_First1, _Last1, _First2, _Count2);
        return const_cast<_Ty1*>(static_cast<const _Ty1*>(_Found));
    } else if constexpr (sizeof(_Ty1) == 2) {
        const void* const _Found = ::__std_find_end_2(_First1, _Last1, _First2, _Count2);
        return const_cast<_Ty1*>(static_cast<const _Ty1*>(_Found));
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
    }
}

template <class _Ty, class _TVal1, class _TVal2>
__declspec(noalias) void _Replace_vectorized(
_Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept {
Expand Down Expand Up @@ -3194,6 +3212,26 @@ _NODISCARD _CONSTEXPR20 _FwdIt1 find_end(
if constexpr (_Is_ranges_random_iter_v<_FwdIt1> && _Is_ranges_random_iter_v<_FwdIt2>) {
const _Iter_diff_t<_FwdIt2> _Count2 = _ULast2 - _UFirst2;
if (_Count2 > 0 && _Count2 <= _ULast1 - _UFirst1) {
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_search_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_UFirst1);

const auto _Ptr_res1 = _STD _Find_end_vectorized(
_Ptr1, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), static_cast<size_t>(_Count2));

if constexpr (is_pointer_v<decltype(_UFirst1)>) {
_UFirst1 = _Ptr_res1;
} else {
_UFirst1 += _Ptr_res1 - _Ptr1;
}

_STD _Seek_wrapped(_First1, _UFirst1);
return _First1;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (auto _UCandidate = _ULast1 - static_cast<_Iter_diff_t<_FwdIt1>>(_Count2);; --_UCandidate) {
if (_STD _Equal_rev_pred_unchecked(_UCandidate, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) {
_STD _Seek_wrapped(_First1, _UCandidate);
Expand Down Expand Up @@ -3297,6 +3335,34 @@ namespace ranges {

if (_Count2 > 0 && _Count2 <= _Count1) {
const auto _Count2_as1 = static_cast<iter_difference_t<_It1>>(_Count2);
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_search_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity>
&& is_same_v<_Pj2, identity>) {
if (!_STD is_constant_evaluated()) {
const auto _Ptr1 = _STD to_address(_First1);
const auto _Ptr2 = _STD to_address(_First2);
const auto _Ptr_last1 = _Ptr1 + _Count1;

const auto _Ptr_res1 =
_STD _Find_end_vectorized(_Ptr1, _Ptr_last1, _Ptr2, static_cast<size_t>(_Count2));

if constexpr (is_pointer_v<_It1>) {
if (_Ptr_res1 != _Ptr_last1) {
return {_Ptr_res1, _Ptr_res1 + _Count2};
} else {
return {_Ptr_res1, _Ptr_res1};
}
} else {
_First1 += _Ptr_res1 - _Ptr1;
if (_Ptr_res1 != _Ptr_last1) {
return {_First1, _First1 + _Count2_as1};
} else {
return {_First1, _First1};
}
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (auto _Candidate = _First1 + (_Count1 - _Count2_as1);; --_Candidate) {
auto _Match_and_mid1 =
Expand Down
Loading