Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizations for unreachable sentinels #1810

Merged
35 changes: 30 additions & 5 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,15 @@ namespace ranges {
template <class _It1, class _Se1, class _It2, class _Se2, class _Pr, class _Pj1, class _Pj2>
_NODISCARD static constexpr bool _Equal_4(
_It1 _First1, _Se1 _Last1, _It2 _First2, _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && same_as<_Pj1, identity> //
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved
&& same_as<_Pj2, identity> //
&& (same_as<_Se1, unreachable_sentinel_t> || same_as<_Se2, unreachable_sentinel_t>) ) {
if constexpr (same_as<_Se1, _Se2>) {
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved
_STL_ASSERT(false, "Tried to compare ranges with both sentinels being unreachable");
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
}
return false; // If any of sentinels is unreachable the ranges are not equal
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
}

for (;;) {
if (_First1 == _Last1) {
return _First2 == _Last2;
Expand Down Expand Up @@ -10394,12 +10403,28 @@ namespace ranges {

using _Memcmp_classification_pred =
typename decltype(_Lex_compare_memcmp_classify(_First1, _First2, _Pred))::_Pred;
if constexpr (!is_void_v<_Memcmp_classification_pred> && sized_sentinel_for<_Se1, _It1> //
&& sized_sentinel_for<_Se2, _It2> && same_as<_Pj1, identity> && same_as<_Pj2, identity>) {
constexpr bool _Is_sized1 = sized_sentinel_for<_Se1, _It1>;
constexpr bool _Is_sized2 = sized_sentinel_for<_Se2, _It2>;
if constexpr (!is_void_v<_Memcmp_classification_pred> && _Sized_or_unreachable_sentinel_for<_Se1, _It1>
#pragma warning(suppress : 6287) // Redundant code: the left and right subexpressions are identical
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved
&& _Sized_or_unreachable_sentinel_for<_Se2, _It2> //
&& same_as<_Pj1, identity> && same_as<_Pj2, identity>) {
if (!_STD is_constant_evaluated()) {
const auto _Num1 = static_cast<size_t>(_Last1 - _First1);
const auto _Num2 = static_cast<size_t>(_Last2 - _First2);
const int _Ans = _Memcmp_count(_First1, _First2, (_STD min)(_Num1, _Num2));
size_t _Num1;
if constexpr (_Is_sized1) {
_Num1 = static_cast<size_t>(_Last1 - _First1);
} else {
_Num1 = SIZE_MAX;
}

size_t _Num2;
if constexpr (_Is_sized2) {
_Num2 = static_cast<size_t>(_Last2 - _First2);
} else {
_Num2 = SIZE_MAX;
}

const int _Ans = _Memcmp_count(_First1, _First2, (_STD min)(_Num1, _Num2));
return _Memcmp_classification_pred{}(_Ans, 0) || (_Ans == 0 && _Num1 < _Num2);
}
}
Expand Down
54 changes: 39 additions & 15 deletions stl/inc/memory
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,21 @@ namespace ranges {
_STL_INTERNAL_STATIC_ASSERT(_No_throw_sentinel_for<_OSe, _Out>);
_STL_INTERNAL_STATIC_ASSERT(constructible_from<iter_value_t<_Out>, iter_reference_t<_It>>);

if constexpr (_Ptr_copy_cat<_It, _Out>::_Really_trivial
&& sized_sentinel_for<_Se, _It> && sized_sentinel_for<_OSe, _Out>) {
return _Copy_memcpy_common(_IFirst, _RANGES next(_IFirst, _STD move(_ILast)), _OFirst,
_RANGES next(_OFirst, _STD move(_OLast)));
constexpr bool _Is_sized1 = sized_sentinel_for<_Se, _It>;
constexpr bool _Is_sized2 = sized_sentinel_for<_OSe, _Out>;
if constexpr (_Ptr_copy_cat<_It, _Out>::_Really_trivial && _Sized_or_unreachable_sentinel_for<_Se, _It>
#pragma warning(suppress : 6287) // Redundant code: the left and right subexpressions are identical
&& _Sized_or_unreachable_sentinel_for<_OSe, _Out>) {
if constexpr (_Is_sized1 && _Is_sized2) {
return _Copy_memcpy_common(_IFirst, _RANGES next(_IFirst, _STD move(_ILast)), _OFirst,
_RANGES next(_OFirst, _STD move(_OLast)));
} else if constexpr (_Is_sized1) {
return _Copy_memcpy_distance(_IFirst, _OFirst, _IFirst, _RANGES next(_IFirst, _STD move(_ILast)));
} else if constexpr (_Is_sized2) {
return _Copy_memcpy_distance(_IFirst, _OFirst, _OFirst, _RANGES next(_OFirst, _STD move(_OLast)));
} else {
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved
_STL_ASSERT(false, "Tried to uninitialized copy two ranges with unreachable sentinels");
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
_Uninitialized_backout _Backout{_STD move(_OFirst)};

Expand Down Expand Up @@ -147,13 +158,20 @@ namespace ranges {
}

_Adl_verify_range(_First2, _Last2);
auto _IFirst = _Get_unwrapped_n(_STD move(_First1), _Count);
auto _OFirst = _Get_unwrapped(_STD move(_First2));
const auto _OLast = _Get_unwrapped(_STD move(_Last2));
if constexpr (_Ptr_copy_cat<_It, _Out>::_Really_trivial) {
auto _UResult = _Copy_memcpy_common(_IFirst, _IFirst + _Count, _OFirst, _OLast);
_IFirst = _UResult.in;
_OFirst = _UResult.out;
auto _IFirst = _Get_unwrapped_n(_STD move(_First1), _Count);
auto _OFirst = _Get_unwrapped(_STD move(_First2));
auto _OLast = _Get_unwrapped(_STD move(_Last2));
if constexpr (_Ptr_copy_cat<_It, _Out>::_Really_trivial && _Sized_or_unreachable_sentinel_for<_OSe, _Out>) {
if constexpr (sized_sentinel_for<_OSe, _Out>) {
auto _UResult = _Copy_memcpy_common(
_IFirst, _IFirst + _Count, _OFirst, _RANGES next(_OFirst, _STD move(_OLast)));
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
_IFirst = _UResult.in;
_OFirst = _UResult.out;
} else {
auto _UResult = _Copy_memcpy_count(_IFirst, _OFirst, static_cast<size_t>(_Count));
_IFirst = _UResult.in;
_OFirst = _UResult.out;
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
_Uninitialized_backout _Backout{_STD move(_OFirst)};

Expand Down Expand Up @@ -286,10 +304,16 @@ namespace ranges {
auto _IFirst = _Get_unwrapped_n(_STD move(_First1), _Count);
auto _OFirst = _Get_unwrapped(_STD move(_First2));
const auto _OLast = _Get_unwrapped(_STD move(_Last2));
if constexpr (_Ptr_move_cat<_It, _Out>::_Really_trivial) {
auto _UResult = _Copy_memcpy_common(_IFirst, _IFirst + _Count, _OFirst, _OLast);
_IFirst = _UResult.in;
_OFirst = _UResult.out;
if constexpr (_Ptr_move_cat<_It, _Out>::_Really_trivial && _Sized_or_unreachable_sentinel_for<_OSe, _Out>) {
if constexpr (sized_sentinel_for<_OSe, _Out>) {
auto _UResult = _Copy_memcpy_common(_IFirst, _IFirst + _Count, _OFirst, _OLast);
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
_IFirst = _UResult.in;
_OFirst = _UResult.out;
} else {
auto _UResult = _Copy_memcpy_count(_IFirst, _OFirst, static_cast<size_t>(_Count));
_IFirst = _UResult.in;
_OFirst = _UResult.out;
}
} else {
_Uninitialized_backout _Backout{_STD move(_OFirst)};

Expand Down
68 changes: 64 additions & 4 deletions stl/inc/xmemory
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,55 @@ namespace ranges {
&& _No_throw_forward_iterator<iterator_t<_Rng>>;
// clang-format on

template <class _InIt, class _OutIt>
in_out_result<_InIt, _OutIt> _Copy_memcpy_count(_InIt _IFirst, _OutIt _OFirst, const size_t _Count) noexcept {
const auto _IFirstPtr = _To_address(_IFirst);
const auto _OFirstPtr = _To_address(_OFirst);
const auto _IFirst_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_IFirstPtr));
const auto _OFirst_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_OFirstPtr));
const size_t _Count_bytes = _Count * sizeof(iter_value_t<_InIt>);
_CSTD memcpy(_OFirst_ch, _IFirst_ch, _Count_bytes);
if constexpr (is_pointer_v<_InIt>) {
_IFirst = reinterpret_cast<_InIt>(_IFirst_ch + _Count_bytes);
} else {
_IFirst += _Count;
}
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved

if constexpr (is_pointer_v<_OutIt>) {
_OFirst = reinterpret_cast<_OutIt>(_OFirst_ch + _Count_bytes);
} else {
_OFirst += _Count;
}
return {_STD move(_IFirst), _STD move(_OFirst)};
}

template <class _InIt, class _OutIt, typename _DistIt>
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
in_out_result<_InIt, _OutIt> _Copy_memcpy_distance(
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
_InIt _IFirst, _OutIt _OFirst, _DistIt _DFirst, _DistIt _DLast) noexcept {
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
const auto _IFirstPtr = _To_address(_IFirst);
const auto _OFirstPtr = _To_address(_OFirst);
const auto _DFirstPtr = _To_address(_DFirst);
const auto _DLastPtr = _To_address(_DLast);
const auto _IFirst_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_IFirstPtr));
const auto _OFirst_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_OFirstPtr));
const auto _DFirst_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_DFirstPtr));
const auto _DLast_ch = const_cast<char*>(reinterpret_cast<const volatile char*>(_DLastPtr));
const auto _Count = static_cast<size_t>(_DLast_ch - _DFirst_ch);
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
_CSTD memcpy(_OFirst_ch, _IFirst_ch, _Count);
if constexpr (is_pointer_v<_InIt>) {
_IFirst = reinterpret_cast<_InIt>(_IFirst_ch + _Count);
} else {
_IFirst += _Count / sizeof(iter_value_t<_InIt>);
}

if constexpr (is_pointer_v<_OutIt>) {
_OFirst = reinterpret_cast<_OutIt>(_OFirst_ch + _Count);
} else {
_OFirst += _Count / sizeof(iter_value_t<_OutIt>);
}
return {_STD move(_IFirst), _STD move(_OFirst)};
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that overload is superfluous. The only difference is how we get to _Count

I guess we can get to that easier that duplicating all that machinery

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might just use the _count version but that might have worse codegen. #1433 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought more about determining _Count via the right method and passing that around. That should give the optimal codegen all the time

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I'm happy with this as-is. I think extracting the "distance" behavior from this function would either produce less efficient code or something even more confusing.


template <class _InIt, class _OutIt>
in_out_result<_InIt, _OutIt> _Copy_memcpy_common(
_InIt _IFirst, _InIt _ILast, _OutIt _OFirst, _OutIt _OLast) noexcept {
Expand Down Expand Up @@ -1593,10 +1642,21 @@ namespace ranges {
uninitialized_move_result<_It, _Out> _Uninitialized_move_unchecked(
_It _IFirst, _Se _ILast, _Out _OFirst, _OSe _OLast) {
// clang-format on
if constexpr (_Ptr_move_cat<_It, _Out>::_Really_trivial
&& sized_sentinel_for<_Se, _It> && sized_sentinel_for<_OSe, _Out>) {
return _Copy_memcpy_common(
_IFirst, _RANGES next(_IFirst, _STD move(_ILast)), _OFirst, _RANGES next(_OFirst, _STD move(_OLast)));
constexpr bool _Is_sized1 = sized_sentinel_for<_Se, _It>;
constexpr bool _Is_sized2 = sized_sentinel_for<_OSe, _Out>;
if constexpr (_Ptr_move_cat<_It, _Out>::_Really_trivial && _Sized_or_unreachable_sentinel_for<_Se, _It>
#pragma warning(suppress : 6287) // Redundant code: the left and right subexpressions are identical
&& _Sized_or_unreachable_sentinel_for<_OSe, _Out>) {
if constexpr (_Is_sized1 && _Is_sized2) {
return _Copy_memcpy_common(_IFirst, _RANGES next(_IFirst, _STD move(_ILast)), _OFirst,
_RANGES next(_OFirst, _STD move(_OLast)));
} else if constexpr (_Is_sized1) {
return _Copy_memcpy_distance(_IFirst, _OFirst, _IFirst, _RANGES next(_IFirst, _STD move(_ILast)));
} else if constexpr (_Is_sized2) {
return _Copy_memcpy_distance(_IFirst, _OFirst, _OFirst, _RANGES next(_OFirst, _STD move(_OLast)));
} else {
_STL_ASSERT(false, "Tried to uninitialized move two ranges with unreachable sentinels");
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
_Uninitialized_backout _Backout{_STD move(_OFirst)};

Expand Down
22 changes: 19 additions & 3 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -5216,19 +5216,35 @@ _NODISCARD _FwdIt find(_ExPo&& _Exec, _FwdIt _First, const _FwdIt _Last, const _
#ifdef __cpp_lib_concepts
namespace ranges {
// clang-format off
template <class _Se, class _It>
concept _Sized_or_unreachable_sentinel_for = sized_sentinel_for<_Se, _It> || same_as<_Se, unreachable_sentinel_t>;

// concept-constrained for strict enforcement as it is used by several algorithms
template <input_iterator _It, sentinel_for<_It> _Se, class _Ty, class _Pj = identity>
requires indirect_binary_predicate<ranges::equal_to, projected<_It, _Pj>, const _Ty*>
_NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) {
if constexpr (_Memchr_in_find_is_safe<_It, _Ty> && sized_sentinel_for<_Se, _It> && same_as<_Pj, identity>) {
constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>;
if constexpr (_Memchr_in_find_is_safe<_It, _Ty> && _Sized_or_unreachable_sentinel_for<_Se, _It>
&& same_as<_Pj, identity>) {
if (!_STD is_constant_evaluated()) {
if (!_Within_limits(_First, _Val)) {
return _RANGES next(_STD move(_First), _Last);
if constexpr (_Is_sized) {
return _RANGES next(_STD move(_First), _Last);
} else {
_STL_ASSERT(false, "Tried to find a value in a range with unreachable sentinel"
CaseyCarter marked this conversation as resolved.
Show resolved Hide resolved
" that is not within limits of range's value type");
AdamBucior marked this conversation as resolved.
Show resolved Hide resolved
}
}

size_t _Count;
if constexpr (_Is_sized) {
_Count = static_cast<size_t>(_Last - _First);
} else {
_Count = SIZE_MAX;
}
const auto _First_ptr = _STD to_address(_First);
const auto _Result = static_cast<remove_reference_t<_Iter_ref_t<_It>>*>(_CSTD memchr(_First_ptr,
static_cast<unsigned char>(_Val), static_cast<size_t>(_Last - _First)));
static_cast<unsigned char>(_Val), _Count));
if (_Result) {
if constexpr (is_pointer_v<_It>) {
return _Result;
Expand Down
11 changes: 9 additions & 2 deletions tests/std/tests/P0896R4_ranges_alg_equal/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <range_algorithm_support.hpp>

constexpr void smoke_test() {
using ranges::equal, ranges::equal_to;
using std::abort, std::array, std::pair, std::same_as;
using ranges::equal, ranges::equal_to, ranges::begin, ranges::end;
using std::abort, std::array, std::pair, std::same_as, std::unreachable_sentinel;

array<pair<int, int>, 3> const x = {{{0, 42}, {2, 42}, {4, 42}}};
array<pair<long, long>, 3> const y = {{{13, -1}, {13, 1}, {13, 3}}};
Expand Down Expand Up @@ -71,6 +71,13 @@ constexpr void smoke_test() {
arr2[1] = 7;
assert(!equal(arr1, arr2));
}
{
// Validate memcmp + unreachable_sentinel cases
int arr1[3]{0, 2, 5};
int arr2[3]{0, 2, 5};
assert(!equal(begin(arr1), unreachable_sentinel, begin(arr2), end(arr2)));
assert(!equal(begin(arr1), end(arr1), begin(arr2), unreachable_sentinel));
}
}

int main() {
Expand Down
17 changes: 11 additions & 6 deletions tests/std/tests/P0896R4_ranges_alg_find/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ struct instantiator {

template <ranges::input_range Read>
static constexpr void call() {
using ranges::find, ranges::iterator_t;
using ranges::find, ranges::iterator_t, ranges::begin, ranges::end;

for (const auto& [value, _] : haystack) {
{ // Validate range overload [found case]
Expand Down Expand Up @@ -48,15 +48,20 @@ struct instantiator {
STATIC_ASSERT(same_as<decltype(result), iterator_t<Read>>);
assert(result == wrapped_input.end());
}
{ // Validate memchr case [found case]
{ // Validate memchr case
char arr[5]{4, 8, 1, -15, 125};

// found case
auto result = find(arr, 1);
assert(*result == 1);
}
{ // Validate memchr case [not found case]
char arr[5]{4, 8, 1, -15, 125};
auto result = find(arr, 10);

// not found case
result = find(arr, 10);
assert(result == end(arr));

// unreachable_sentinel case
result = find(begin(arr), unreachable_sentinel, 1);
assert(*result == 1);
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct instantiator {

template <class In1, class In2>
static constexpr void call() {
using ranges::lexicographical_compare, ranges::less;
using ranges::lexicographical_compare, ranges::less, ranges::begin, ranges::end;

// Validate range overload
{
Expand Down Expand Up @@ -202,6 +202,24 @@ struct instantiator {
arr2[2] = 1;
assert(!lexicographical_compare(arr1, arr2));
}
{ // Validate memcmp + unreachable_sentinel cases
unsigned char arr1[3]{0, 1, 2};
unsigned char arr2[3]{0, 1, 3};

assert(lexicographical_compare(begin(arr1), end(arr1), begin(arr1), unreachable_sentinel));
assert(!lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr1), end(arr1)));

assert(lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr2), unreachable_sentinel));
assert(lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr2), end(arr2)));
assert(lexicographical_compare(begin(arr1), end(arr1), begin(arr2), unreachable_sentinel));
arr2[2] = 2;
assert(!lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr2), end(arr2)));
assert(lexicographical_compare(begin(arr1), end(arr1), begin(arr2), unreachable_sentinel));
arr2[2] = 1;
assert(!lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr2), unreachable_sentinel));
assert(!lexicographical_compare(begin(arr1), unreachable_sentinel, begin(arr2), end(arr2)));
assert(!lexicographical_compare(begin(arr1), end(arr1), begin(arr2), unreachable_sentinel));
}
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@ struct memcpy_test {
assert(equal(input, expected_input));
assert(equal(output, expected_input_short));
}

{ // Validate unreachable_sentinel
vector<int> input = {13, 55, 12345, 42};
vector<int> output = {-1, -1, -1, -1};

const same_as<uninitialized_copy_n_result<iterator_t<vector<int>>, iterator_t<vector<int>>>> auto result =
uninitialized_copy_n(input.begin(), 3, output.begin(), unreachable_sentinel);
assert(next(result.in) == input.end());
assert(next(result.out) == output.end());
assert(equal(input, expected_input));
assert(equal(output, expected_output));
}
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,18 @@ struct memcpy_test {
assert(equal(input, expected_input));
assert(equal(output, expected_input_short));
}

{ // Validate unreachable_sentinel
vector<int> input = {13, 55, 12345, 42};
vector<int> output = {-1, -1, -1, -1};

const same_as<uninitialized_move_n_result<iterator_t<vector<int>>, iterator_t<vector<int>>>> auto result =
uninitialized_move_n(input.begin(), 3, output.begin(), unreachable_sentinel);
assert(next(result.in) == input.end());
assert(next(result.out) == output.end());
assert(equal(input, expected_input));
assert(equal(output, expected_output));
}
}
};

Expand Down