Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<bit>: popcount() utilizes cnt instruction on arm64 #2127

Merged
merged 16 commits into from
Sep 11, 2021
25 changes: 23 additions & 2 deletions stl/inc/limits
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#include <climits>
#include <cwchar>
#include <intrin0.h>
#if defined(_M_ARM64) && !defined(_M_ARM64EC) // TRANSITION, GH-2129
#include <arm64_neon.h>
#endif // defined(_M_ARM64) && !defined(_M_ARM64EC)
fsb4000 marked this conversation as resolved.
Show resolved Hide resolved
#include <isa_availability.h>
#include <xstddef>

Expand Down Expand Up @@ -1116,6 +1119,20 @@ _NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept {
}
#endif // _HAS_POPCNT_INTRINSICS

// _HAS_CNT_INTRINSICS selects whether the ARM64 popcount path is compiled.
// It is defined as 1 only when _M_ARM64 is defined and none of _M_CEE_PURE, __CUDACC__,
// __INTEL_COMPILER, or __clang__ is defined; in every other configuration it is defined as 0.
// The __clang__ exclusion is marked as a temporary workaround (TRANSITION, LLVM-51488).
#if defined(_M_ARM64) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) && !defined(__INTEL_COMPILER) \
&& !defined(__clang__) // TRANSITION, LLVM-51488
fsb4000 marked this conversation as resolved.
Show resolved Hide resolved
#define _HAS_CNT_INTRINSICS 1
#else // ^^^ intrinsics available ^^^ / vvv intrinsics unavailable vvv
#define _HAS_CNT_INTRINSICS 0
fsb4000 marked this conversation as resolved.
Show resolved Hide resolved
#endif // ^^^ intrinsics unavailable ^^^

#if _HAS_CNT_INTRINSICS
// Returns the number of set bits in _Val, computed with the ARM64 NEON intrinsics
// neon_cnt and neon_addv8 rather than a portable bit-twiddling loop.
// NOTE(review): presumably neon_cnt yields a bit count per byte lane and neon_addv8 sums the
// eight lanes, so the total (at most 64) fits in the signed 8-bit lane read here - confirm
// against <arm64_neon.h>.
_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept {
    // Move the 64-bit value into a NEON register, count, reduce, then read lane 0 of the result.
    return neon_addv8(neon_cnt(__uint64ToN64_v(_Val))).n8_i8[0];
}
#endif // _HAS_CNT_INTRINSICS

template <class _Ty>
constexpr bool _Is_standard_unsigned_integer =
_Is_any_of_v<remove_cv_t<_Ty>, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>;
Expand All @@ -1135,14 +1152,18 @@ _NODISCARD constexpr int _Countr_zero(const _Ty _Val) noexcept {

// Returns the number of set bits in _Val for the standard unsigned integer types
// (the overload is enabled only when _Is_standard_unsigned_integer<_Ty> is true).
//
// When an intrinsic path is compiled in, it is used as follows:
//   - _HAS_POPCNT_INTRINSICS: calls _Checked_x86_x64_popcount(_Val)
//   - otherwise _HAS_CNT_INTRINSICS: calls _Arm64_popcount(_Val)
// Under _HAS_CXX20 the intrinsic call is guarded by !is_constant_evaluated(), so constant
// evaluation skips it and reaches _Popcount_fallback(_Val); without _HAS_CXX20 the braced
// block runs unconditionally. When neither macro is set, only the fallback is compiled.
//
// NOTE(review): this span shows both the pre-change `#if _HAS_POPCNT_INTRINSICS` line and its
// replacement `#if _HAS_POPCNT_INTRINSICS || _HAS_CNT_INTRINSICS`, together with both matching
// #endif lines - presumably only the newer pair is meant to remain; confirm against the merged file.
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> _Enabled = 0>
_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept {
#if _HAS_POPCNT_INTRINSICS
#if _HAS_POPCNT_INTRINSICS || _HAS_CNT_INTRINSICS
fsb4000 marked this conversation as resolved.
Show resolved Hide resolved
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
#if _HAS_POPCNT_INTRINSICS
return _Checked_x86_x64_popcount(_Val);
#elif _HAS_CNT_INTRINSICS
return _Arm64_popcount(_Val);
#endif
fsb4000 marked this conversation as resolved.
Show resolved Hide resolved
}
#endif // _HAS_POPCNT_INTRINSICS
#endif // _HAS_POPCNT_INTRINSICS || _HAS_CNT_INTRINSICS
return _Popcount_fallback(_Val);
}

Expand Down