From 903c59faff91610c3ea05e698ef16754c3de28df Mon Sep 17 00:00:00 2001 From: statementreply Date: Mon, 27 Jul 2020 20:39:23 +0800 Subject: [PATCH 01/19] Fix undue overflow and underflow in complex sqrt Modifies the scale factors in `_Fabs` (used by `sqrt`) such that: - `_Fabs` doesn't underflow when the input is tiny. - `sqrt` doesn't overflow when the input is huge. --- stl/inc/complex | 65 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/stl/inc/complex b/stl/inc/complex index 2bbba58573..8ca4aa2449 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -66,8 +66,12 @@ public: return (numeric_limits<_Ty>::max)(); } + static constexpr _Ty _Flt_norm_min() { + return (numeric_limits<_Ty>::min)() > 0 ? (numeric_limits<_Ty>::min)() : 0; + } + static _Ty _Abs(_Ty _Left) { - return static_cast<_Ty>(_Left < 0 ? -_Left : _Left); + return static_cast<_Ty>(_Signbit(_Left) ? -_Left : _Left); } static _Ty _Cosh(_Ty _Left, _Ty _Right) { // return cosh(_Left) * _Right @@ -75,7 +79,7 @@ public: } static _Ty _Copysign(_Ty _Magnitude, _Ty _Sign) { - return static_cast<_Ty>(_Sign < 0 ? -_Abs(_Magnitude) : _Abs(_Magnitude)); + return static_cast<_Ty>(_Signbit(_Sign) ? -_Abs(_Magnitude) : _Abs(_Magnitude)); } static short _Exp(_Ty* _Pleft, _Ty _Right, short _Exponent) { // compute exp(*_Pleft) * _Right * 2 ^ _Exponent @@ -106,7 +110,7 @@ public: } static bool _Signbit(_Ty _Left) { - return _Left < 0; + return _CSTD signbit(static_cast(_Left)); } static _Ty _Sinh(_Ty _Left, _Ty _Right) { // return sinh(_Left) * _Right @@ -200,6 +204,10 @@ public: return (numeric_limits::max)(); } + static constexpr _Ty _Flt_norm_min() { + return (numeric_limits::min)(); + } + static _Ty _Abs(_Ty _Left) { // testing _Left < 0 would be incorrect when _Left is -0.0 return _CSTD fabsl(_Left); @@ -332,6 +340,10 @@ public: return (numeric_limits::max)(); } + static constexpr _Ty _Flt_norm_min() { + return (numeric_limits::min)(); + } + static _Ty _Abs(_Ty _Left) { // testing _Left < 0 would be incorrect when _Left is -0.0 return _CSTD fabs(_Left); @@ -467,6 +479,10 @@ public: return (numeric_limits::max)(); } + static constexpr _Ty _Flt_norm_min() { + return (numeric_limits::min)(); + } + static _Ty _Abs(_Ty _Left) { // testing _Left < 0 would be incorrect when _Left is -0.0 return _CSTD fabsf(_Left); @@ -1537,10 +1553,13 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) { // FUNCTION TEMPLATE _Fabs template -_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale factor +_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return magnitude and scale factor. + // returns a non-zero even integer in *_Pexp when _Left is finite + // and non-zero. + // returns 0 in *_Pexp when _Left is zero, infinity or NaN. *_Pexp = 0; - _Ty _Av = real(_Left); - _Ty _Bv = imag(_Left); + _Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left)); + _Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left)); if (_Ctraits<_Ty>::_Isinf(_Av) || _Ctraits<_Ty>::_Isinf(_Bv)) { return _Ctraits<_Ty>::_Infv(); // at least one component is INF @@ -1549,13 +1568,8 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale } else if (_Ctraits<_Ty>::_Isnan(_Bv)) { return _Bv; // imaginary component is NaN } else { // neither component is NaN or INF - _Av = _Ctraits<_Ty>::_Abs(_Av); - _Bv = _Ctraits<_Ty>::_Abs(_Bv); - if (_Av < _Bv) { // ensure that |_Bv| <= |_Av| - _Ty _Tmp = _Av; - _Av = _Bv; - _Bv = _Tmp; + _STD swap(_Av, _Bv); } if (_Av == 0) { @@ -1563,16 +1577,27 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale } if (1 <= _Av) { - *_Pexp = 2; - _Av = _Av * static_cast<_Ty>(0.25); - _Bv = _Bv * static_cast<_Ty>(0.25); + *_Pexp = 4; + _Av = _Av * static_cast<_Ty>(0.0625); + _Bv = _Bv * static_cast<_Ty>(0.0625); } else { - *_Pexp = -2; - _Av = _Av * 4; - _Bv = _Bv * 4; + constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps(); + + // TRANSITION, workaround for non floating point _Ty + if (_Flt_eps != 0 && _Av < 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps) { + constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits; + + *_Pexp = _Exponent; + _Av = _Ctraits<_Ty>::ldexp(_Av, -_Exponent); + _Bv = _Ctraits<_Ty>::ldexp(_Bv, -_Exponent); + } else { + *_Pexp = -2; + _Av = _Av * 4; + _Bv = _Bv * 4; + } } - _Ty _Tmp = _Av - _Bv; + const _Ty _Tmp = _Av - _Bv; if (_Tmp == _Av) { return _Av; // _Bv unimportant } else if (_Bv < _Tmp) { // use simple approximation @@ -1691,7 +1716,7 @@ _NODISCARD complex<_Ty> sqrt(const complex<_Ty>& _Left) { return complex<_Ty>(_Ctraits<_Ty>::_Infv(), _Im); // (any, +/-Inf) } else if (_Ctraits<_Ty>::_Isnan(_Im)) { if (_Re < 0) { - return complex<_Ty>(_Im, _Re); // (-Inf, NaN) + return complex<_Ty>(_Ctraits<_Ty>::_Abs(_Im), _Ctraits<_Ty>::_Copysign(_Re, _Im)); // (-Inf, NaN) } else { return _Left; // (+Inf, NaN) } From a6a59345b8cd261e3e96fc6d4eb37293624ca0cf Mon Sep 17 00:00:00 2001 From: statementreply Date: Mon, 3 Aug 2020 22:13:29 +0800 Subject: [PATCH 02/19] Improve accuracy of `log` when |z| is close to 1 When |z| is close to 1, compute log(|z|) as log1p(norm_minus_1(z)) / 2, where norm_minus_1(z) = real(z) ^ 2 + imag(z) ^ 2 - 1 computed with double width arithmetic to avoid catastrophic cancellation. --- stl/CMakeLists.txt | 2 + stl/inc/complex | 41 ++++------ stl/inc/xutility | 18 +++++ stl/src/float_multi_prec.hpp | 112 ++++++++++++++++++++++++++ stl/src/math_algorithms.cpp | 148 +++++++++++++++++++++++++++++++++++ 5 files changed, 297 insertions(+), 24 deletions(-) create mode 100644 stl/src/float_multi_prec.hpp create mode 100644 stl/src/math_algorithms.cpp diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index 23dc688099..6c5e1d445a 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -240,7 +240,9 @@ endforeach() # Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib. set(IMPLIB_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp + ${CMAKE_CURRENT_LIST_DIR}/src/float_multi_prec.hpp ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp + ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp ${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp diff --git a/stl/inc/complex b/stl/inc/complex index 8ca4aa2449..03ff979401 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -40,6 +40,11 @@ struct _C_ldouble_complex { #define _RE 0 #define _IM 1 +_EXTERN_C +_NODISCARD double __stdcall __std_math_log_hypot(double, double) noexcept; +_NODISCARD float __stdcall __std_math_log_hypotf(float, float) noexcept; +_END_EXTERN_C + _STD_BEGIN using _Dcomplex_value = _CSTD _C_double_complex; using _Fcomplex_value = _CSTD _C_float_complex; @@ -1618,32 +1623,20 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return mag // FUNCTION TEMPLATE log template -_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) { - _Ty _Theta = _Ctraits<_Ty>::atan2(imag(_Left), real(_Left)); // get phase - - if (_Ctraits<_Ty>::_Isnan(_Theta)) { - return complex<_Ty>(_Theta, _Theta); // real or imag is NaN - } else { // use 1 1/2 precision to preserve bits - constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L); - constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L); - int _Leftexp; - _Ty _Rho = _Fabs(_Left, &_Leftexp); // get magnitude and scale factor - - _Ty _Leftn = static_cast<_Ty>(_Leftexp); +_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { + return static_cast<_Ty>( + __std_math_log_hypot(static_cast(_STD real(_Left)), static_cast(_STD imag(_Left)))); +}; - _Ty _Real; - if (_Rho == 0) { - _Real = -_Ctraits<_Ty>::_Infv(); // log(0) == -INF - } else if (_Ctraits<_Ty>::_Isinf(_Rho)) { - _Real = _Rho; // log(INF) == INF - } else { - _Real = static_cast<_Ty>(_Ctraits<_Ty>::log(_Rho)); // These casts are TRANSITION, DevCom-1093507 - _Real += static_cast<_Ty>(_Leftn * _Cl); - _Real += static_cast<_Ty>(_Leftn * _Cm); - } +_NODISCARD inline float _Log_abs(const complex& _Left) noexcept { + return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left)); +}; - return complex<_Ty>(_Real, _Theta); - } +template +_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) { + const _Ty _Log_abs = _STD _Log_abs(_Left); // get logarithm of magnitude + const _Ty _Theta = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase + return complex<_Ty>(_Log_abs, _Theta); } // FUNCTION TEMPLATE pow diff --git a/stl/inc/xutility b/stl/inc/xutility index 9c82bb27c5..9e76c2145e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5927,16 +5927,20 @@ struct _Float_traits { // traits for double and long double: using type = unsigned long long; + static constexpr type _Sign_mask = 0x8000'0000'0000'0000ULL; static constexpr type _Magnitude_mask = 0x7fff'ffff'ffff'ffffULL; static constexpr type _Exponent_mask = 0x7ff0'0000'0000'0000ULL; + static constexpr type _Quiet_nan_mask = 0x0008'0000'0000'0000ULL; }; template <> struct _Float_traits { using type = unsigned int; + static constexpr type _Sign_mask = 0x8000'0000U; static constexpr type _Magnitude_mask = 0x7fff'ffffU; static constexpr type _Exponent_mask = 0x7f80'0000U; + static constexpr type _Quiet_nan_mask = 0x0040'0000U; }; // FUNCTION TEMPLATE _Float_abs_bits @@ -5952,12 +5956,26 @@ _NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_abs(const _Ty _Xx) { // constexpr floa return _Bit_cast<_Ty>(_Float_abs_bits(_Xx)); } +// FUNCTION TEMPLATE _Float_copysign +template , int> = 0> +_NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_copysign(const _Ty _Magnitude, const _Ty _Sign) { // constexpr copysign() + const auto _Signbit = _Bit_cast::type>(_Sign) & _Float_traits<_Ty>::_Sign_mask; + return _Bit_cast<_Ty>(_Float_abs_bits(_Magnitude) | _Signbit); +} + // FUNCTION TEMPLATE _Is_nan template , int> = 0> _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan() return _Float_abs_bits(_Xx) > _Float_traits<_Ty>::_Exponent_mask; } +// FUNCTION TEMPLATE _Is_signaling_nan +template , int> = 0> +_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN + const auto _Abs_bits = _Float_abs_bits(_Xx); + return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0); +} + // FUNCTION TEMPLATE _Is_finite template , int> = 0> _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite() diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp new file mode 100644 index 0000000000..6599616df5 --- /dev/null +++ b/stl/src/float_multi_prec.hpp @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// implements multi-precision floating point arithmetic for numerical algorithms + +// This must be as small as possible, because its contents are +// injected into the msvcprt.lib and msvcprtd.lib import libraries. +// Do not include or define anything else here. +// In particular, basic_string must not be included here. + +#pragma once + +#if defined(_M_FP_FAST) || defined(__FAST_MATH__) +#error float_multi_prec.hpp should not be compiled with /fp:fast or -ffast-math +#endif // defined(_M_FP_FAST) || defined(__FAST_MATH__) + +#include +#include +#include +#include + +extern "C" int __isa_available; + +_STD_BEGIN +namespace _Float_multi_prec { + // multi-precision floating point types + template + struct _Fmp_t; + + template + struct _Fmp_t<_Ty, 2> { + static_assert(_STD is_floating_point_v<_Ty>); + _Ty _Val0; + _Ty _Val1; + }; + + // addition + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept { + const _Ty _Sum0 = _Xval + _Yval; + const _Ty _Ymod = _Sum0 - _Xval; + const _Ty _Xmod = _Sum0 - _Ymod; + const _Ty _Yerr = _Yval - _Ymod; + const _Ty _Xerr = _Xval - _Xmod; + return {_Sum0, _Xerr + _Yerr}; + } + + // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0 + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept { + const _Ty _Sum0 = _Xval + _Yval; + const _Ty _Ymod = _Sum0 - _Xval; + const _Ty _Yerr = _Yval - _Ymod; + return {_Sum0, _Yerr}; + } + + // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0 + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { + const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0); + return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1); + } + + template + _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { + const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0); + return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1)); + } + + // multiplication + _NODISCARD inline constexpr double _High_half(const double _Val) { + const auto _Bits = _STD _Bit_cast(_Val); + const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; + return _STD _Bit_cast(_High_half_bits); + } + + _NODISCARD inline constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept { + const double _Xhigh = _High_half(_Xval); + const double _Xlow = _Xval - _Xhigh; + return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow; + } + +#if defined(_M_IX86) || defined(_M_X64) + _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept { + const __m128d _Mx = _mm_set_sd(_Xval); + const __m128d _Mprod0 = _mm_set_sd(_Prod0); + const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0); + double _Result; + _mm_store_sd(&_Result, _Mresult); + return _Result; + } +#endif // defined(_M_IX86) || defined(_M_X64) + + _NODISCARD inline constexpr _Fmp_t _Sqr_x2(const double _Xval) noexcept { + const double _Prod0 = _Xval * _Xval; + + if (_STD is_constant_evaluated()) { + return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; + } else { +#if defined(_M_IX86) || defined(_M_X64) + if (__isa_available >= __ISA_AVAILABLE_AVX2) { + return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; + } else { + return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; + } +#else // ^^^ x86, x64 / arm, arm64 vvv + return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; +#endif // ^^^ arm, arm64 ^^^ + } + } +} // namespace _Float_multi_prec +_STD_END diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp new file mode 100644 index 0000000000..8658271b63 --- /dev/null +++ b/stl/src/math_algorithms.cpp @@ -0,0 +1,148 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// implements numerical algorithms for + +// This must be as small as possible, because its contents are +// injected into the msvcprt.lib and msvcprtd.lib import libraries. +// Do not include or define anything else here. +// In particular, basic_string must not be included here. + +#include +#include +#include +#include + +#include "float_multi_prec.hpp" + +#define _FMP ::std::_Float_multi_prec:: + +namespace { + // TRANSITION: sqrt() isn't constexpr + // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)()); + // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon()); + template + inline constexpr _Ty _Hypot_leg_huge = _Ty{6.703903964971298e+153}; + template <> + inline constexpr float _Hypot_leg_huge = 9.2233715e+18f; + + template + inline constexpr _Ty _Hypot_leg_tiny = _Ty{1.4156865331029228e-146}; + template <> + inline constexpr float _Hypot_leg_tiny = 4.440892e-16f; + + template + _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept { + // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12 + // returns _Xval * _Xval + _Yval * _Yval - 1 + const _FMP _Fmp_t<_Ty, 2> _Xsqr = _FMP _Sqr_x2(_Xval); + const _FMP _Fmp_t<_Ty, 2> _Ysqr = _FMP _Sqr_x2(_Yval); + const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr); + return _Add_x1(_Xsqr_m1, _Ysqr); + } + + _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept { + const auto _Dx = static_cast(_Xval); + const auto _Dy = static_cast(_Yval); + return static_cast((_Dx * _Dx - 1.0) + _Dy * _Dy); + } + + // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes + template + _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval) + static_assert(_STD is_floating_point_v<_Ty>); + + if (_STD _Is_nan(_Xval)) { + return _Xval + _Xval; // raise FE_INVALID if necessary + } + + if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is accurate + if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow + return _STD log(_Xval); + } + + return _STD log(_Ty{1.0} + _Xval); + } + + const _Ty _Xabs = _STD _Float_abs(_Xval); + if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny + if (_Xabs == _Ty{0.0}) { + return _Xval; + } + + // honor rounding mode, raise FE_INEXACT + return _Xabs * (_STD _Float_copysign(_Ty{1.0}, _Xval) - _Ty{0.5} * _Xabs); + } + + // compute log(1 + _Xval) with fixup for small _Xval + const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval); + return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0; + } + + template + _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval)) + static_assert(_STD is_floating_point_v<_Ty>); + + if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN + if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) { + return _Xval + _Yval; + } + + if (_STD isinf(_Xval)) { + return _STD _Float_abs(_Xval); + } + + if (_STD isinf(_Yval)) { + return _STD _Float_abs(_Yval); + } + + return _Xval + _Yval; + } + + _Ty _Av = _STD _Float_abs(_Xval); + _Ty _Bv = _STD _Float_abs(_Yval); + + if (_Av < _Bv) { // ensure that _Bv <= _Av + _STD swap(_Av, _Bv); + } + + if (_Bv == 0) { + return _STD log(_Av); + } + + if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow + constexpr _Ty _Norm_small = _Ty{0.5}; + constexpr _Ty _Norm_big = _Ty{3.0}; + + const _Ty _Bv_sqr = _Bv * _Bv; + const _Ty _Norm = _Av * _Av + _Bv_sqr; + + if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation + return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5}; + } else { + return _STD log(_Norm) * _Ty{0.5}; + } + } else { // use 1 1/2 precision to preserve bits + constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L); + constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L); + + const int _Exponent = _STD ilogb(_Av); + const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent); + const _Ty _Bv_scaled = _STD scalbn(_Bv, -_Exponent); + const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled; + const _Ty _Norm_scaled = _Av_scaled * _Av_scaled + _Bv_scaled_sqr; + const _Ty _Real_shifted = _STD log(_Norm_scaled) * _Ty{0.5}; + return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm; + } + } +} // namespace + +_EXTERN_C +_NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept { + return _Log_hypot(_Xval, _Yval); +} + +_NODISCARD float __stdcall __std_math_log_hypotf(const float _Xval, const float _Yval) noexcept { + return _Log_hypot(_Xval, _Yval); +} +_END_EXTERN_C From 784311854dfd8095549aa278c078f1770022fe51 Mon Sep 17 00:00:00 2001 From: statementreply Date: Thu, 6 Aug 2020 21:34:14 +0800 Subject: [PATCH 03/19] Minor fixes, clarify comments --- stl/inc/complex | 5 +++-- stl/inc/xutility | 10 ++++++++++ stl/src/math_algorithms.cpp | 23 +++++++++++++---------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/stl/inc/complex b/stl/inc/complex index 03ff979401..60fa9c911c 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -1587,9 +1587,10 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return mag _Bv = _Bv * static_cast<_Ty>(0.0625); } else { constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps(); - // TRANSITION, workaround for non floating point _Ty - if (_Flt_eps != 0 && _Av < 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps) { + constexpr _Ty _Leg_tiny = _Flt_eps == 0 ? _Ty{0} : 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps; + + if (_Av < _Leg_tiny) { constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits; *_Pexp = _Exponent; diff --git a/stl/inc/xutility b/stl/inc/xutility index 9e76c2145e..453da3a8e7 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5970,12 +5970,22 @@ _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan( } // FUNCTION TEMPLATE _Is_signaling_nan +// TRANSITION, workaround x86 ABI +// On x86 ABI, floating point by-value arguments and return values are passed in 80-bit x87 registers. +// When the value is a 32-bit or 64-bit signaling NaN, the conversion to/from 80-bit raises FE_INVALID +// and turns it into a quiet NaN. This behavior is undesirable if we want to test for signaling NaNs. template , int> = 0> _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN const auto _Abs_bits = _Float_abs_bits(_Xx); return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0); } +// FUNCTION TEMPLATE _Is_inf +template , int> = 0> +_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_inf(const _Ty _Xx) { // constexpr isinf() + return _Float_abs_bits(_Xx) == _Float_traits<_Ty>::_Exponent_mask; +} + // FUNCTION TEMPLATE _Is_finite template , int> = 0> _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite() diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp index 8658271b63..dd28c435d4 100644 --- a/stl/src/math_algorithms.cpp +++ b/stl/src/math_algorithms.cpp @@ -52,11 +52,11 @@ namespace { _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval) static_assert(_STD is_floating_point_v<_Ty>); - if (_STD _Is_nan(_Xval)) { - return _Xval + _Xval; // raise FE_INVALID if necessary + if (_STD _Is_nan(_Xval)) { // NaN + return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN } - if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is accurate + if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow return _STD log(_Xval); } @@ -66,12 +66,12 @@ namespace { const _Ty _Xabs = _STD _Float_abs(_Xval); if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny - if (_Xabs == _Ty{0.0}) { + if (_Xval == _Ty{0.0}) { return _Xval; } // honor rounding mode, raise FE_INEXACT - return _Xabs * (_STD _Float_copysign(_Ty{1.0}, _Xval) - _Ty{0.5} * _Xabs); + return _Xval - _Ty{0.5} * _Xval * _Xval; } // compute log(1 + _Xval) with fixup for small _Xval @@ -84,18 +84,21 @@ namespace { static_assert(_STD is_floating_point_v<_Ty>); if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN + // raise FE_INVALID and return NaN if at least one of them is a signaling NaN if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) { return _Xval + _Yval; } - if (_STD isinf(_Xval)) { + // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN + if (_STD _Is_inf(_Xval)) { return _STD _Float_abs(_Xval); } - if (_STD isinf(_Yval)) { + if (_STD _Is_inf(_Yval)) { return _STD _Float_abs(_Yval); } + // at least one of them is a quiet NaN, and the other is not an infinity return _Xval + _Yval; } @@ -123,8 +126,8 @@ namespace { return _STD log(_Norm) * _Ty{0.5}; } } else { // use 1 1/2 precision to preserve bits - constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L); - constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L); + constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L}; + constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L}; const int _Exponent = _STD ilogb(_Av); const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent); @@ -135,7 +138,7 @@ namespace { return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm; } } -} // namespace +} // unnamed namespace _EXTERN_C _NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept { From 1d34722e6df7617a3afc282b74dea92be3258565 Mon Sep 17 00:00:00 2001 From: statementreply Date: Thu, 6 Aug 2020 22:19:58 +0800 Subject: [PATCH 04/19] Add complex sqrt test --- tests/std/include/fenv_prefix.hpp | 61 +++++ tests/std/test.lst | 1 + .../env.lst | 32 +++ .../floating_point_utils.hpp | 241 +++++++++++++++++ .../sqrt_test_cases.hpp | 249 ++++++++++++++++++ .../test.cpp | 59 +++++ .../test.hpp | 27 ++ tests/utils/stl/test/config.py | 6 + tests/utils/stl/test/tests.py | 8 + 9 files changed, 684 insertions(+) create mode 100644 tests/std/include/fenv_prefix.hpp create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp diff --git a/tests/std/include/fenv_prefix.hpp b/tests/std/include/fenv_prefix.hpp new file mode 100644 index 0000000000..ab1a82fe54 --- /dev/null +++ b/tests/std/include/fenv_prefix.hpp @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#ifdef FP_CONFIG_PRESET +#if FP_CONFIG_PRESET == 3 +#define FP_PRESET_FAST 1 +#else // ^^^ FP_CONFIG_PRESET == 3 / FP_CONFIG_PRESET != 3 vvv +#define FP_PRESET_FAST 0 +#endif // ^^^ FP_CONFIG_PRESET != 3 ^^^ +#endif // defined(FP_CONFIG_PRESET) + +#ifdef FP_CONTRACT_MODE +#ifdef __clang__ + +#if FP_CONTRACT_MODE == 0 +#pragma STDC FP_CONTRACT OFF +#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv +#pragma STDC FP_CONTRACT ON +#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv +#pragma STDC FP_CONTRACT ON +#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv +#error invalid FP_CONTRACT_MODE +#endif // ^^^ invalid FP_CONTRACT_MODE ^^^ + +#else // ^^^ clang / MSVC vvv + +#if FP_CONTRACT_MODE == 0 +#pragma fp_contract(off) +#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv +#pragma fp_contract(on) +#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv +#pragma fp_contract(on) +#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv +#error invalid FP_CONTRACT_MODE +#endif // ^^^ invalid FP_CONTRACT_MODE ^^^ + +#endif // ^^^ MSVC ^^^ +#endif // defined(FP_CONTRACT_MODE) && !defined(__clang__) + +#include +#include + +struct fenv_initializer_t { + fenv_initializer_t() { +#if WITH_FP_ABRUPT_UNDERFLOW + { + const errno_t result = _controlfp_s(nullptr, _DN_FLUSH, _MCW_DN); + assert(result == 0); + } +#endif // WITH_FP_ABRUPT_UNDERFLOW + } + + ~fenv_initializer_t() = default; + + fenv_initializer_t(const fenv_initializer_t&) = delete; + fenv_initializer_t& operator=(const fenv_initializer_t&) = delete; +}; + +const fenv_initializer_t fenv_initializer{}; diff --git a/tests/std/test.lst b/tests/std/test.lst index f6eda1e96f..5c0ac0973a 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -161,6 +161,7 @@ tests\GH_000625_vector_bool_optimization tests\GH_000685_condition_variable_any tests\GH_000690_overaligned_function tests\GH_000890_pow_template +tests\GH_000935_complex_numerical_accuracy tests\GH_000940_missing_valarray_copy tests\GH_001010_filesystem_error_encoding tests\GH_001017_discrete_distribution_out_of_range diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst new file mode 100644 index 0000000000..918098b2eb --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst @@ -0,0 +1,32 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\prefix.lst +RUNALL_CROSSLIST +PM_CL="/FIfenv_prefix.hpp" +RUNALL_CROSSLIST +PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest" +RUNALL_CROSSLIST +PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2" +PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-" +PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL" +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd" +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT" +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-" +RUNALL_CROSSLIST +PM_CL="" +PM_CL="/arch:IA32" +PM_CL="/arch:AVX2" +PM_CL="/arch:VFPv4" +RUNALL_CROSSLIST +PM_CL="/fp:strict /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1" +PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1" +PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0" +PM_CL="/fp:fast /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0" +RUNALL_CROSSLIST +PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0" +PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj" +RUNALL_CROSSLIST +PM_CL="/DFP_CONTRACT_MODE=0" PM_CLANG="-ffp-contract=off" +PM_CL="/DFP_CONTRACT_MODE=1" PM_CLANG="-ffp-contract=on" +PM_CL="/DFP_CONTRACT_MODE=2" PM_CLANG="-ffp-contract=fast" diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp new file mode 100644 index 0000000000..22c2ad18f7 --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp @@ -0,0 +1,241 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace fputil { + template + using float_bits_t = typename _STD _Float_traits::type; + + template + _INLINE_VAR constexpr float_bits_t magnitude_mask_v = _STD _Float_traits::_Magnitude_mask; + + template + _INLINE_VAR constexpr float_bits_t exponent_mask_v = _STD _Float_traits::_Exponent_mask; + + template + _INLINE_VAR constexpr float_bits_t significand_mask_v = magnitude_mask_v & ~exponent_mask_v; + + template + _INLINE_VAR constexpr float_bits_t sign_mask_v = _STD _Float_traits::_Sign_mask; + + template + _INLINE_VAR constexpr float_bits_t norm_min_bits_v = significand_mask_v + 1U; + + template + _INLINE_VAR constexpr float_bits_t norm_max_bits_v = exponent_mask_v - 1U; + + template + _INLINE_VAR constexpr float_bits_t infinity_bits_v = exponent_mask_v; + + // not affected by abrupt underflow + template ::value, int> = 0> + constexpr bool iszero(const T& x) { + return _STD _Float_abs_bits(x) == 0; + } + + // not affected by /fp:fast + template ::value, int> = 0> + constexpr bool signbit(const T& x) { + const auto bits = std::_Bit_cast>(x); + return (bits & sign_mask_v) != 0; + } + + enum class rounding_mode { + to_nearest_ties_even = FE_TONEAREST, + toward_zero = FE_TOWARDZERO, + toward_positive = FE_UPWARD, + toward_negative = FE_DOWNWARD, + }; + + bool is_directed_rounding_mode(const rounding_mode mode) { + switch (mode) { + case rounding_mode::to_nearest_ties_even: + return false; + + case rounding_mode::toward_zero: + case rounding_mode::toward_positive: + case rounding_mode::toward_negative: + return true; + + default: + assert(false); + return false; + } + } + +#if TEST_FP_ROUNDING + +#ifdef __clang__ +// TRANSITION, should be #pragma STDC FENV_ACCESS ON +#else // ^^^ clang / MSVC vvv +// TRANSITION, VSO-923474 -- should be #pragma STDC FENV_ACCESS ON +#pragma fenv_access(on) +#endif // ^^^ MSVC ^^^ + + constexpr rounding_mode all_rounding_modes[] = { + rounding_mode::to_nearest_ties_even, + rounding_mode::toward_zero, + rounding_mode::toward_positive, + rounding_mode::toward_negative, + }; + + class rounding_guard { + public: + explicit rounding_guard(const rounding_mode mode) : old_mode{static_cast(std::fegetround())} { + const int result = std::fesetround(static_cast(mode)); + assert(result == 0); + } + + ~rounding_guard() { + const int result = std::fesetround(static_cast(old_mode)); + assert(result == 0); + } + + rounding_guard(const rounding_guard&) = delete; + rounding_guard& operator=(const rounding_guard&) = delete; + + private: + rounding_mode old_mode; + }; + +#else // ^^^ alternative rounding modes / default rounding mode only vvv + + constexpr rounding_mode all_rounding_modes[] = {rounding_mode::to_nearest_ties_even}; + + class rounding_guard { + public: + explicit rounding_guard(const rounding_mode mode) { + static_cast(mode); + } + + ~rounding_guard() = default; + + rounding_guard(const rounding_guard&) = delete; + rounding_guard& operator=(const rounding_guard&) = delete; + }; + +#endif // ^^^ default rounding mode only ^^^ + + // compares whether two floating point values are equal + // all NaNs are equal, +0.0 and -0.0 are not equal + template ::value, int> = 0> + bool precise_equal(const T& actual, const T& expected) { + if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) { + return _STD _Is_nan(actual) == _STD _Is_nan(expected); + } else { + return actual == expected && fputil::signbit(actual) == fputil::signbit(expected); + } + } + + namespace detail { + // 0x80...00 = zero, 0x80...01 = numeric_limits::denorm_min(), 0x7f...ff = -numeric_limits::denorm_min() + template ::value, int> = 0> + float_bits_t offset_representation(const T& x) { + const float_bits_t abs_bits = _STD _Float_abs_bits(x); + return fputil::signbit(x) ? sign_mask_v - abs_bits : sign_mask_v + abs_bits; + } + + template ::value, int> = 0> + float_bits_t is_offset_value_subnormal_or_zero(const float_bits_t offset_value) { + constexpr float_bits_t positive_norm_min_offset = sign_mask_v + norm_min_bits_v; + constexpr float_bits_t negative_norm_min_offset = sign_mask_v - norm_min_bits_v; + + return negative_norm_min_offset < offset_value && offset_value < positive_norm_min_offset; + } + + // number of ulps above zero, if we count [0, numeric_limits::min()) as 1 ulp + template ::value, int> = 0> + double abrupt_underflow_ulp(const float_bits_t offset_value) { + using bits_type = float_bits_t; + + constexpr bits_type offset_positive_norm_min = sign_mask_v + norm_min_bits_v; + constexpr bits_type offset_negative_norm_min = sign_mask_v - norm_min_bits_v; + + if (offset_value >= offset_positive_norm_min) { + return 1.0 + (offset_value - offset_positive_norm_min); + } else if (offset_value <= offset_negative_norm_min) { + return -1.0 - (offset_negative_norm_min - offset_value); + } else if (offset_value >= sign_mask_v) { + return static_cast(offset_value - sign_mask_v) / norm_min_bits_v; + } else { + return -static_cast(sign_mask_v - offset_value) / norm_min_bits_v; + } + } + + template ::value, int> = 0> + bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) { + if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) { + return _STD _Is_nan(actual) == _STD _Is_nan(expected); + } + + if (_STD _Is_inf(expected)) { + return actual == expected; + } + + if (fputil::signbit(actual) != fputil::signbit(expected)) { + return false; + } + + using bits_type = float_bits_t; + + // compute ulp difference + const bits_type actual_offset = detail::offset_representation(actual); + const bits_type expected_offset = detail::offset_representation(expected); + const bits_type ulp_diff = + actual_offset < expected_offset ? expected_offset - actual_offset : actual_offset - expected_offset; + + if (ulp_diff <= static_cast(ulp_tolerance) && ulp_tolerance >= 0) { + return true; + } + +#if WITH_FP_ABRUPT_UNDERFLOW + // handle abrupt underflow + if (detail::is_offset_value_subnormal_or_zero(expected_offset) + || detail::is_offset_value_subnormal_or_zero(actual_offset)) { + const double adjusted_actual_ulp = detail::abrupt_underflow_ulp(actual_offset); + const double adjusted_expected_ulp = detail::abrupt_underflow_ulp(expected_offset); + const double adjusted_ulp_diff = std::abs(adjusted_actual_ulp - adjusted_expected_ulp); + + if (adjusted_ulp_diff <= ulp_tolerance) { + return true; + } + } +#endif // WITH_FP_ABRUPT_UNDERFLOW + + return false; + } + + template ::value, int> = 0> + bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) { + return _STD _Is_finite(actual) && _STD _Is_finite(expected) + && std::abs(actual - expected) <= absolute_tolerance; + } + } // namespace detail + + // returns whether floating point result is nearly equal to the expected value + template ::value, int> = 0> + bool near_equal( + const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) { + if (precise_equal(actual, expected)) { + return true; + } + + if (ulp_tolerance > 0 && detail::is_within_ulp_tolerance(actual, expected, ulp_tolerance)) { + return true; + } + + if (absolute_tolerance > 0 && detail::is_within_absolute_tolerance(actual, expected, absolute_tolerance)) { + return true; + } + + return false; + } +} // namespace fputil diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp new file mode 100644 index 0000000000..82c45a80c6 --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp @@ -0,0 +1,249 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include + +#include "floating_point_utils.hpp" +#include "test.hpp" + +constexpr complex_unary_test_case sqrt_double_cases[] = { + // normal cases + {{+0x3p-0, +0x4p-0}, {+0x2p-0, +0x1p-0}}, + {{+0x3p-0, -0x4p-0}, {+0x2p-0, -0x1p-0}}, + {{-0x3p-0, +0x4p-0}, {+0x1p-0, +0x2p-0}}, + {{-0x3p-0, -0x4p-0}, {+0x1p-0, -0x2p-0}}, + {{+0x3p-2, +0x4p-2}, {+0x2p-1, +0x1p-1}}, + {{+0x3p-2, -0x4p-2}, {+0x2p-1, -0x1p-1}}, + {{-0x3p-2, +0x4p-2}, {+0x1p-1, +0x2p-1}}, + {{-0x3p-2, -0x4p-2}, {+0x1p-1, -0x2p-1}}, + {{+0x3p-4, +0x4p-4}, {+0x2p-2, +0x1p-2}}, + {{+0x3p-4, -0x4p-4}, {+0x2p-2, -0x1p-2}}, + {{-0x3p-4, +0x4p-4}, {+0x1p-2, +0x2p-2}}, + {{-0x3p-4, -0x4p-4}, {+0x1p-2, -0x2p-2}}, + + // special cases + {{+0.0, +0.0}, {+0.0, +0.0}, {true, true}}, + {{+0.0, -0.0}, {+0.0, -0.0}, {true, true}}, + {{-0.0, +0.0}, {+0.0, +0.0}, {true, true}}, + {{-0.0, -0.0}, {+0.0, -0.0}, {true, true}}, + {{+1.0, +0.0}, {+1.0, +0.0}, {false, true}}, + {{+1.0, -0.0}, {+1.0, -0.0}, {false, true}}, + {{-1.0, +0.0}, {+0.0, +1.0}, {true, false}}, + {{-1.0, -0.0}, {+0.0, -1.0}, {true, false}}, + {{+0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}}, + {{+0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}}, + {{-0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}}, + {{-0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}}, + +#if !FP_PRESET_FAST + {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}}, + {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}}, + {{-double_inf, +0.0}, {+0.0, +double_inf}, {true, true}}, + {{-double_inf, -0.0}, {+0.0, -double_inf}, {true, true}}, + {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}}, + {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}}, + {{-double_inf, +1.0}, {+0.0, +double_inf}, {true, true}}, + {{-double_inf, -1.0}, {+0.0, -double_inf}, {true, true}}, + {{+double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{+double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{-double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{-double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{+1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{+1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{-1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{-1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{+0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{+0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{-0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{-0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{+double_inf, +double_nan}, {+double_inf, +double_nan}, {true, true}}, + {{+double_inf, -double_nan}, {+double_inf, -double_nan}, {true, true}}, + {{-double_inf, +double_nan}, {+double_nan, +double_inf}, {true, true}}, + {{-double_inf, -double_nan}, {+double_nan, -double_inf}, {true, true}}, + {{+double_nan, +double_inf}, {+double_inf, +double_inf}, {true, true}}, + {{+double_nan, -double_inf}, {+double_inf, -double_inf}, {true, true}}, + {{+double_nan, +0.0}, {+double_nan, +double_nan}, {true, true}}, + {{+double_nan, -0.0}, {+double_nan, -double_nan}, {true, true}}, + {{+0.0, +double_nan}, {+double_nan, +double_nan}, {true, true}}, + {{+0.0, -double_nan}, {+double_nan, -double_nan}, {true, true}}, + {{+double_nan, +double_nan}, {+double_nan, +double_nan}, {true, true}}, + {{+double_nan, -double_nan}, {+double_nan, -double_nan}, {true, true}}, +#endif // !FP_PRESET_FAST + + // abs(z) overflows + {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.19435caffa9f8p+512, +0x1.d203138f6c828p+510}}, + {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.0907dc1930691p+510, +0x1.ee8dd4748bf15p+511}}, + {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp+511, -0x0.0000000000000p-1022}}, + + // norm(z) overflows + {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cc1033be914a7p+331, -0x1.7432f2f528ea0p+332}}, + {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.00e0ed3ec75c3p+256, -0x1.56813c53b47afp+254}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + // abs(z) underflows + {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {+0x1.d203138f6c828p-539, +0x1.19435caffa9f9p-537}}, + {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {+0x1.0000000000001p-512, +0x1.ffffffffffffep-513}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + + // abs(z) close to underflow + {{+0x1.4p-1022, +0x1p-1022}, {+0x1.31a33f3eb2fd9p-511, +0x1.acd8ff10ebe7ep-513}}, + + // norm(z) underflows + {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {+0x1.45f5e3f782563p-332, -0x1.33cb9c4327c54p-334}}, + {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {+0x1.09220ecd9c241p-257, -0x1.09220ecd9c241p-256}}, + + // control flow edge cases + {{+0x1p-2, +0x1.fffffffffffffp-1}, {+0x1.99b96593b936dp-1, +0x1.3fe72a921c6f4p-1}}, + {{+0x1p-2, +0x1.0000000000000p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f4p-1}}, + {{+0x1p-2, +0x1.0000000000001p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f5p-1}}, + {{+0x1p+0, +0x1p-54}, {+0x1.0000000000000p+0, +0x1.0000000000000p-55}}, + {{+0x1p+0, +0x1p-53}, {+0x1.0000000000000p+0, +0x1.0000000000000p-54}}, + {{+0x1p+0, +0x1p-52}, {+0x1.0000000000000p+0, +0x1.0000000000000p-53}}, + {{+0x1p+0, +0x1.ffffffffffffep-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f720p-3}}, + {{+0x1p+0, +0x1.fffffffffffffp-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}}, + {{+0x1p+0, +0x1.0000000000000p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}}, + {{+0x1p+0, +0x1.0000000000001p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f723p-3}}, + {{+0x1.ffffffffffffep-970, +0x1.fffffffffffffp-970}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efddp-486}}, + {{+0x1.fffffffffffffp-970, +0x1.0000000000000p-969}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efdep-486}}, + {{+0x1.0000000000000p-969, +0x1.0000000000001p-969}, {+0x1.8dc42193d5c03p-485, +0x1.49852f983efdep-486}}, + {{+0x1.ffffffffffffep-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88455p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.fffffffffffffp-971, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.0000000000000p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1cp-486}}, + {{+0x1.fffffffffffffp-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.0000000000000p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.0000000000001p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}}, + {{+0x1.0000000000000p-970, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.0000000000001p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}}, + {{+0x1.0000000000002p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}}, + {{+0x1p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}}, + {{+0x1p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}}, + {{+0x1p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + {{+0x0.0000000000001p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}}, + {{+0x0.0000000000001p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}}, + {{+0x0.0000000000001p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW +}; + +constexpr complex_unary_test_case sqrt_float_cases[] = { + // normal cases + {{+0x3p-0F, +0x4p-0F}, {+0x2p-0F, +0x1p-0F}}, + {{+0x3p-0F, -0x4p-0F}, {+0x2p-0F, -0x1p-0F}}, + {{-0x3p-0F, +0x4p-0F}, {+0x1p-0F, +0x2p-0F}}, + {{-0x3p-0F, -0x4p-0F}, {+0x1p-0F, -0x2p-0F}}, + {{+0x3p-2F, +0x4p-2F}, {+0x2p-1F, +0x1p-1F}}, + {{+0x3p-2F, -0x4p-2F}, {+0x2p-1F, -0x1p-1F}}, + {{-0x3p-2F, +0x4p-2F}, {+0x1p-1F, +0x2p-1F}}, + {{-0x3p-2F, -0x4p-2F}, {+0x1p-1F, -0x2p-1F}}, + {{+0x3p-4F, +0x4p-4F}, {+0x2p-2F, +0x1p-2F}}, + {{+0x3p-4F, -0x4p-4F}, {+0x2p-2F, -0x1p-2F}}, + {{-0x3p-4F, +0x4p-4F}, {+0x1p-2F, +0x2p-2F}}, + {{-0x3p-4F, -0x4p-4F}, {+0x1p-2F, -0x2p-2F}}, + + // special cases + {{+0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}}, + {{+0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}}, + {{-0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}}, + {{-0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}}, + {{+1.0F, +0.0F}, {+1.0F, +0.0F}, {false, true}}, + {{+1.0F, -0.0F}, {+1.0F, -0.0F}, {false, true}}, + {{-1.0F, +0.0F}, {+0.0F, +1.0F}, {true, false}}, + {{-1.0F, -0.0F}, {+0.0F, -1.0F}, {true, false}}, + {{+0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}}, + {{+0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}}, + {{-0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}}, + {{-0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}}, + +#if !FP_PRESET_FAST + {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}}, + {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}}, + {{-float_inf, +0.0F}, {+0.0F, +float_inf}, {true, true}}, + {{-float_inf, -0.0F}, {+0.0F, -float_inf}, {true, true}}, + {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}}, + {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}}, + {{-float_inf, +1.0F}, {+0.0F, +float_inf}, {true, true}}, + {{-float_inf, -1.0F}, {+0.0F, -float_inf}, {true, true}}, + {{+float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{+float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{-float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{-float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{+1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{+1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{-1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{-1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{+0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{+0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{-0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{-0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{+float_inf, +float_nan}, {+float_inf, +float_nan}, {true, true}}, + {{+float_inf, -float_nan}, {+float_inf, -float_nan}, {true, true}}, + {{-float_inf, +float_nan}, {+float_nan, +float_inf}, {true, true}}, + {{-float_inf, -float_nan}, {+float_nan, -float_inf}, {true, true}}, + {{+float_nan, +float_inf}, {+float_inf, +float_inf}, {true, true}}, + {{+float_nan, -float_inf}, {+float_inf, -float_inf}, {true, true}}, + {{+float_nan, +0.0}, {+float_nan, +float_nan}, {true, true}}, + {{+float_nan, -0.0}, {+float_nan, -float_nan}, {true, true}}, + {{+0.0, +float_nan}, {+float_nan, +float_nan}, {true, true}}, + {{+0.0, -float_nan}, {+float_nan, -float_nan}, {true, true}}, + {{+float_nan, +float_nan}, {+float_nan, +float_nan}, {true, true}}, + {{+float_nan, -float_nan}, {+float_nan, -float_nan}, {true, true}}, +#endif // !FP_PRESET_FAST + + // abs(z) overflows + {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.19435cp+64F, +0x1.d20312p+62F}}, + {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.0907dcp+62F, +0x1.ee8dd4p+63F}}, + {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.fffffep+63F, -0x0.000000p-126F}}, + + // norm(z) overflows + {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.216970p+41F, -0x1.d4473ap+41F}}, + {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.f9c31ep+31F, -0x1.512cbep+30F}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + // abs(z) underflows + {{-0x0.000002p-126F, +0x0.000002p-126F}, {+0x1.498530p-76F, +0x1.8dc422p-75F}}, + {{+0x0.000002p-126F, +0x0.800000p-126F}, {+0x1.000002p-64F, +0x1.fffffcp-65F}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + + // abs(z) close to underflow + {{+0x1.4p-126F, +0x1p-126F}, {+0x1.31a340p-63F, +0x1.acd900p-65F}}, + + // norm(z) underflows + {{+0x1.ef2d1p-83F, -0x1.ef2d10p-84F}, {+0x1.0314d8p-41F, -0x1.e9495ep-44F}}, + {{-0x1.622d7p-61F, -0x1.d83c94p-61F}, {+0x1.ebb76ep-32F, -0x1.ebb770p-31F}}, + + // control flow edge cases + {{+0x1p-2F, +0x1.fffffep-1F}, {+0x1.99b964p-1F, +0x1.3fe72ap-1F}}, + {{+0x1p-2F, +0x1.000000p+0F}, {+0x1.99b966p-1F, +0x1.3fe72ap-1F}}, + {{+0x1p-2F, +0x1.000002p+0F}, {+0x1.99b966p-1F, +0x1.3fe72cp-1F}}, + {{+0x1p+0F, +0x1p-25F}, {+0x1.000000p+0F, +0x1.000000p-26F}}, + {{+0x1p+0F, +0x1p-24F}, {+0x1.000000p+0F, +0x1.000000p-25F}}, + {{+0x1p+0F, +0x1p-23F}, {+0x1.000000p+0F, +0x1.000000p-24F}}, + {{+0x1p+0F, +0x1.fffffcp-2F}, {+0x1.077226p+0F, +0x1.f18770p-3F}}, + {{+0x1p+0F, +0x1.fffffep-2F}, {+0x1.077226p+0F, +0x1.f18772p-3F}}, + {{+0x1p+0F, +0x1.000000p-1F}, {+0x1.077226p+0F, +0x1.f18774p-3F}}, + {{+0x1p+0F, +0x1.000002p-1F}, {+0x1.077226p+0F, +0x1.f18778p-3F}}, + {{+0x1.fffffcp-103F, +0x1.fffffep-103F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}}, + {{+0x1.fffffep-103F, +0x1.000000p-102F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}}, + {{+0x1.000000p-102F, +0x1.000002p-102F}, {+0x1.19435ep-51F, +0x1.d20316p-53F}}, + {{+0x1.fffffcp-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}}, + {{+0x1.fffffep-104F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e02p-52F}}, + {{+0x1.000000p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}}, + {{+0x1.fffffep-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}}, + {{+0x1.000000p-103F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}}, + {{+0x1.000002p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}}, + {{+0x1.000000p-103F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}}, + {{+0x1.000002p-103F, +0x1.000000p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e00p-52F}}, + {{+0x1.000004p-103F, +0x1.000002p-102F}, {+0x1.cc8536p-52F, +0x1.1c9e02p-52F}}, + {{+0x1.000000p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e4p-52F}}, + {{+0x1.000000p-126F, +0x1.000000p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e6p-52F}}, + {{+0x1.000000p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + {{+0x0.000002p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}}, + {{+0x0.000002p-126F, +0x1.000000p-102F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}}, + {{+0x0.000002p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW +}; diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp new file mode 100644 index 0000000000..363c3649f2 --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +#include "floating_point_utils.hpp" +#include "sqrt_test_cases.hpp" + +using namespace std; +using namespace fputil; + +void test_sqrt(const rounding_mode mode) { +#if FP_PRESET_FAST + constexpr int ulp_tolerance = 4; +#else // ^^^ fp:fast / not fp:fast vvv + const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2; +#endif // ^^^ not fp:fast ^^^ + + const auto check_result = [&](const auto& result, const auto& test_case) { + const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance; + const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance; + return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance) + && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance); + }; + + for (const auto& c : sqrt_double_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return sqrt(c.input); + }(); + + assert(check_result(result, c)); + } + + for (const auto& c : sqrt_float_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return sqrt(c.input); + }(); + + assert(check_result(result, c)); + } + + for (const auto& c : sqrt_double_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return sqrt(static_cast>(c.input)); + }(); + + assert(check_result(static_cast>(result), c)); + } +} + +int main() { + for (const auto& mode : all_rounding_modes) { + test_sqrt(mode); + } +} diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp new file mode 100644 index 0000000000..681d0e4523 --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include +#include + +#include "floating_point_utils.hpp" + +constexpr double double_inf = std::numeric_limits::infinity(); +constexpr double double_nan = std::numeric_limits::quiet_NaN(); + +constexpr float float_inf = std::numeric_limits::infinity(); +constexpr float float_nan = std::numeric_limits::quiet_NaN(); + +struct complex_result_exactness { + bool real = false; + bool imag = false; +}; + +template +struct complex_unary_test_case { + std::complex input; + std::complex expected_result; + complex_result_exactness result_exactness{}; +}; diff --git a/tests/utils/stl/test/config.py b/tests/utils/stl/test/config.py index d9a6c04035..209aa4d9f3 100644 --- a/tests/utils/stl/test/config.py +++ b/tests/utils/stl/test/config.py @@ -129,6 +129,12 @@ def configure_features(self): if self.target_arch == 'x86': self.config.available_features.add('edg') + self.config.available_features.add('arch_ia32') + self.config.available_features.add('arch_avx2') + elif self.target_arch == 'x64': + self.config.available_features.add('arch_avx2') + elif self.target_arch == 'arm': + self.config.available_features.add('arch_vfpv4') def configure_test_source_root(self): test_source_root = self.get_lit_conf('test_source_root', None) diff --git a/tests/utils/stl/test/tests.py b/tests/utils/stl/test/tests.py index 6728132870..3d5cc2e013 100644 --- a/tests/utils/stl/test/tests.py +++ b/tests/utils/stl/test/tests.py @@ -41,6 +41,12 @@ def __init__(self, suite, path_in_suite, lit_config, test_config, self.requires.append('clr') # TRANSITION, GH-797 elif flag[1:] == 'BE': self.requires.append('edg') # available for x86, see config.py + elif flag[1:] == 'arch:AVX2': + self.requires.append('arch_avx2') # available for x86 and x64, see config.py + elif flag[1:] == 'arch:IA32': + self.requires.append('arch_ia32') # available for x86, see config.py + elif flag[1:] == 'arch:VFPv4': + self.requires.append('arch_vfpv4') # available for arm, see config.py def getOutputDir(self): return Path(os.path.join( @@ -135,6 +141,8 @@ def _configure_cxx(self, lit_config, envlst_entry, default_cxx): link_flags.extend(envlst_entry.getEnvVal('PM_LINK', '').split()) if ('clang'.casefold() in os.path.basename(cxx).casefold()): + flags.extend(map(lambda x : '/clang:' + x, envlst_entry.getEnvVal('PM_CLANG', '').split())) + target_arch = self.config.target_arch.casefold() if (target_arch == 'x64'.casefold()): compile_flags.append('-m64') From d79c9f39401b170547eff9b024929fca39d2390f Mon Sep 17 00:00:00 2001 From: statementreply Date: Sat, 8 Aug 2020 18:02:30 +0800 Subject: [PATCH 05/19] Add complex log test --- .../log_test_cases.hpp | 276 ++++++++++++++++++ .../test.cpp | 67 ++++- 2 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp new file mode 100644 index 0000000000..29b349e440 --- /dev/null +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp @@ -0,0 +1,276 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include + +#include "floating_point_utils.hpp" +#include "test.hpp" + +template +constexpr T pi_over_4_v = T{0.7853981633974483}; +template +constexpr T pi_over_2_v = T{1.5707963267948966}; +template +constexpr T pi_3_over_4_v = T{2.356194490192345}; +template +constexpr T pi_v = T{3.141592653589793}; + +constexpr complex_unary_test_case log_double_cases[] = { + // normal cases + {{+0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.dac670561bb4fp-1}}, + {{+0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.dac670561bb4fp-1}}, + {{-0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.1b6e192ebbe44p+1}}, + {{-0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.1b6e192ebbe44p+1}}, + {{+0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.dac670561bb4fp-1}}, + {{+0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.dac670561bb4fp-1}}, + {{-0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.1b6e192ebbe44p+1}}, + {{-0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.1b6e192ebbe44p+1}}, + {{+0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.dac670561bb4fp-1}}, + {{+0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.dac670561bb4fp-1}}, + {{-0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.1b6e192ebbe44p+1}}, + {{-0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.1b6e192ebbe44p+1}}, + + // special cases + {{+1.0, +0.0}, {0.0, +0.0}, {true, true}}, + {{+1.0, -0.0}, {0.0, -0.0}, {true, true}}, + {{+0.0, +1.0}, {0.0, +pi_over_2_v}, {true, false}}, + {{+0.0, -1.0}, {0.0, -pi_over_2_v}, {true, false}}, + {{-0.0, +1.0}, {0.0, +pi_over_2_v}, {true, false}}, + {{-0.0, -1.0}, {0.0, -pi_over_2_v}, {true, false}}, + {{-1.0, +0.0}, {0.0, +pi_v}, {true, false}}, + {{-1.0, -0.0}, {0.0, -pi_v}, {true, false}}, + +#if !FP_PRESET_FAST + {{+0.0, +0.0}, {-double_inf, +0.0}, {true, true}}, + {{+0.0, -0.0}, {-double_inf, -0.0}, {true, true}}, + {{-0.0, +0.0}, {-double_inf, +pi_v}, {true, false}}, + {{-0.0, -0.0}, {-double_inf, -pi_v}, {true, false}}, + {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}}, + {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}}, + {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}}, + {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}}, + {{+double_inf, +double_inf}, {+double_inf, +pi_over_4_v}, {true, false}}, + {{+double_inf, -double_inf}, {+double_inf, -pi_over_4_v}, {true, false}}, + {{+1.0, +double_inf}, {+double_inf, +pi_over_2_v}, {true, false}}, + {{+1.0, -double_inf}, {+double_inf, -pi_over_2_v}, {true, false}}, + {{+0.0, +double_inf}, {+double_inf, +pi_over_2_v}, {true, false}}, + {{+0.0, -double_inf}, {+double_inf, -pi_over_2_v}, {true, false}}, + {{-0.0, +double_inf}, {+double_inf, +pi_over_2_v}, {true, false}}, + {{-0.0, -double_inf}, {+double_inf, -pi_over_2_v}, {true, false}}, + {{-1.0, +double_inf}, {+double_inf, +pi_over_2_v}, {true, false}}, + {{-1.0, -double_inf}, {+double_inf, -pi_over_2_v}, {true, false}}, + {{-double_inf, +double_inf}, {+double_inf, +pi_3_over_4_v}, {true, false}}, + {{-double_inf, -double_inf}, {+double_inf, -pi_3_over_4_v}, {true, false}}, + {{-double_inf, +1.0}, {+double_inf, +pi_v}, {true, false}}, + {{-double_inf, -1.0}, {+double_inf, -pi_v}, {true, false}}, + {{-double_inf, +0.0}, {+double_inf, +pi_v}, {true, false}}, + {{-double_inf, -0.0}, {+double_inf, -pi_v}, {true, false}}, + {{+double_inf, double_nan}, {+double_inf, double_nan}, {true, true}}, + {{-double_inf, double_nan}, {+double_inf, double_nan}, {true, true}}, + {{double_nan, +double_inf}, {+double_inf, double_nan}, {true, true}}, + {{double_nan, -double_inf}, {+double_inf, double_nan}, {true, true}}, + {{double_nan, +0.0}, {double_nan, double_nan}, {true, true}}, + {{+0.0, double_nan}, {double_nan, double_nan}, {true, true}}, + {{double_nan, double_nan}, {double_nan, double_nan}, {true, true}}, +#endif // !FP_PRESET_FAST + + // abs(z) overflows + {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.63108c75a1936p+9, +0x1.921fb54442d18p-1}}, + {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.62e42fefa39efp+9, +0x1.4f1a6c638d03fp+1}}, + {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.62e42fefa39efp+9, -0x0.0000000000000p-1022}}, + + // norm(z) overflows + {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cd525d6474bb8p+8, -0x1.0468a8ace4df6p+1}}, + {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.6300e9ed15a44p+8, -0x1.4978fa3269ee1p-1}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + // abs(z) underflows + {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {-0x1.740bf7c0d927cp+9, +0x1.2d97c7f3321d2p+1}}, + {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {-0x1.628b76e3a7b61p+9, +0x1.921fb54442d16p+0}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + + // abs(z) close to underflow + {{+0x1.4p-1022, +0x1p-1022}, {-0x1.61f684c577299p+9, +0x1.5977a5103ea92p-1}}, + + // norm(z) underflows + {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {-0x1.cbb65944f5e2bp+8, -0x1.dac670561bb4fp-2}}, + {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {-0x1.62991d5d62a5ep+8, -0x1.1b6e192ebbe44p+1}}, + + // z close to 1 + {{+0x1.0000000000001p+0, -0.0}, {+0x1.fffffffffffffp-53, -0.0}, {false, true}}, + {{+0x1.fffffffffffffp-1, +0.0}, {-0x1.0000000000000p-53, +0.0}, {false, true}}, +#if !WITH_FP_ABRUPT_UNDERFLOW + {{+0x1.0000000000001p+0, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp-53, -0x0.0000000000001p-1022}}, + {{+0x1.0000000000000p+0, +0x0.0000000000001p-1022}, {+0x0.0000000000000p-1022, +0x0.0000000000001p-1022}}, + {{+0x1.fffffffffffffp-1, -0x0.0000000000001p-1022}, {-0x1.0000000000000p-53, -0x0.0000000000001p-1022}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + {{+0x1.0000000000001p+0, +0x1p-1022}, {+0x1.fffffffffffffp-53, +0x0.fffffffffffffp-1022}}, + {{+0x1.0000000000000p+0, -0x1p-1022}, {+0x0.0000000000000p-1022, -0x1.0000000000000p-1022}}, + {{+0x1.fffffffffffffp-1, +0x1p-1022}, {-0x1.0000000000000p-53, +0x1.0000000000001p-1022}}, + {{+0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.ffffffffffffep-53}}, + {{+0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.0000000000000p-52}}, + {{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}}, + {{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}}, + + // z close to -1, i or -i + {{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}}, + {{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}}, + {{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}}, + {{+0x1p-52, +0x1.0000000000001p+0}, {+0x1.0000000000000p-52, +0x1.921fb54442d17p+0}}, + {{-0x1p-52, +0x1.0000000000000p+0}, {+0x1.0000000000000p-105, +0x1.921fb54442d19p+0}}, + {{+0x1p-52, +0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, +0x1.921fb54442d17p+0}}, + {{-0x1p-52, -0x1.0000000000001p+0}, {+0x1.0000000000000p-52, -0x1.921fb54442d19p+0}}, + {{+0x1p-52, -0x1.0000000000000p+0}, {+0x1.0000000000000p-105, -0x1.921fb54442d17p+0}}, + {{-0x1p-52, -0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d19p+0}}, + + // abs(z) close to 1 + {{+0x1.6a09e667f3bccp-1, +0x1.6a09e667f3bccp-1}, {-0x1.98d4d0da05571p-54, +0x1.921fb54442d18p-1}}, + {{+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}, {+0x1.3b3efbf5e2229p-54, -0x1.921fb54442d18p-1}}, + {{-0x1.3333333333333p-1, -0x1.999999999999ap-1}, {+0x1.999999999999ap-56, -0x1.1b6e192ebbe44p+1}}, + {{-0x1.3333333333333p-1, +0x1.9999999999999p-1}, {-0x1.3333333333333p-54, +0x1.1b6e192ebbe44p+1}}, + {{+0x1.69fbe76c8b439p-1, +0x1.69fbe76c8b439p-1}, {-0x1.3cb7c059d6699p-13, +0x1.921fb54442d18p-1}}, + {{-0x1.d89d89d89d89ep-1, +0x1.89d89d89d89d6p-2}, {-0x1.3b13b13b13b0cp-57, +0x1.5f97315254857p+1}}, + + // control flow edge cases + {{+0x1p-1, +0x1.fffffffffffffp-2}, {-0x1.62e42fefa39f0p-2, +0x1.921fb54442d18p-1}}, + {{+0x1p-1, +0x1.0000000000000p-1}, {-0x1.62e42fefa39efp-2, +0x1.921fb54442d18p-1}}, + {{+0x1p-1, +0x1.0000000000001p-1}, {-0x1.62e42fefa39edp-2, +0x1.921fb54442d19p-1}}, + {{+0x1p-1, +0x1.a887293fd6f33p+0}, {+0x1.193ea7aad0309p-1, +0x1.4727f6d4d118cp+0}}, + {{+0x1p-1, +0x1.a887293fd6f34p+0}, {+0x1.193ea7aad030ap-1, +0x1.4727f6d4d118dp+0}}, + {{+0x1p-1, +0x1.a887293fd6f35p+0}, {+0x1.193ea7aad030cp-1, +0x1.4727f6d4d118dp+0}}, + {{+6.703903964971297e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}}, + {{+6.703903964971298e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}}, + {{+1e-154, +1.4156865331029228e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}}, + {{+1e-154, +1.415686533102923e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}}, +}; + +constexpr complex_unary_test_case log_float_cases[] = { + // normal cases + {{+0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.dac670p-1F}}, + {{+0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.dac670p-1F}}, + {{-0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.1b6e1ap+1F}}, + {{-0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.1b6e1ap+1F}}, + {{+0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.dac670p-1F}}, + {{+0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.dac670p-1F}}, + {{-0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.1b6e1ap+1F}}, + {{-0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.1b6e1ap+1F}}, + {{+0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.dac670p-1F}}, + {{+0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.dac670p-1F}}, + {{-0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.1b6e1ap+1F}}, + {{-0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.1b6e1ap+1F}}, + + // special cases + {{+1.0F, +0.0F}, {0.0F, +0.0F}, {true, true}}, + {{+1.0F, -0.0F}, {0.0F, -0.0F}, {true, true}}, + {{+0.0F, +1.0F}, {0.0F, +pi_over_2_v}, {true, false}}, + {{+0.0F, -1.0F}, {0.0F, -pi_over_2_v}, {true, false}}, + {{-0.0F, +1.0F}, {0.0F, +pi_over_2_v}, {true, false}}, + {{-0.0F, -1.0F}, {0.0F, -pi_over_2_v}, {true, false}}, + {{-1.0F, +0.0F}, {0.0F, +pi_v}, {true, false}}, + {{-1.0F, -0.0F}, {0.0F, -pi_v}, {true, false}}, + +#if !FP_PRESET_FAST + {{+0.0F, +0.0F}, {-float_inf, +0.0F}, {true, true}}, + {{+0.0F, -0.0F}, {-float_inf, -0.0F}, {true, true}}, + {{-0.0F, +0.0F}, {-float_inf, +pi_v}, {true, false}}, + {{-0.0F, -0.0F}, {-float_inf, -pi_v}, {true, false}}, + {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}}, + {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}}, + {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}}, + {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}}, + {{+float_inf, +float_inf}, {+float_inf, +pi_over_4_v}, {true, false}}, + {{+float_inf, -float_inf}, {+float_inf, -pi_over_4_v}, {true, false}}, + {{+1.0F, +float_inf}, {+float_inf, +pi_over_2_v}, {true, false}}, + {{+1.0F, -float_inf}, {+float_inf, -pi_over_2_v}, {true, false}}, + {{+0.0F, +float_inf}, {+float_inf, +pi_over_2_v}, {true, false}}, + {{+0.0F, -float_inf}, {+float_inf, -pi_over_2_v}, {true, false}}, + {{-0.0F, +float_inf}, {+float_inf, +pi_over_2_v}, {true, false}}, + {{-0.0F, -float_inf}, {+float_inf, -pi_over_2_v}, {true, false}}, + {{-1.0F, +float_inf}, {+float_inf, +pi_over_2_v}, {true, false}}, + {{-1.0F, -float_inf}, {+float_inf, -pi_over_2_v}, {true, false}}, + {{-float_inf, +float_inf}, {+float_inf, +pi_3_over_4_v}, {true, false}}, + {{-float_inf, -float_inf}, {+float_inf, -pi_3_over_4_v}, {true, false}}, + {{-float_inf, +1.0F}, {+float_inf, +pi_v}, {true, false}}, + {{-float_inf, -1.0F}, {+float_inf, -pi_v}, {true, false}}, + {{-float_inf, +0.0F}, {+float_inf, +pi_v}, {true, false}}, + {{-float_inf, -0.0F}, {+float_inf, -pi_v}, {true, false}}, + {{+float_inf, float_nan}, {+float_inf, float_nan}, {true, true}}, + {{-float_inf, float_nan}, {+float_inf, float_nan}, {true, true}}, + {{float_nan, +float_inf}, {+float_inf, float_nan}, {true, true}}, + {{float_nan, -float_inf}, {+float_inf, float_nan}, {true, true}}, + {{float_nan, +0.0F}, {float_nan, float_nan}, {true, true}}, + {{+0.0F, float_nan}, {float_nan, float_nan}, {true, true}}, + {{float_nan, float_nan}, {float_nan, float_nan}, {true, true}}, +#endif // !FP_PRESET_FAST + + // abs(z) overflows + {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.644714p+6F, +0x1.921fb6p-1F}}, + {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.62e430p+6F, +0x1.4f1a6cp+1F}}, + {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.62e430p+6F, -0x0.000000p-126F}}, + + // norm(z) overflows + {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.d2f46cp+5F, -0x1.0468a8p+1F}}, + {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.6389c2p+5F, -0x1.4978fap-1F}}, + +#if !WITH_FP_ABRUPT_UNDERFLOW + // abs(z) underflows + {{-0x0.000002p-126F, +0x0.000002p-126F}, {-0x1.9bbabcp+6F, +0x1.2d97c8p+1F}}, + {{+0x0.000002p-126F, +0x0.800000p-126F}, {-0x1.601e68p+6F, +0x1.921fb2p+0F}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + + // abs(z) close to underflow + {{+0x1.4p-126F, +0x1p-126F}, {-0x1.5b76d6p+6F, +0x1.5977a6p-1F}}, + + // norm(z) underflows + {{+0x1.ef2d10p-83F, -0x1.ef2d10p-84F}, {-0x1.c6144ap+5F, -0x1.dac670p-2F}}, + {{-0x1.622d70p-61F, -0x1.d83c94p-61F}, {-0x1.4b9280p+5F, -0x1.1b6e1ap+1F}}, + + // z close to 1 + {{+0x1.000002p+0F, -0.0F}, {+0x1.fffffep-24F, -0.0F}, {false, true}}, + {{+0x1.fffffep-1F, +0.0F}, {-0x1.000000p-24F, +0.0F}, {false, true}}, +#if !WITH_FP_ABRUPT_UNDERFLOW + {{+0x1.000002p+0F, -0x0.000002p-126F}, {+0x1.fffffep-24F, -0x0.000002p-126F}}, + {{+0x1.000000p+0F, +0x0.000002p-126F}, {+0x0.000000p-126F, +0x0.000002p-126F}}, + {{+0x1.fffffep-1F, -0x0.000002p-126F}, {-0x1.000000p-24F, -0x0.000002p-126F}}, +#endif // !WITH_FP_ABRUPT_UNDERFLOW + {{+0x1.000002p+0F, +0x1.000000p-126F}, {+0x1.fffffep-24F, +0x0.fffffep-126F}}, + {{+0x1.000000p+0F, -0x1.000000p-126F}, {+0x0.000000p+0F, -0x1.000000p-126F}}, + {{+0x1.fffffep-1F, +0x1.000000p-126F}, {-0x1.000000p-24F, +0x1.000002p-126F}}, + {{+0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.fffffcp-24F}}, + {{+0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.000000p-23F}}, + {{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}}, + {{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}}, + + // z close to -1, i or -i + {{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}}, + {{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}}, + {{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}}, + {{+0x1.000000p-23F, +0x1.000002p+0F}, {+0x1.000000p-23F, +0x1.921fb4p+0F}}, + {{-0x1.000000p-23F, +0x1.000000p+0F}, {+0x1.000000p-47F, +0x1.921fb8p+0F}}, + {{+0x1.000000p-23F, +0x1.fffffep-1F}, {-0x1.fffffcp-25F, +0x1.921fb4p+0F}}, + {{-0x1.000000p-23F, -0x1.000002p+0F}, {+0x1.000000p-23F, -0x1.921fb8p+0F}}, + {{+0x1.000000p-23F, -0x1.000000p+0F}, {+0x1.000000p-47F, -0x1.921fb4p+0F}}, + {{-0x1.000000p-23F, -0x1.fffffep-1F}, {-0x1.fffffcp-25F, -0x1.921fb8p+0F}}, + + // abs(z) close to 1 + {{+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}, {-0x1.26055cp-26F, +0x1.921fb6p-1F}}, + {{+0x1.6a09e8p-1F, -0x1.6a09e8p-1F}, {+0x1.20888ep-24F, -0x1.921fb6p-1F}}, + {{-0x1.333334p-1F, -0x1.99999ap-1F}, {+0x1.99999ap-26F, -0x1.1b6e1ap+1F}}, + {{-0x1.333332p-1F, +0x1.99999ap-1F}, {-0x1.999998p-27F, +0x1.1b6e18p+1F}}, + {{+0x1.69fbe8p-1F, +0x1.69fbe8p-1F}, {-0x1.3caab8p-13F, +0x1.921fb6p-1F}}, + {{-0x1.d89d8ap-1F, +0x1.89d89ep-2F}, {+0x1.d89d8ap-28F, +0x1.5f9732p+1F}}, + + // control flow edge cases + {{+0x1p-1F, +0x1.fffffep-2F}, {-0x1.62e432p-2F, +0x1.921fb4p-1F}}, + {{+0x1p-1F, +0x1.000000p-1F}, {-0x1.62e430p-2F, +0x1.921fb6p-1F}}, + {{+0x1p-1F, +0x1.000002p-1F}, {-0x1.62e42cp-2F, +0x1.921fb8p-1F}}, + {{+0x1p-1F, +0x1.a88728p+0F}, {+0x1.193ea6p-1F, +0x1.4727f6p+0F}}, + {{+0x1p-1F, +0x1.a8872ap+0F}, {+0x1.193ea8p-1F, +0x1.4727f6p+0F}}, + {{+0x1p-1F, +0x1.a8872cp+0F}, {+0x1.193eaap-1F, +0x1.4727f8p+0F}}, + {{+9.223371e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}}, + {{+9.2233715e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}}, + {{+7e-20F, +4.440892e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}}, + {{+7e-20F, +4.4408926e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}}, +}; diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp index 363c3649f2..ffce4d5ec2 100644 --- a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp @@ -5,6 +5,7 @@ #include #include "floating_point_utils.hpp" +#include "log_test_cases.hpp" #include "sqrt_test_cases.hpp" using namespace std; @@ -14,7 +15,7 @@ void test_sqrt(const rounding_mode mode) { #if FP_PRESET_FAST constexpr int ulp_tolerance = 4; #else // ^^^ fp:fast / not fp:fast vvv - const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2; + const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2; #endif // ^^^ not fp:fast ^^^ const auto check_result = [&](const auto& result, const auto& test_case) { @@ -52,8 +53,72 @@ void test_sqrt(const rounding_mode mode) { } } +void test_log(const rounding_mode mode) { +#if FP_PRESET_FAST + constexpr int ulp_tolerance = 4; + // under /fp:fast, allow inaccurate real(log(z)) when |z| is close to 1 + constexpr double real_absolute_epsilon_tolerance = 4; +#else // ^^^ fp:fast / not fp:fast vvv + const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2; + constexpr double real_absolute_epsilon_tolerance = 0; +#endif // ^^^ not fp:fast ^^^ + + const auto check_result = [&](const auto& result, const auto& test_case) { + using Float = decltype(result.real()); + + constexpr auto epsilon = static_cast(numeric_limits::epsilon()); + const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance; + const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance; + const double case_real_absolute_tolerance = + test_case.result_exactness.real ? 0.0 : real_absolute_epsilon_tolerance * epsilon; + + // TRANSITION: under rounding toward negative mode, log(1.0) returns +0.0 on x86, -0.0 on x64 + const auto is_mod_exactly_one = [](const auto& z) { + // no other complex has mod of exactly 1 + return (abs(real(z)) == 1 && imag(z) == 0) || (real(z) == 0 && abs(imag(z)) == 1); + }; + + if (mode == rounding_mode::toward_negative && is_mod_exactly_one(test_case.input)) { + return abs(result.real()) <= case_real_absolute_tolerance + && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance); + } + + return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance, + case_real_absolute_tolerance) + && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance); + }; + + for (const auto& c : log_double_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return log(c.input); + }(); + + assert(check_result(result, c)); + } + + for (const auto& c : log_float_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return log(c.input); + }(); + + assert(check_result(result, c)); + } + + for (const auto& c : log_double_cases) { + const auto result = [&] { + rounding_guard guard(mode); + return log(static_cast>(c.input)); + }(); + + assert(check_result(static_cast>(result), c)); + } +} + int main() { for (const auto& mode : all_rounding_modes) { test_sqrt(mode); + test_log(mode); } } From b33c29eeab300cb21aee91634a5edf07ffbda9ae Mon Sep 17 00:00:00 2001 From: statementreply Date: Sat, 8 Aug 2020 18:07:00 +0800 Subject: [PATCH 06/19] Remove internal header file from CMake source file list --- stl/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index 6c5e1d445a..c7cfc34446 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -240,7 +240,6 @@ endforeach() # Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib. set(IMPLIB_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp - ${CMAKE_CURRENT_LIST_DIR}/src/float_multi_prec.hpp ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp From e25900933e2b1bff5a2fd28ab8703deadde444ff Mon Sep 17 00:00:00 2001 From: statementreply Date: Sat, 8 Aug 2020 18:09:22 +0800 Subject: [PATCH 07/19] Add new file to MSBuild project --- stl/msbuild/stl_base/stl.files.settings.targets | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets index 5fd1b26310..f1ef5cc9c9 100644 --- a/stl/msbuild/stl_base/stl.files.settings.targets +++ b/stl/msbuild/stl_base/stl.files.settings.targets @@ -171,6 +171,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception Date: Sun, 6 Sep 2020 19:51:21 +0800 Subject: [PATCH 15/19] Remove PM_CLANG --- tests/utils/stl/test/tests.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/utils/stl/test/tests.py b/tests/utils/stl/test/tests.py index 3d5cc2e013..6da478208a 100644 --- a/tests/utils/stl/test/tests.py +++ b/tests/utils/stl/test/tests.py @@ -141,8 +141,6 @@ def _configure_cxx(self, lit_config, envlst_entry, default_cxx): link_flags.extend(envlst_entry.getEnvVal('PM_LINK', '').split()) if ('clang'.casefold() in os.path.basename(cxx).casefold()): - flags.extend(map(lambda x : '/clang:' + x, envlst_entry.getEnvVal('PM_CLANG', '').split())) - target_arch = self.config.target_arch.casefold() if (target_arch == 'x64'.casefold()): compile_flags.append('-m64') From 2b39e7b2b93ce84cfeab3158fc385e89efa54172 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 4 Nov 2020 02:29:17 -0800 Subject: [PATCH 16/19] Code review feedback. --- stl/inc/complex | 10 +++++----- stl/src/float_multi_prec.hpp | 16 ++++++++-------- tests/std/include/fenv_prefix.hpp | 2 +- .../floating_point_utils.hpp | 18 +++++++++--------- .../log_test_cases.hpp | 4 ++-- .../test.cpp | 1 + .../std/tests/floating_point_model_matrix.lst | 6 +++--- 7 files changed, 29 insertions(+), 28 deletions(-) diff --git a/stl/inc/complex b/stl/inc/complex index 91e2d7c53a..0c2f51d9f4 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -1545,10 +1545,10 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) { // FUNCTION TEMPLATE _Fabs template -_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return magnitude and scale factor. - // returns a non-zero even integer in *_Pexp when _Left is finite +_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return magnitude and scale factor. + // Returns a non-zero even integer in *_Pexp when _Left is finite // and non-zero. - // returns 0 in *_Pexp when _Left is zero, infinity or NaN. + // Returns 0 in *_Pexp when _Left is zero, infinity, or NaN. *_Pexp = 0; _Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left)); _Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left)); @@ -1614,11 +1614,11 @@ template _NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types return static_cast<_Ty>( __std_math_log_hypot(static_cast(_STD real(_Left)), static_cast(_STD imag(_Left)))); -}; +} _NODISCARD inline float _Log_abs(const complex& _Left) noexcept { return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left)); -}; +} template _NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) { diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp index d3dbd2f55c..7239519f68 100644 --- a/stl/src/float_multi_prec.hpp +++ b/stl/src/float_multi_prec.hpp @@ -29,9 +29,9 @@ namespace _Float_multi_prec { template struct _Fmp_t<_Ty, 2> { - static_assert(_STD is_floating_point_v<_Ty>); - _Ty _Val0; // most significant numeric_limis<_Ty>::precision bits - _Ty _Val1; // least significant numeric_limis<_Ty>::precision bits + static_assert(is_floating_point_v<_Ty>); + _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits + _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits }; // addition @@ -92,10 +92,10 @@ namespace _Float_multi_prec { // multiplication // round to 26 significant bits, ties toward zero - _NODISCARD inline constexpr double _High_half(const double _Val) { - const auto _Bits = _STD _Bit_cast(_Val); + _NODISCARD constexpr double _High_half(const double _Val) noexcept { + const auto _Bits = _Bit_cast(_Val); const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; - return _STD _Bit_cast(_High_half_bits); + return _Bit_cast(_High_half_bits); } // _Xval * _Xval - _Prod0 @@ -103,7 +103,7 @@ namespace _Float_multi_prec { // 1) _Prod0 is _Xval^2 faithfully rounded // 2) no internal overflow or underflow occurs // violation of condition 1 could lead to relative error on the order of epsilon - _NODISCARD inline constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept { + _NODISCARD constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept { const double _Xhigh = _High_half(_Xval); const double _Xlow = _Xval - _Xhigh; return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow; @@ -133,7 +133,7 @@ namespace _Float_multi_prec { // square(1x precision) -> 2x precision // the result is exact when no internal overflow or underflow occurs - _NODISCARD inline constexpr _Fmp_t _Sqr_x2(const double _Xval) noexcept { + _NODISCARD constexpr _Fmp_t _Sqr_x2(const double _Xval) noexcept { const double _Prod0 = _Xval * _Xval; if (_STD is_constant_evaluated()) { diff --git a/tests/std/include/fenv_prefix.hpp b/tests/std/include/fenv_prefix.hpp index ab1a82fe54..bd3055a41f 100644 --- a/tests/std/include/fenv_prefix.hpp +++ b/tests/std/include/fenv_prefix.hpp @@ -37,7 +37,7 @@ #endif // ^^^ invalid FP_CONTRACT_MODE ^^^ #endif // ^^^ MSVC ^^^ -#endif // defined(FP_CONTRACT_MODE) && !defined(__clang__) +#endif // defined(FP_CONTRACT_MODE) #include #include diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp index 22c2ad18f7..0688d763ea 100644 --- a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp @@ -36,13 +36,13 @@ namespace fputil { _INLINE_VAR constexpr float_bits_t infinity_bits_v = exponent_mask_v; // not affected by abrupt underflow - template ::value, int> = 0> + template , int> = 0> constexpr bool iszero(const T& x) { return _STD _Float_abs_bits(x) == 0; } // not affected by /fp:fast - template ::value, int> = 0> + template , int> = 0> constexpr bool signbit(const T& x) { const auto bits = std::_Bit_cast>(x); return (bits & sign_mask_v) != 0; @@ -126,7 +126,7 @@ namespace fputil { // compares whether two floating point values are equal // all NaNs are equal, +0.0 and -0.0 are not equal - template ::value, int> = 0> + template , int> = 0> bool precise_equal(const T& actual, const T& expected) { if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) { return _STD _Is_nan(actual) == _STD _Is_nan(expected); @@ -137,13 +137,13 @@ namespace fputil { namespace detail { // 0x80...00 = zero, 0x80...01 = numeric_limits::denorm_min(), 0x7f...ff = -numeric_limits::denorm_min() - template ::value, int> = 0> + template , int> = 0> float_bits_t offset_representation(const T& x) { const float_bits_t abs_bits = _STD _Float_abs_bits(x); return fputil::signbit(x) ? sign_mask_v - abs_bits : sign_mask_v + abs_bits; } - template ::value, int> = 0> + template , int> = 0> float_bits_t is_offset_value_subnormal_or_zero(const float_bits_t offset_value) { constexpr float_bits_t positive_norm_min_offset = sign_mask_v + norm_min_bits_v; constexpr float_bits_t negative_norm_min_offset = sign_mask_v - norm_min_bits_v; @@ -152,7 +152,7 @@ namespace fputil { } // number of ulps above zero, if we count [0, numeric_limits::min()) as 1 ulp - template ::value, int> = 0> + template , int> = 0> double abrupt_underflow_ulp(const float_bits_t offset_value) { using bits_type = float_bits_t; @@ -170,7 +170,7 @@ namespace fputil { } } - template ::value, int> = 0> + template , int> = 0> bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) { if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) { return _STD _Is_nan(actual) == _STD _Is_nan(expected); @@ -213,7 +213,7 @@ namespace fputil { return false; } - template ::value, int> = 0> + template , int> = 0> bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) { return _STD _Is_finite(actual) && _STD _Is_finite(expected) && std::abs(actual - expected) <= absolute_tolerance; @@ -221,7 +221,7 @@ namespace fputil { } // namespace detail // returns whether floating point result is nearly equal to the expected value - template ::value, int> = 0> + template , int> = 0> bool near_equal( const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) { if (precise_equal(actual, expected)) { diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp index 29b349e440..d67e12dd1b 100644 --- a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp @@ -114,7 +114,7 @@ constexpr complex_unary_test_case log_double_cases[] = { {{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}}, {{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}}, - // z close to -1, i or -i + // z close to -1, i, or -i {{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}}, {{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}}, {{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}}, @@ -243,7 +243,7 @@ constexpr complex_unary_test_case log_float_cases[] = { {{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}}, {{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}}, - // z close to -1, i or -i + // z close to -1, i, or -i {{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}}, {{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}}, {{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}}, diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp index ffce4d5ec2..dc4b03cd6a 100644 --- a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp +++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "floating_point_utils.hpp" #include "log_test_cases.hpp" diff --git a/tests/std/tests/floating_point_model_matrix.lst b/tests/std/tests/floating_point_model_matrix.lst index 63e5f1eecb..0aba02fbe4 100644 --- a/tests/std/tests/floating_point_model_matrix.lst +++ b/tests/std/tests/floating_point_model_matrix.lst @@ -7,9 +7,9 @@ PM_CL="/FIfenv_prefix.hpp" RUNALL_CROSSLIST PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest" RUNALL_CROSSLIST -PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2" -PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-" -PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL" +PM_CL="/Od /MDd" +PM_CL="/O2 /MD /permissive-" +PM_CL="/O2 /MT /GL" PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd" PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT" PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-" From 984c8a42b66c6acd6eef26ff960c138e43ef05e5 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Nov 2020 23:01:12 -0800 Subject: [PATCH 17/19] Add -Wno-unused-command-line-argument for internal tests --- tests/std/tests/floating_point_model_matrix.lst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/floating_point_model_matrix.lst b/tests/std/tests/floating_point_model_matrix.lst index 0aba02fbe4..8a9b369c3b 100644 --- a/tests/std/tests/floating_point_model_matrix.lst +++ b/tests/std/tests/floating_point_model_matrix.lst @@ -10,9 +10,10 @@ RUNALL_CROSSLIST PM_CL="/Od /MDd" PM_CL="/O2 /MD /permissive-" PM_CL="/O2 /MT /GL" -PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd" -PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT" -PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-" +# TRANSITION, -Wno-unused-command-line-argument is needed for the internal test harness +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /Od /MTd" +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MT" +PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MD /Oi-" RUNALL_CROSSLIST PM_CL="" PM_CL="/arch:IA32" From 5a089e9036a268cebe7d5d0471e56d120dbdda1a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 8 Nov 2020 18:16:22 -0800 Subject: [PATCH 18/19] Use header-only code to fix /clr:pure. --- stl/CMakeLists.txt | 1 - stl/inc/complex | 317 +++++++++++++++++- stl/inc/xutility | 1 - .../stl_base/stl.files.settings.targets | 1 - stl/src/float_multi_prec.hpp | 158 --------- stl/src/math_algorithms.cpp | 157 --------- 6 files changed, 310 insertions(+), 325 deletions(-) delete mode 100644 stl/src/float_multi_prec.hpp delete mode 100644 stl/src/math_algorithms.cpp diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index a7ada94f13..394f28cd06 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -245,7 +245,6 @@ endforeach() set(IMPLIB_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp - ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp ${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp diff --git a/stl/inc/complex b/stl/inc/complex index 0c2f51d9f4..3eafcb9602 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -10,9 +10,27 @@ #if _STL_COMPILER_PREPROCESSOR #include #include +#include #include +#include +#include #include +#ifdef _M_CEE_PURE +// no intrinsics for /clr:pure +#elif defined(__clang__) +// TRANSITION, not using FMA intrinsics for Clang yet +#elif defined(_M_IX86) || defined(_M_X64) +#define _FMP_USING_X86_X64_INTRINSICS +#include +#include +extern "C" int __isa_available; +extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d); +#elif defined(_M_ARM64) +#define _FMP_USING_ARM64_INTRINSICS +#include +#endif // ^^^ defined(_M_ARM64) ^^^ + #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) #pragma warning(disable : _STL_DISABLED_WARNINGS) @@ -40,12 +58,297 @@ struct _C_ldouble_complex { #define _RE 0 #define _IM 1 -_EXTERN_C -_NODISCARD double __stdcall __std_math_log_hypot(double, double) noexcept; -_NODISCARD float __stdcall __std_math_log_hypotf(float, float) noexcept; -_END_EXTERN_C - _STD_BEGIN + +// implements multi-precision floating point arithmetic for numerical algorithms +#pragma float_control(precise, on, push) +namespace _Float_multi_prec { + // multi-precision floating point types + template + struct _Fmp_t; + + template + struct _Fmp_t<_Ty, 2> { + static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point"); + _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits + _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits + }; + + // addition + + // 1x precision + 1x precision -> 2x precision + // the result is exact when: + // 1) the result doesn't overflow + // 2) either underflow is gradual, or no internal underflow occurs + // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty + // 4) parameters and local variables do not retain extra intermediate precision + // 5) rounding mode is rounding to nearest + // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2 + // violation of other conditions could lead to worse results + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept { + const _Ty _Sum0 = _Xval + _Yval; + const _Ty _Ymod = _Sum0 - _Xval; + const _Ty _Xmod = _Sum0 - _Ymod; + const _Ty _Yerr = _Yval - _Ymod; + const _Ty _Xerr = _Xval - _Xmod; + return {_Sum0, _Xerr + _Yerr}; + } + + // 1x precision + 1x precision -> 2x precision + // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0 + // the result is exact when: + // 0) the requirement above is satisfied + // 1) no internal overflow occurs + // 2) either underflow is gradual, or no internal underflow occurs + // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty + // 4) parameters and local variables do not retain extra intermediate precision + // 5) rounding mode is rounding to nearest + // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2 + // violation of other conditions could lead to worse results + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept { + const _Ty _Sum0 = _Xval + _Yval; + const _Ty _Ymod = _Sum0 - _Xval; + const _Ty _Yerr = _Yval - _Ymod; + return {_Sum0, _Yerr}; + } + + // 1x precision + 2x precision -> 2x precision + // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0 + template + _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { + const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0); + return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1); + } + + // 2x precision + 2x precision -> 1x precision + template + _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { + const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0); + return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1)); + } + + // multiplication + + // round to 26 significant bits, ties toward zero + _NODISCARD _CONSTEXPR_BIT_CAST double _High_half(const double _Val) noexcept { + const auto _Bits = _Bit_cast(_Val); + const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; + return _Bit_cast(_High_half_bits); + } + + // _Xval * _Xval - _Prod0 + // the result is exact when: + // 1) _Prod0 is _Xval^2 faithfully rounded + // 2) no internal overflow or underflow occurs + // violation of condition 1 could lead to relative error on the order of epsilon + _NODISCARD _CONSTEXPR_BIT_CAST double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept { + const double _Xhigh = _High_half(_Xval); + const double _Xlow = _Xval - _Xhigh; + return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow; + } + +#ifdef _FMP_USING_X86_X64_INTRINSICS + _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept { + const __m128d _Mx = _mm_set_sd(_Xval); + const __m128d _Mprod0 = _mm_set_sd(_Prod0); + const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0); + double _Result; + _mm_store_sd(&_Result, _Mresult); + return _Result; + } +#endif // _FMP_USING_X86_X64_INTRINSICS + +#ifdef _FMP_USING_ARM64_INTRINSICS + _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept { + const float64x1_t _Mx = vld1_f64(&_Xval); + const float64x1_t _Mprod0 = vld1_f64(&_Prod0); + const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx); + double _Result; + vst1_f64(&_Result, _Mresult); + return _Result; + } +#endif // _FMP_USING_ARM64_INTRINSICS + + // square(1x precision) -> 2x precision + // the result is exact when no internal overflow or underflow occurs + _NODISCARD inline _Fmp_t _Sqr_x2(const double _Xval) noexcept { + const double _Prod0 = _Xval * _Xval; + +#if defined(_FMP_USING_X86_X64_INTRINSICS) + +#ifdef __AVX2__ + return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; +#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv + const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2; + if (_Definitely_have_fma) { + return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; + } else { + return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; + } +#endif // ^^^ !defined(__AVX2__) ^^^ + +#elif defined(_FMP_USING_ARM64_INTRINSICS) + // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements + // Both floating-point and NEON support are presumed to be present in hardware. + return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)}; +#else // ^^^ defined(_FMP_USING_ARM64_INTRINSICS) / not using intrinsics vvv + return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; +#endif // ^^^ not using intrinsics ^^^ + } +} // namespace _Float_multi_prec +#pragma float_control(pop) + +#undef _FMP_USING_X86_X64_INTRINSICS +#undef _FMP_USING_ARM64_INTRINSICS + +#define _FMP ::std::_Float_multi_prec:: + +// implements numerical algorithms for +namespace _Math_algorithms { + // TRANSITION: sqrt() isn't constexpr + // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)()); + // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon()); + template + struct _Hypot_leg_huge_helper { + static constexpr _Ty value{6.703903964971298e+153}; + }; + template <> + struct _Hypot_leg_huge_helper { + static constexpr float value{9.2233715e+18f}; + }; + template + _INLINE_VAR constexpr _Ty _Hypot_leg_huge = _Hypot_leg_huge_helper<_Ty>::value; + + template + struct _Hypot_leg_tiny_helper { + static constexpr _Ty value{1.4156865331029228e-146}; + }; + template <> + struct _Hypot_leg_tiny_helper { + static constexpr float value{4.440892e-16f}; + }; + template + _INLINE_VAR constexpr _Ty _Hypot_leg_tiny = _Hypot_leg_tiny_helper<_Ty>::value; + + template + _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept { + // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12 + // returns _Xval * _Xval + _Yval * _Yval - 1 + const _FMP _Fmp_t<_Ty, 2> _Xsqr = _FMP _Sqr_x2(_Xval); + const _FMP _Fmp_t<_Ty, 2> _Ysqr = _FMP _Sqr_x2(_Yval); + const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr); + return _Add_x1(_Xsqr_m1, _Ysqr); + } + + _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept { + const auto _Dx = static_cast(_Xval); + const auto _Dy = static_cast(_Yval); + return static_cast((_Dx * _Dx - 1.0) + _Dy * _Dy); + } + + // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes + template + _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval) + static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point"); + + if (_Is_nan(_Xval)) { // NaN + return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN + } + + if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate + if (_Xval == (numeric_limits<_Ty>::max)()) { // avoid overflow + return _STD log(_Xval); + } + + return _STD log(_Ty{1.0} + _Xval); + } + + const _Ty _Xabs = _Float_abs(_Xval); + if (_Xabs < numeric_limits<_Ty>::epsilon()) { // zero or tiny + if (_Xval == _Ty{0.0}) { + return _Xval; + } + + // honor rounding mode, raise FE_INEXACT + return _Xval - _Ty{0.5} * _Xval * _Xval; + } + + // compute log(1 + _Xval) with fixup for small _Xval + const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval); + return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0; + } + + template + _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval)) + static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point"); + + if (!_Is_finite(_Xval) || !_Is_finite(_Yval)) { // Inf or NaN + // raise FE_INVALID and return NaN if at least one of them is a signaling NaN + if (_Is_signaling_nan(_Xval) || _Is_signaling_nan(_Yval)) { + return _Xval + _Yval; + } + + // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN + if (_Is_inf(_Xval)) { + return _Float_abs(_Xval); + } + + if (_Is_inf(_Yval)) { + return _Float_abs(_Yval); + } + + // at least one of them is a quiet NaN, and the other is not an infinity + return _Xval + _Yval; + } + + _Ty _Av = _Float_abs(_Xval); + _Ty _Bv = _Float_abs(_Yval); + + if (_Av < _Bv) { // ensure that _Bv <= _Av + _STD swap(_Av, _Bv); + } + + if (_Bv == 0) { + return _STD log(_Av); + } + + if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow + constexpr _Ty _Norm_small = _Ty{0.5}; + constexpr _Ty _Norm_big = _Ty{3.0}; + + const _Ty _Bv_sqr = _Bv * _Bv; + + if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows + // _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode + return _Logp1(_Bv_sqr) * _Ty{0.5}; + } + + const _Ty _Norm = _Av * _Av + _Bv_sqr; + + if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation + return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5}; + } else { + return _STD log(_Norm) * _Ty{0.5}; + } + } else { // use 1 1/2 precision to preserve bits + constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L}; + constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L}; + + const int _Exponent = _STD ilogb(_Av); + const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent); + const _Ty _Bv_scaled = _STD scalbn(_Bv, -_Exponent); + const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled; + const _Ty _Norm_scaled = _Av_scaled * _Av_scaled + _Bv_scaled_sqr; + const _Ty _Real_shifted = _STD log(_Norm_scaled) * _Ty{0.5}; + return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm; + } + } +} // namespace _Math_algorithms + +#undef _FMP + using _Dcomplex_value = _CSTD _C_double_complex; using _Fcomplex_value = _CSTD _C_float_complex; using _Lcomplex_value = _CSTD _C_ldouble_complex; @@ -1613,11 +1916,11 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return mag template _NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types return static_cast<_Ty>( - __std_math_log_hypot(static_cast(_STD real(_Left)), static_cast(_STD imag(_Left)))); + _Math_algorithms::_Log_hypot(static_cast(_STD real(_Left)), static_cast(_STD imag(_Left)))); } _NODISCARD inline float _Log_abs(const complex& _Left) noexcept { - return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left)); + return _Math_algorithms::_Log_hypot(_STD real(_Left), _STD imag(_Left)); } template diff --git a/stl/inc/xutility b/stl/inc/xutility index 2714892919..ca2499bd1c 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6205,7 +6205,6 @@ struct _Nontrivial_dummy_type { _STL_INTERNAL_STATIC_ASSERT(!is_trivially_default_constructible_v<_Nontrivial_dummy_type>); _STD_END -#undef _CONSTEXPR_BIT_CAST #pragma pop_macro("new") _STL_RESTORE_CLANG_WARNINGS #pragma warning(pop) diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets index f1ef5cc9c9..5fd1b26310 100644 --- a/stl/msbuild/stl_base/stl.files.settings.targets +++ b/stl/msbuild/stl_base/stl.files.settings.targets @@ -171,7 +171,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - struct _Fmp_t; - - template - struct _Fmp_t<_Ty, 2> { - static_assert(is_floating_point_v<_Ty>); - _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits - _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits - }; - - // addition - - // 1x precision + 1x precision -> 2x precision - // the result is exact when: - // 1) the result doesn't overflow - // 2) either underflow is gradual, or no internal underflow occurs - // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty - // 4) parameters and local variables do not retain extra intermediate precision - // 5) rounding mode is rounding to nearest - // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2 - // violation of other conditions could lead to worse results - template - _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept { - const _Ty _Sum0 = _Xval + _Yval; - const _Ty _Ymod = _Sum0 - _Xval; - const _Ty _Xmod = _Sum0 - _Ymod; - const _Ty _Yerr = _Yval - _Ymod; - const _Ty _Xerr = _Xval - _Xmod; - return {_Sum0, _Xerr + _Yerr}; - } - - // 1x precision + 1x precision -> 2x precision - // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0 - // the result is exact when: - // 0) the requirement above is satisfied - // 1) no internal overflow occurs - // 2) either underflow is gradual, or no internal underflow occurs - // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty - // 4) parameters and local variables do not retain extra intermediate precision - // 5) rounding mode is rounding to nearest - // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2 - // violation of other conditions could lead to worse results - template - _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept { - const _Ty _Sum0 = _Xval + _Yval; - const _Ty _Ymod = _Sum0 - _Xval; - const _Ty _Yerr = _Yval - _Ymod; - return {_Sum0, _Yerr}; - } - - // 1x precision + 2x precision -> 2x precision - // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0 - template - _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { - const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0); - return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1); - } - - // 2x precision + 2x precision -> 1x precision - template - _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept { - const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0); - return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1)); - } - - // multiplication - - // round to 26 significant bits, ties toward zero - _NODISCARD constexpr double _High_half(const double _Val) noexcept { - const auto _Bits = _Bit_cast(_Val); - const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; - return _Bit_cast(_High_half_bits); - } - - // _Xval * _Xval - _Prod0 - // the result is exact when: - // 1) _Prod0 is _Xval^2 faithfully rounded - // 2) no internal overflow or underflow occurs - // violation of condition 1 could lead to relative error on the order of epsilon - _NODISCARD constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept { - const double _Xhigh = _High_half(_Xval); - const double _Xlow = _Xval - _Xhigh; - return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow; - } - -#if defined(_M_IX86) || defined(_M_X64) - _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept { - const __m128d _Mx = _mm_set_sd(_Xval); - const __m128d _Mprod0 = _mm_set_sd(_Prod0); - const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0); - double _Result; - _mm_store_sd(&_Result, _Mresult); - return _Result; - } -#endif // defined(_M_IX86) || defined(_M_X64) - -#if defined(_M_ARM64) - _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept { - const float64x1_t _Mx = vld1_f64(&_Xval); - const float64x1_t _Mprod0 = vld1_f64(&_Prod0); - const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx); - double _Result; - vst1_f64(&_Result, _Mresult); - return _Result; - } -#endif // defined(_M_ARM64) - - // square(1x precision) -> 2x precision - // the result is exact when no internal overflow or underflow occurs - _NODISCARD constexpr _Fmp_t _Sqr_x2(const double _Xval) noexcept { - const double _Prod0 = _Xval * _Xval; - - if (_STD is_constant_evaluated()) { - return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; - } else { -#if defined(_M_IX86) || defined(_M_X64) - if (__isa_available >= __ISA_AVAILABLE_AVX2) { - return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; - } else { - return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; - } -#elif defined(_M_ARM64) // ^^^ x86, x64 / arm64 vvv - // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements - // Both floating-point and NEON support are presumed to be present in hardware. - return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)}; -#else // ^^^ arm64 / arm vvv - return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)}; -#endif // ^^^ arm ^^^ - } - } -} // namespace _Float_multi_prec -_STD_END diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp deleted file mode 100644 index 8530a558ad..0000000000 --- a/stl/src/math_algorithms.cpp +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -// implements numerical algorithms for - -// This must be as small as possible, because its contents are -// injected into the msvcprt.lib and msvcprtd.lib import libraries. -// Do not include or define anything else here. -// In particular, basic_string must not be included here. - -#include -#include -#include -#include - -#include "float_multi_prec.hpp" - -#define _FMP ::std::_Float_multi_prec:: - -namespace { - // TRANSITION: sqrt() isn't constexpr - // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)()); - // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon()); - template - inline constexpr _Ty _Hypot_leg_huge = _Ty{6.703903964971298e+153}; - template <> - inline constexpr float _Hypot_leg_huge = 9.2233715e+18f; - - template - inline constexpr _Ty _Hypot_leg_tiny = _Ty{1.4156865331029228e-146}; - template <> - inline constexpr float _Hypot_leg_tiny = 4.440892e-16f; - - template - _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept { - // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12 - // returns _Xval * _Xval + _Yval * _Yval - 1 - const _FMP _Fmp_t<_Ty, 2> _Xsqr = _FMP _Sqr_x2(_Xval); - const _FMP _Fmp_t<_Ty, 2> _Ysqr = _FMP _Sqr_x2(_Yval); - const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr); - return _Add_x1(_Xsqr_m1, _Ysqr); - } - - _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept { - const auto _Dx = static_cast(_Xval); - const auto _Dy = static_cast(_Yval); - return static_cast((_Dx * _Dx - 1.0) + _Dy * _Dy); - } - - // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes - template - _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval) - static_assert(_STD is_floating_point_v<_Ty>); - - if (_STD _Is_nan(_Xval)) { // NaN - return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN - } - - if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate - if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow - return _STD log(_Xval); - } - - return _STD log(_Ty{1.0} + _Xval); - } - - const _Ty _Xabs = _STD _Float_abs(_Xval); - if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny - if (_Xval == _Ty{0.0}) { - return _Xval; - } - - // honor rounding mode, raise FE_INEXACT - return _Xval - _Ty{0.5} * _Xval * _Xval; - } - - // compute log(1 + _Xval) with fixup for small _Xval - const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval); - return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0; - } - - template - _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval)) - static_assert(_STD is_floating_point_v<_Ty>); - - if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN - // raise FE_INVALID and return NaN if at least one of them is a signaling NaN - if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) { - return _Xval + _Yval; - } - - // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN - if (_STD _Is_inf(_Xval)) { - return _STD _Float_abs(_Xval); - } - - if (_STD _Is_inf(_Yval)) { - return _STD _Float_abs(_Yval); - } - - // at least one of them is a quiet NaN, and the other is not an infinity - return _Xval + _Yval; - } - - _Ty _Av = _STD _Float_abs(_Xval); - _Ty _Bv = _STD _Float_abs(_Yval); - - if (_Av < _Bv) { // ensure that _Bv <= _Av - _STD swap(_Av, _Bv); - } - - if (_Bv == 0) { - return _STD log(_Av); - } - - if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow - constexpr _Ty _Norm_small = _Ty{0.5}; - constexpr _Ty _Norm_big = _Ty{3.0}; - - const _Ty _Bv_sqr = _Bv * _Bv; - - if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows - // _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode - return _Logp1(_Bv_sqr) * _Ty{0.5}; - } - - const _Ty _Norm = _Av * _Av + _Bv_sqr; - - if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation - return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5}; - } else { - return _STD log(_Norm) * _Ty{0.5}; - } - } else { // use 1 1/2 precision to preserve bits - constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L}; - constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L}; - - const int _Exponent = _STD ilogb(_Av); - const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent); - const _Ty _Bv_scaled = _STD scalbn(_Bv, -_Exponent); - const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled; - const _Ty _Norm_scaled = _Av_scaled * _Av_scaled + _Bv_scaled_sqr; - const _Ty _Real_shifted = _STD log(_Norm_scaled) * _Ty{0.5}; - return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm; - } - } -} // unnamed namespace - -_EXTERN_C -_NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept { - return _Log_hypot(_Xval, _Yval); -} - -_NODISCARD float __stdcall __std_math_log_hypotf(const float _Xval, const float _Yval) noexcept { - return _Log_hypot(_Xval, _Yval); -} -_END_EXTERN_C From ba88342dd6cde2bfe1605554c7aaecf0eace6273 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 8 Nov 2020 20:24:59 -0800 Subject: [PATCH 19/19] Include arm64_neon.h, no arm64 subdirectory. --- stl/inc/complex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/complex b/stl/inc/complex index 3eafcb9602..60b017fdb9 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -28,7 +28,7 @@ extern "C" int __isa_available; extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d); #elif defined(_M_ARM64) #define _FMP_USING_ARM64_INTRINSICS -#include +#include #endif // ^^^ defined(_M_ARM64) ^^^ #pragma pack(push, _CRT_PACKING)