From 903c59faff91610c3ea05e698ef16754c3de28df Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Mon, 27 Jul 2020 20:39:23 +0800
Subject: [PATCH 01/19] Fix undue overflow and underflow in complex sqrt

Modifies the scale factors in `_Fabs` (used by `sqrt`) such that:

- `_Fabs` doesn't underflow when the input is tiny.

- `sqrt` doesn't overflow when the input is huge.
---
 stl/inc/complex | 65 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 20 deletions(-)
diff --git a/stl/inc/complex b/stl/inc/complex
index 2bbba58573..8ca4aa2449 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -66,8 +66,12 @@ public:
         return (numeric_limits<_Ty>::max)();
     }
 
+    static constexpr _Ty _Flt_norm_min() {
+        return (numeric_limits<_Ty>::min)() > 0 ? (numeric_limits<_Ty>::min)() : 0;
+    }
+
     static _Ty _Abs(_Ty _Left) {
-        return static_cast<_Ty>(_Left < 0 ? -_Left : _Left);
+        return static_cast<_Ty>(_Signbit(_Left) ? -_Left : _Left);
     }
 
     static _Ty _Cosh(_Ty _Left, _Ty _Right) { // return cosh(_Left) * _Right
@@ -75,7 +79,7 @@ public:
     }
 
     static _Ty _Copysign(_Ty _Magnitude, _Ty _Sign) {
-        return static_cast<_Ty>(_Sign < 0 ? -_Abs(_Magnitude) : _Abs(_Magnitude));
+        return static_cast<_Ty>(_Signbit(_Sign) ? -_Abs(_Magnitude) : _Abs(_Magnitude));
     }
 
     static short _Exp(_Ty* _Pleft, _Ty _Right, short _Exponent) { // compute exp(*_Pleft) * _Right * 2 ^ _Exponent
@@ -106,7 +110,7 @@ public:
     }
 
     static bool _Signbit(_Ty _Left) {
-        return _Left < 0;
+        return _CSTD signbit(static_cast<double>(_Left));
     }
 
     static _Ty _Sinh(_Ty _Left, _Ty _Right) { // return sinh(_Left) * _Right
@@ -200,6 +204,10 @@ public:
         return (numeric_limits<long double>::max)();
     }
 
+    static constexpr _Ty _Flt_norm_min() {
+        return (numeric_limits<long double>::min)();
+    }
+
     static _Ty _Abs(_Ty _Left) {
         // testing _Left < 0 would be incorrect when _Left is -0.0
         return _CSTD fabsl(_Left);
@@ -332,6 +340,10 @@ public:
         return (numeric_limits<double>::max)();
     }
 
+    static constexpr _Ty _Flt_norm_min() {
+        return (numeric_limits<double>::min)();
+    }
+
     static _Ty _Abs(_Ty _Left) {
         // testing _Left < 0 would be incorrect when _Left is -0.0
         return _CSTD fabs(_Left);
@@ -467,6 +479,10 @@ public:
         return (numeric_limits<float>::max)();
     }
 
+    static constexpr _Ty _Flt_norm_min() {
+        return (numeric_limits<float>::min)();
+    }
+
     static _Ty _Abs(_Ty _Left) {
         // testing _Left < 0 would be incorrect when _Left is -0.0
         return _CSTD fabsf(_Left);
@@ -1537,10 +1553,13 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) {
 
 // FUNCTION TEMPLATE _Fabs
 template <class _Ty>
-_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale factor
+_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return magnitude and scale factor.
+                                                   // returns a non-zero even integer in *_Pexp when _Left is finite
+                                                   // and non-zero.
+                                                   // returns 0 in *_Pexp when _Left is zero, infinity or NaN.
     *_Pexp  = 0;
-    _Ty _Av = real(_Left);
-    _Ty _Bv = imag(_Left);
+    _Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left));
+    _Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left));
 
     if (_Ctraits<_Ty>::_Isinf(_Av) || _Ctraits<_Ty>::_Isinf(_Bv)) {
         return _Ctraits<_Ty>::_Infv(); // at least one component is INF
@@ -1549,13 +1568,8 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
     } else if (_Ctraits<_Ty>::_Isnan(_Bv)) {
         return _Bv; // imaginary component is NaN
     } else { // neither component is NaN or INF
-        _Av = _Ctraits<_Ty>::_Abs(_Av);
-        _Bv = _Ctraits<_Ty>::_Abs(_Bv);
-
         if (_Av < _Bv) { // ensure that |_Bv| <= |_Av|
-            _Ty _Tmp = _Av;
-            _Av      = _Bv;
-            _Bv      = _Tmp;
+            _STD swap(_Av, _Bv);
         }
 
         if (_Av == 0) {
@@ -1563,16 +1577,27 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
         }
 
         if (1 <= _Av) {
-            *_Pexp = 2;
-            _Av    = _Av * static_cast<_Ty>(0.25);
-            _Bv    = _Bv * static_cast<_Ty>(0.25);
+            *_Pexp = 4;
+            _Av    = _Av * static_cast<_Ty>(0.0625);
+            _Bv    = _Bv * static_cast<_Ty>(0.0625);
         } else {
-            *_Pexp = -2;
-            _Av    = _Av * 4;
-            _Bv    = _Bv * 4;
+            constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps();
+
+            // TRANSITION, workaround for non floating point _Ty
+            if (_Flt_eps != 0 && _Av < 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps) {
+                constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits;
+
+                *_Pexp = _Exponent;
+                _Av    = _Ctraits<_Ty>::ldexp(_Av, -_Exponent);
+                _Bv    = _Ctraits<_Ty>::ldexp(_Bv, -_Exponent);
+            } else {
+                *_Pexp = -2;
+                _Av    = _Av * 4;
+                _Bv    = _Bv * 4;
+            }
         }
 
-        _Ty _Tmp = _Av - _Bv;
+        const _Ty _Tmp = _Av - _Bv;
         if (_Tmp == _Av) {
             return _Av; // _Bv unimportant
         } else if (_Bv < _Tmp) { // use simple approximation
@@ -1691,7 +1716,7 @@ _NODISCARD complex<_Ty> sqrt(const complex<_Ty>& _Left) {
                 return complex<_Ty>(_Ctraits<_Ty>::_Infv(), _Im); // (any, +/-Inf)
             } else if (_Ctraits<_Ty>::_Isnan(_Im)) {
                 if (_Re < 0) {
-                    return complex<_Ty>(_Im, _Re); // (-Inf, NaN)
+                    return complex<_Ty>(_Ctraits<_Ty>::_Abs(_Im), _Ctraits<_Ty>::_Copysign(_Re, _Im)); // (-Inf, NaN)
                 } else {
                     return _Left; // (+Inf, NaN)
                 }

From a6a59345b8cd261e3e96fc6d4eb37293624ca0cf Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Mon, 3 Aug 2020 22:13:29 +0800
Subject: [PATCH 02/19] Improve accuracy of `log` when |z| is close to 1

When |z| is close to 1, compute log(|z|) as log1p(norm_minus_1(z)) / 2,
where norm_minus_1(z) = real(z) ^ 2 + imag(z) ^ 2 - 1 computed with
double width arithmetic to avoid catastrophic cancellation.
---
 stl/CMakeLists.txt           |   2 +
 stl/inc/complex              |  41 ++++------
 stl/inc/xutility             |  18 +++++
 stl/src/float_multi_prec.hpp | 112 ++++++++++++++++++++++++++
 stl/src/math_algorithms.cpp  | 148 +++++++++++++++++++++++++++++++++++
 5 files changed, 297 insertions(+), 24 deletions(-)
 create mode 100644 stl/src/float_multi_prec.hpp
 create mode 100644 stl/src/math_algorithms.cpp

diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt
index 23dc688099..6c5e1d445a 100644
--- a/stl/CMakeLists.txt
+++ b/stl/CMakeLists.txt
@@ -240,7 +240,9 @@ endforeach()
 # Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib.
 set(IMPLIB_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/float_multi_prec.hpp
     ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp
diff --git a/stl/inc/complex b/stl/inc/complex
index 8ca4aa2449..03ff979401 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -40,6 +40,11 @@ struct _C_ldouble_complex {
 #define _RE 0
 #define _IM 1
 
+_EXTERN_C
+_NODISCARD double __stdcall __std_math_log_hypot(double, double) noexcept;
+_NODISCARD float __stdcall __std_math_log_hypotf(float, float) noexcept;
+_END_EXTERN_C
+
 _STD_BEGIN
 using _Dcomplex_value = _CSTD _C_double_complex;
 using _Fcomplex_value = _CSTD _C_float_complex;
@@ -1618,32 +1623,20 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return mag
 
 // FUNCTION TEMPLATE log
 template <class _Ty>
-_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
-    _Ty _Theta = _Ctraits<_Ty>::atan2(imag(_Left), real(_Left)); // get phase
-
-    if (_Ctraits<_Ty>::_Isnan(_Theta)) {
-        return complex<_Ty>(_Theta, _Theta); // real or imag is NaN
-    } else { // use 1 1/2 precision to preserve bits
-        constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L);
-        constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L);
-        int _Leftexp;
-        _Ty _Rho = _Fabs(_Left, &_Leftexp); // get magnitude and scale factor
-
-        _Ty _Leftn = static_cast<_Ty>(_Leftexp);
+_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept {
+    return static_cast<_Ty>(
+        __std_math_log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
+};
 
-        _Ty _Real;
-        if (_Rho == 0) {
-            _Real = -_Ctraits<_Ty>::_Infv(); // log(0) == -INF
-        } else if (_Ctraits<_Ty>::_Isinf(_Rho)) {
-            _Real = _Rho; // log(INF) == INF
-        } else {
-            _Real = static_cast<_Ty>(_Ctraits<_Ty>::log(_Rho)); // These casts are TRANSITION, DevCom-1093507
-            _Real += static_cast<_Ty>(_Leftn * _Cl);
-            _Real += static_cast<_Ty>(_Leftn * _Cm);
-        }
+_NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
+    return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left));
+};
 
-        return complex<_Ty>(_Real, _Theta);
-    }
+template <class _Ty>
+_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
+    const _Ty _Log_abs = _STD _Log_abs(_Left); // get logarithm of magnitude
+    const _Ty _Theta   = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase
+    return complex<_Ty>(_Log_abs, _Theta);
 }
 
 // FUNCTION TEMPLATE pow
diff --git a/stl/inc/xutility b/stl/inc/xutility
index 9c82bb27c5..9e76c2145e 100644
--- a/stl/inc/xutility
+++ b/stl/inc/xutility
@@ -5927,16 +5927,20 @@ struct _Float_traits {
     // traits for double and long double:
     using type = unsigned long long;
 
+    static constexpr type _Sign_mask      = 0x8000'0000'0000'0000ULL;
     static constexpr type _Magnitude_mask = 0x7fff'ffff'ffff'ffffULL;
     static constexpr type _Exponent_mask  = 0x7ff0'0000'0000'0000ULL;
+    static constexpr type _Quiet_nan_mask = 0x0008'0000'0000'0000ULL;
 };
 
 template <>
 struct _Float_traits<float> {
     using type = unsigned int;
 
+    static constexpr type _Sign_mask      = 0x8000'0000U;
     static constexpr type _Magnitude_mask = 0x7fff'ffffU;
     static constexpr type _Exponent_mask  = 0x7f80'0000U;
+    static constexpr type _Quiet_nan_mask = 0x0040'0000U;
 };
 
 // FUNCTION TEMPLATE _Float_abs_bits
@@ -5952,12 +5956,26 @@ _NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_abs(const _Ty _Xx) { // constexpr floa
     return _Bit_cast<_Ty>(_Float_abs_bits(_Xx));
 }
 
+// FUNCTION TEMPLATE _Float_copysign
+template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
+_NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_copysign(const _Ty _Magnitude, const _Ty _Sign) { // constexpr copysign()
+    const auto _Signbit = _Bit_cast<typename _Float_traits<_Ty>::type>(_Sign) & _Float_traits<_Ty>::_Sign_mask;
+    return _Bit_cast<_Ty>(_Float_abs_bits(_Magnitude) | _Signbit);
+}
+
 // FUNCTION TEMPLATE _Is_nan
 template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
 _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan()
     return _Float_abs_bits(_Xx) > _Float_traits<_Ty>::_Exponent_mask;
 }
 
+// FUNCTION TEMPLATE _Is_signaling_nan
+template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
+_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN
+    const auto _Abs_bits = _Float_abs_bits(_Xx);
+    return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0);
+}
+
 // FUNCTION TEMPLATE _Is_finite
 template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
 _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite()
diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
new file mode 100644
index 0000000000..6599616df5
--- /dev/null
+++ b/stl/src/float_multi_prec.hpp
@@ -0,0 +1,112 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// implements multi-precision floating point arithmetic for numerical algorithms
+
+// This must be as small as possible, because its contents are
+// injected into the msvcprt.lib and msvcprtd.lib import libraries.
+// Do not include or define anything else here.
+// In particular, basic_string must not be included here.
+
+#pragma once
+
+#if defined(_M_FP_FAST) || defined(__FAST_MATH__)
+#error float_multi_prec.hpp should not be compiled with /fp:fast or -ffast-math
+#endif // defined(_M_FP_FAST) || defined(__FAST_MATH__)
+
+#include <intrin.h>
+#include <isa_availability.h>
+#include <type_traits>
+#include <xutility>
+
+extern "C" int __isa_available;
+
+_STD_BEGIN
+namespace _Float_multi_prec {
+    // multi-precision floating point types
+    template <class _Ty, int _Prec>
+    struct _Fmp_t;
+
+    template <class _Ty>
+    struct _Fmp_t<_Ty, 2> {
+        static_assert(_STD is_floating_point_v<_Ty>);
+        _Ty _Val0;
+        _Ty _Val1;
+    };
+
+    // addition
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
+        const _Ty _Sum0 = _Xval + _Yval;
+        const _Ty _Ymod = _Sum0 - _Xval;
+        const _Ty _Xmod = _Sum0 - _Ymod;
+        const _Ty _Yerr = _Yval - _Ymod;
+        const _Ty _Xerr = _Xval - _Xmod;
+        return {_Sum0, _Xerr + _Yerr};
+    }
+
+    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
+        const _Ty _Sum0 = _Xval + _Yval;
+        const _Ty _Ymod = _Sum0 - _Xval;
+        const _Ty _Yerr = _Yval - _Ymod;
+        return {_Sum0, _Yerr};
+    }
+
+    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
+        const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0);
+        return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
+    }
+
+    template <class _Ty>
+    _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
+        const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
+        return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1));
+    }
+
+    // multiplication
+    _NODISCARD inline constexpr double _High_half(const double _Val) {
+        const auto _Bits           = _STD _Bit_cast<unsigned long long>(_Val);
+        const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;
+        return _STD _Bit_cast<double>(_High_half_bits);
+    }
+
+    _NODISCARD inline constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
+        const double _Xhigh = _High_half(_Xval);
+        const double _Xlow  = _Xval - _Xhigh;
+        return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow;
+    }
+
+#if defined(_M_IX86) || defined(_M_X64)
+    _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept {
+        const __m128d _Mx      = _mm_set_sd(_Xval);
+        const __m128d _Mprod0  = _mm_set_sd(_Prod0);
+        const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0);
+        double _Result;
+        _mm_store_sd(&_Result, _Mresult);
+        return _Result;
+    }
+#endif // defined(_M_IX86) || defined(_M_X64)
+
+    _NODISCARD inline constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
+        const double _Prod0 = _Xval * _Xval;
+
+        if (_STD is_constant_evaluated()) {
+            return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
+        } else {
+#if defined(_M_IX86) || defined(_M_X64)
+            if (__isa_available >= __ISA_AVAILABLE_AVX2) {
+                return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
+            } else {
+                return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
+            }
+#else // ^^^ x86, x64 / arm, arm64 vvv
+            return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
+#endif // ^^^ arm, arm64 ^^^
+        }
+    }
+} // namespace _Float_multi_prec
+_STD_END
diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp
new file mode 100644
index 0000000000..8658271b63
--- /dev/null
+++ b/stl/src/math_algorithms.cpp
@@ -0,0 +1,148 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// implements numerical algorithms for <complex>
+
+// This must be as small as possible, because its contents are
+// injected into the msvcprt.lib and msvcprtd.lib import libraries.
+// Do not include or define anything else here.
+// In particular, basic_string must not be included here.
+
+#include <cmath>
+#include <limits>
+#include <type_traits>
+#include <xutility>
+
+#include "float_multi_prec.hpp"
+
+#define _FMP ::std::_Float_multi_prec::
+
+namespace {
+    // TRANSITION: sqrt() isn't constexpr
+    // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)());
+    // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon());
+    template <class _Ty>
+    inline constexpr _Ty _Hypot_leg_huge = _Ty{6.703903964971298e+153};
+    template <>
+    inline constexpr float _Hypot_leg_huge<float> = 9.2233715e+18f;
+
+    template <class _Ty>
+    inline constexpr _Ty _Hypot_leg_tiny = _Ty{1.4156865331029228e-146};
+    template <>
+    inline constexpr float _Hypot_leg_tiny<float> = 4.440892e-16f;
+
+    template <class _Ty>
+    _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept {
+        // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12
+        // returns _Xval * _Xval + _Yval * _Yval - 1
+        const _FMP _Fmp_t<_Ty, 2> _Xsqr    = _FMP _Sqr_x2(_Xval);
+        const _FMP _Fmp_t<_Ty, 2> _Ysqr    = _FMP _Sqr_x2(_Yval);
+        const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr);
+        return _Add_x1(_Xsqr_m1, _Ysqr);
+    }
+
+    _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept {
+        const auto _Dx = static_cast<double>(_Xval);
+        const auto _Dy = static_cast<double>(_Yval);
+        return static_cast<float>((_Dx * _Dx - 1.0) + _Dy * _Dy);
+    }
+
+    // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes
+    template <class _Ty>
+    _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval)
+        static_assert(_STD is_floating_point_v<_Ty>);
+
+        if (_STD _Is_nan(_Xval)) {
+            return _Xval + _Xval; // raise FE_INVALID if necessary
+        }
+
+        if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is accurate
+            if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow
+                return _STD log(_Xval);
+            }
+
+            return _STD log(_Ty{1.0} + _Xval);
+        }
+
+        const _Ty _Xabs = _STD _Float_abs(_Xval);
+        if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny
+            if (_Xabs == _Ty{0.0}) {
+                return _Xval;
+            }
+
+            // honor rounding mode, raise FE_INEXACT
+            return _Xabs * (_STD _Float_copysign(_Ty{1.0}, _Xval) - _Ty{0.5} * _Xabs);
+        }
+
+        // compute log(1 + _Xval) with fixup for small _Xval
+        const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval);
+        return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0;
+    }
+
+    template <class _Ty>
+    _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval))
+        static_assert(_STD is_floating_point_v<_Ty>);
+
+        if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN
+            if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) {
+                return _Xval + _Yval;
+            }
+
+            if (_STD isinf(_Xval)) {
+                return _STD _Float_abs(_Xval);
+            }
+
+            if (_STD isinf(_Yval)) {
+                return _STD _Float_abs(_Yval);
+            }
+
+            return _Xval + _Yval;
+        }
+
+        _Ty _Av = _STD _Float_abs(_Xval);
+        _Ty _Bv = _STD _Float_abs(_Yval);
+
+        if (_Av < _Bv) { // ensure that _Bv <= _Av
+            _STD swap(_Av, _Bv);
+        }
+
+        if (_Bv == 0) {
+            return _STD log(_Av);
+        }
+
+        if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow
+            constexpr _Ty _Norm_small = _Ty{0.5};
+            constexpr _Ty _Norm_big   = _Ty{3.0};
+
+            const _Ty _Bv_sqr = _Bv * _Bv;
+            const _Ty _Norm   = _Av * _Av + _Bv_sqr;
+
+            if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
+                return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};
+            } else {
+                return _STD log(_Norm) * _Ty{0.5};
+            }
+        } else { // use 1 1/2 precision to preserve bits
+            constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L);
+            constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L);
+
+            const int _Exponent      = _STD ilogb(_Av);
+            const _Ty _Av_scaled     = _STD scalbn(_Av, -_Exponent);
+            const _Ty _Bv_scaled     = _STD scalbn(_Bv, -_Exponent);
+            const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled;
+            const _Ty _Norm_scaled   = _Av_scaled * _Av_scaled + _Bv_scaled_sqr;
+            const _Ty _Real_shifted  = _STD log(_Norm_scaled) * _Ty{0.5};
+            return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
+        }
+    }
+} // namespace
+
+_EXTERN_C
+_NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept {
+    return _Log_hypot(_Xval, _Yval);
+}
+
+_NODISCARD float __stdcall __std_math_log_hypotf(const float _Xval, const float _Yval) noexcept {
+    return _Log_hypot(_Xval, _Yval);
+}
+_END_EXTERN_C

From 784311854dfd8095549aa278c078f1770022fe51 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Thu, 6 Aug 2020 21:34:14 +0800
Subject: [PATCH 03/19] Minor fixes, clarify comments

---
 stl/inc/complex             |  5 +++--
 stl/inc/xutility            | 10 ++++++++++
 stl/src/math_algorithms.cpp | 23 +++++++++++++----------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/stl/inc/complex b/stl/inc/complex
index 03ff979401..60fa9c911c 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -1587,9 +1587,10 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return mag
             _Bv    = _Bv * static_cast<_Ty>(0.0625);
         } else {
             constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps();
-
             // TRANSITION, workaround for non floating point _Ty
-            if (_Flt_eps != 0 && _Av < 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps) {
+            constexpr _Ty _Leg_tiny = _Flt_eps == 0 ? _Ty{0} : 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps;
+
+            if (_Av < _Leg_tiny) {
                 constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits;
 
                 *_Pexp = _Exponent;
diff --git a/stl/inc/xutility b/stl/inc/xutility
index 9e76c2145e..453da3a8e7 100644
--- a/stl/inc/xutility
+++ b/stl/inc/xutility
@@ -5970,12 +5970,22 @@ _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan(
 }
 
 // FUNCTION TEMPLATE _Is_signaling_nan
+// TRANSITION, workaround x86 ABI
+// On x86 ABI, floating point by-value arguments and return values are passed in 80-bit x87 registers.
+// When the value is a 32-bit or 64-bit signaling NaN, the conversion to/from 80-bit raises FE_INVALID
+// and turns it into a quiet NaN. This behavior is undesirable if we want to test for signaling NaNs.
 template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
 _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN
     const auto _Abs_bits = _Float_abs_bits(_Xx);
     return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0);
 }
 
+// FUNCTION TEMPLATE _Is_inf
+template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
+_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_inf(const _Ty _Xx) { // constexpr isinf()
+    return _Float_abs_bits(_Xx) == _Float_traits<_Ty>::_Exponent_mask;
+}
+
 // FUNCTION TEMPLATE _Is_finite
 template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
 _NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite()
diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp
index 8658271b63..dd28c435d4 100644
--- a/stl/src/math_algorithms.cpp
+++ b/stl/src/math_algorithms.cpp
@@ -52,11 +52,11 @@ namespace {
     _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval)
         static_assert(_STD is_floating_point_v<_Ty>);
 
-        if (_STD _Is_nan(_Xval)) {
-            return _Xval + _Xval; // raise FE_INVALID if necessary
+        if (_STD _Is_nan(_Xval)) { // NaN
+            return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN
         }
 
-        if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is accurate
+        if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate
             if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow
                 return _STD log(_Xval);
             }
@@ -66,12 +66,12 @@ namespace {
 
         const _Ty _Xabs = _STD _Float_abs(_Xval);
         if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny
-            if (_Xabs == _Ty{0.0}) {
+            if (_Xval == _Ty{0.0}) {
                 return _Xval;
             }
 
             // honor rounding mode, raise FE_INEXACT
-            return _Xabs * (_STD _Float_copysign(_Ty{1.0}, _Xval) - _Ty{0.5} * _Xabs);
+            return _Xval - _Ty{0.5} * _Xval * _Xval;
         }
 
         // compute log(1 + _Xval) with fixup for small _Xval
@@ -84,18 +84,21 @@ namespace {
         static_assert(_STD is_floating_point_v<_Ty>);
 
         if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN
+            // raise FE_INVALID and return NaN if at least one of them is a signaling NaN
             if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) {
                 return _Xval + _Yval;
             }
 
-            if (_STD isinf(_Xval)) {
+            // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN
+            if (_STD _Is_inf(_Xval)) {
                 return _STD _Float_abs(_Xval);
             }
 
-            if (_STD isinf(_Yval)) {
+            if (_STD _Is_inf(_Yval)) {
                 return _STD _Float_abs(_Yval);
             }
 
+            // at least one of them is a quiet NaN, and the other is not an infinity
             return _Xval + _Yval;
         }
 
@@ -123,8 +126,8 @@ namespace {
                 return _STD log(_Norm) * _Ty{0.5};
             }
         } else { // use 1 1/2 precision to preserve bits
-            constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L);
-            constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L);
+            constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L};
+            constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L};
 
             const int _Exponent      = _STD ilogb(_Av);
             const _Ty _Av_scaled     = _STD scalbn(_Av, -_Exponent);
@@ -135,7 +138,7 @@ namespace {
             return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
         }
     }
-} // namespace
+} // unnamed namespace
 
 _EXTERN_C
 _NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept {

From 1d34722e6df7617a3afc282b74dea92be3258565 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Thu, 6 Aug 2020 22:19:58 +0800
Subject: [PATCH 04/19] Add complex sqrt test

---
 tests/std/include/fenv_prefix.hpp             |  61 +++++
 tests/std/test.lst                            |   1 +
 .../env.lst                                   |  32 +++
 .../floating_point_utils.hpp                  | 241 +++++++++++++++++
 .../sqrt_test_cases.hpp                       | 249 ++++++++++++++++++
 .../test.cpp                                  |  59 +++++
 .../test.hpp                                  |  27 ++
 tests/utils/stl/test/config.py                |   6 +
 tests/utils/stl/test/tests.py                 |   8 +
 9 files changed, 684 insertions(+)
 create mode 100644 tests/std/include/fenv_prefix.hpp
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp

diff --git a/tests/std/include/fenv_prefix.hpp b/tests/std/include/fenv_prefix.hpp
new file mode 100644
index 0000000000..ab1a82fe54
--- /dev/null
+++ b/tests/std/include/fenv_prefix.hpp
@@ -0,0 +1,61 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#ifdef FP_CONFIG_PRESET
+#if FP_CONFIG_PRESET == 3
+#define FP_PRESET_FAST 1
+#else // ^^^ FP_CONFIG_PRESET == 3 / FP_CONFIG_PRESET != 3 vvv
+#define FP_PRESET_FAST 0
+#endif // ^^^ FP_CONFIG_PRESET != 3 ^^^
+#endif // defined(FP_CONFIG_PRESET)
+
+#ifdef FP_CONTRACT_MODE
+#ifdef __clang__
+
+#if FP_CONTRACT_MODE == 0
+#pragma STDC FP_CONTRACT OFF
+#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
+#pragma STDC FP_CONTRACT ON
+#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
+#pragma STDC FP_CONTRACT ON
+#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
+#error invalid FP_CONTRACT_MODE
+#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
+
+#else // ^^^ clang / MSVC vvv
+
+#if FP_CONTRACT_MODE == 0
+#pragma fp_contract(off)
+#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
+#pragma fp_contract(on)
+#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
+#pragma fp_contract(on)
+#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
+#error invalid FP_CONTRACT_MODE
+#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
+
+#endif // ^^^ MSVC ^^^
+#endif // defined(FP_CONTRACT_MODE) && !defined(__clang__)
+
+#include <cassert>
+#include <float.h>
+
+struct fenv_initializer_t {
+    fenv_initializer_t() {
+#if WITH_FP_ABRUPT_UNDERFLOW
+        {
+            const errno_t result = _controlfp_s(nullptr, _DN_FLUSH, _MCW_DN);
+            assert(result == 0);
+        }
+#endif // WITH_FP_ABRUPT_UNDERFLOW
+    }
+
+    ~fenv_initializer_t() = default;
+
+    fenv_initializer_t(const fenv_initializer_t&) = delete;
+    fenv_initializer_t& operator=(const fenv_initializer_t&) = delete;
+};
+
+const fenv_initializer_t fenv_initializer{};
diff --git a/tests/std/test.lst b/tests/std/test.lst
index f6eda1e96f..5c0ac0973a 100644
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -161,6 +161,7 @@ tests\GH_000625_vector_bool_optimization
 tests\GH_000685_condition_variable_any
 tests\GH_000690_overaligned_function
 tests\GH_000890_pow_template
+tests\GH_000935_complex_numerical_accuracy
 tests\GH_000940_missing_valarray_copy
 tests\GH_001010_filesystem_error_encoding
 tests\GH_001017_discrete_distribution_out_of_range
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
new file mode 100644
index 0000000000..918098b2eb
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\prefix.lst
+RUNALL_CROSSLIST
+PM_CL="/FIfenv_prefix.hpp"
+RUNALL_CROSSLIST
+PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
+RUNALL_CROSSLIST
+PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2"
+PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
+PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-"
+RUNALL_CROSSLIST
+PM_CL=""
+PM_CL="/arch:IA32"
+PM_CL="/arch:AVX2"
+PM_CL="/arch:VFPv4"
+RUNALL_CROSSLIST
+PM_CL="/fp:strict  /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1"
+PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1"
+PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0"
+PM_CL="/fp:fast    /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0"
+RUNALL_CROSSLIST
+PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0"
+PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj"
+RUNALL_CROSSLIST
+PM_CL="/DFP_CONTRACT_MODE=0" PM_CLANG="-ffp-contract=off"
+PM_CL="/DFP_CONTRACT_MODE=1" PM_CLANG="-ffp-contract=on"
+PM_CL="/DFP_CONTRACT_MODE=2" PM_CLANG="-ffp-contract=fast"
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
new file mode 100644
index 0000000000..22c2ad18f7
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
@@ -0,0 +1,241 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <cassert>
+#include <cfenv>
+#include <cmath>
+#include <float.h>
+#include <type_traits>
+#include <xutility>
+
+namespace fputil {
+    template <typename T>
+    using float_bits_t = typename _STD _Float_traits<T>::type;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> magnitude_mask_v = _STD _Float_traits<T>::_Magnitude_mask;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> exponent_mask_v = _STD _Float_traits<T>::_Exponent_mask;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> significand_mask_v = magnitude_mask_v<T> & ~exponent_mask_v<T>;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> sign_mask_v = _STD _Float_traits<T>::_Sign_mask;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> norm_min_bits_v = significand_mask_v<T> + 1U;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> norm_max_bits_v = exponent_mask_v<T> - 1U;
+
+    template <typename T>
+    _INLINE_VAR constexpr float_bits_t<T> infinity_bits_v = exponent_mask_v<T>;
+
+    // not affected by abrupt underflow
+    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    constexpr bool iszero(const T& x) {
+        return _STD _Float_abs_bits(x) == 0;
+    }
+
+    // not affected by /fp:fast
+    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    constexpr bool signbit(const T& x) {
+        const auto bits = std::_Bit_cast<float_bits_t<T>>(x);
+        return (bits & sign_mask_v<T>) != 0;
+    }
+
+    enum class rounding_mode {
+        to_nearest_ties_even = FE_TONEAREST,
+        toward_zero          = FE_TOWARDZERO,
+        toward_positive      = FE_UPWARD,
+        toward_negative      = FE_DOWNWARD,
+    };
+
+    bool is_directed_rounding_mode(const rounding_mode mode) {
+        switch (mode) {
+        case rounding_mode::to_nearest_ties_even:
+            return false;
+
+        case rounding_mode::toward_zero:
+        case rounding_mode::toward_positive:
+        case rounding_mode::toward_negative:
+            return true;
+
+        default:
+            assert(false);
+            return false;
+        }
+    }
+
+#if TEST_FP_ROUNDING
+
+#ifdef __clang__
+// TRANSITION, should be #pragma STDC FENV_ACCESS ON
+#else // ^^^ clang / MSVC vvv
+// TRANSITION, VSO-923474 -- should be #pragma STDC FENV_ACCESS ON
+#pragma fenv_access(on)
+#endif // ^^^ MSVC ^^^
+
+    constexpr rounding_mode all_rounding_modes[] = {
+        rounding_mode::to_nearest_ties_even,
+        rounding_mode::toward_zero,
+        rounding_mode::toward_positive,
+        rounding_mode::toward_negative,
+    };
+
+    class rounding_guard {
+    public:
+        explicit rounding_guard(const rounding_mode mode) : old_mode{static_cast<rounding_mode>(std::fegetround())} {
+            const int result = std::fesetround(static_cast<int>(mode));
+            assert(result == 0);
+        }
+
+        ~rounding_guard() {
+            const int result = std::fesetround(static_cast<int>(old_mode));
+            assert(result == 0);
+        }
+
+        rounding_guard(const rounding_guard&) = delete;
+        rounding_guard& operator=(const rounding_guard&) = delete;
+
+    private:
+        rounding_mode old_mode;
+    };
+
+#else // ^^^ alternative rounding modes / default rounding mode only vvv
+
+    constexpr rounding_mode all_rounding_modes[] = {rounding_mode::to_nearest_ties_even};
+
+    class rounding_guard {
+    public:
+        explicit rounding_guard(const rounding_mode mode) {
+            static_cast<void>(mode);
+        }
+
+        ~rounding_guard() = default;
+
+        rounding_guard(const rounding_guard&) = delete;
+        rounding_guard& operator=(const rounding_guard&) = delete;
+    };
+
+#endif // ^^^ default rounding mode only ^^^
+
+    // compares whether two floating point values are equal
+    // all NaNs are equal, +0.0 and -0.0 are not equal
+    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    bool precise_equal(const T& actual, const T& expected) {
+        if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
+            return _STD _Is_nan(actual) == _STD _Is_nan(expected);
+        } else {
+            return actual == expected && fputil::signbit(actual) == fputil::signbit(expected);
+        }
+    }
+
+    namespace detail {
+        // 0x80...00 = zero, 0x80...01 = numeric_limits<T>::denorm_min(), 0x7f...ff = -numeric_limits<T>::denorm_min()
+        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        float_bits_t<T> offset_representation(const T& x) {
+            const float_bits_t<T> abs_bits = _STD _Float_abs_bits(x);
+            return fputil::signbit(x) ? sign_mask_v<T> - abs_bits : sign_mask_v<T> + abs_bits;
+        }
+
+        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        float_bits_t<T> is_offset_value_subnormal_or_zero(const float_bits_t<T> offset_value) {
+            constexpr float_bits_t<T> positive_norm_min_offset = sign_mask_v<T> + norm_min_bits_v<T>;
+            constexpr float_bits_t<T> negative_norm_min_offset = sign_mask_v<T> - norm_min_bits_v<T>;
+
+            return negative_norm_min_offset < offset_value && offset_value < positive_norm_min_offset;
+        }
+
+        // number of ulps above zero, if we count [0, numeric_limits<T>::min()) as 1 ulp
+        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        double abrupt_underflow_ulp(const float_bits_t<T> offset_value) {
+            using bits_type = float_bits_t<T>;
+
+            constexpr bits_type offset_positive_norm_min = sign_mask_v<T> + norm_min_bits_v<T>;
+            constexpr bits_type offset_negative_norm_min = sign_mask_v<T> - norm_min_bits_v<T>;
+
+            if (offset_value >= offset_positive_norm_min) {
+                return 1.0 + (offset_value - offset_positive_norm_min);
+            } else if (offset_value <= offset_negative_norm_min) {
+                return -1.0 - (offset_negative_norm_min - offset_value);
+            } else if (offset_value >= sign_mask_v<T>) {
+                return static_cast<double>(offset_value - sign_mask_v<T>) / norm_min_bits_v<T>;
+            } else {
+                return -static_cast<double>(sign_mask_v<T> - offset_value) / norm_min_bits_v<T>;
+            }
+        }
+
+        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) {
+            if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
+                return _STD _Is_nan(actual) == _STD _Is_nan(expected);
+            }
+
+            if (_STD _Is_inf(expected)) {
+                return actual == expected;
+            }
+
+            if (fputil::signbit(actual) != fputil::signbit(expected)) {
+                return false;
+            }
+
+            using bits_type = float_bits_t<T>;
+
+            // compute ulp difference
+            const bits_type actual_offset   = detail::offset_representation(actual);
+            const bits_type expected_offset = detail::offset_representation(expected);
+            const bits_type ulp_diff =
+                actual_offset < expected_offset ? expected_offset - actual_offset : actual_offset - expected_offset;
+
+            if (ulp_diff <= static_cast<unsigned int>(ulp_tolerance) && ulp_tolerance >= 0) {
+                return true;
+            }
+
+#if WITH_FP_ABRUPT_UNDERFLOW
+            // handle abrupt underflow
+            if (detail::is_offset_value_subnormal_or_zero<T>(expected_offset)
+                || detail::is_offset_value_subnormal_or_zero<T>(actual_offset)) {
+                const double adjusted_actual_ulp   = detail::abrupt_underflow_ulp<T>(actual_offset);
+                const double adjusted_expected_ulp = detail::abrupt_underflow_ulp<T>(expected_offset);
+                const double adjusted_ulp_diff     = std::abs(adjusted_actual_ulp - adjusted_expected_ulp);
+
+                if (adjusted_ulp_diff <= ulp_tolerance) {
+                    return true;
+                }
+            }
+#endif // WITH_FP_ABRUPT_UNDERFLOW
+
+            return false;
+        }
+
+        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) {
+            return _STD _Is_finite(actual) && _STD _Is_finite(expected)
+                   && std::abs(actual - expected) <= absolute_tolerance;
+        }
+    } // namespace detail
+
+    // returns whether floating point result is nearly equal to the expected value
+    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    bool near_equal(
+        const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) {
+        if (precise_equal(actual, expected)) {
+            return true;
+        }
+
+        if (ulp_tolerance > 0 && detail::is_within_ulp_tolerance(actual, expected, ulp_tolerance)) {
+            return true;
+        }
+
+        if (absolute_tolerance > 0 && detail::is_within_absolute_tolerance(actual, expected, absolute_tolerance)) {
+            return true;
+        }
+
+        return false;
+    }
+} // namespace fputil
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp
new file mode 100644
index 0000000000..82c45a80c6
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/sqrt_test_cases.hpp
@@ -0,0 +1,249 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <complex>
+
+#include "floating_point_utils.hpp"
+#include "test.hpp"
+
+constexpr complex_unary_test_case<double> sqrt_double_cases[] = {
+    // normal cases
+    {{+0x3p-0, +0x4p-0}, {+0x2p-0, +0x1p-0}},
+    {{+0x3p-0, -0x4p-0}, {+0x2p-0, -0x1p-0}},
+    {{-0x3p-0, +0x4p-0}, {+0x1p-0, +0x2p-0}},
+    {{-0x3p-0, -0x4p-0}, {+0x1p-0, -0x2p-0}},
+    {{+0x3p-2, +0x4p-2}, {+0x2p-1, +0x1p-1}},
+    {{+0x3p-2, -0x4p-2}, {+0x2p-1, -0x1p-1}},
+    {{-0x3p-2, +0x4p-2}, {+0x1p-1, +0x2p-1}},
+    {{-0x3p-2, -0x4p-2}, {+0x1p-1, -0x2p-1}},
+    {{+0x3p-4, +0x4p-4}, {+0x2p-2, +0x1p-2}},
+    {{+0x3p-4, -0x4p-4}, {+0x2p-2, -0x1p-2}},
+    {{-0x3p-4, +0x4p-4}, {+0x1p-2, +0x2p-2}},
+    {{-0x3p-4, -0x4p-4}, {+0x1p-2, -0x2p-2}},
+
+    // special cases
+    {{+0.0, +0.0}, {+0.0, +0.0}, {true, true}},
+    {{+0.0, -0.0}, {+0.0, -0.0}, {true, true}},
+    {{-0.0, +0.0}, {+0.0, +0.0}, {true, true}},
+    {{-0.0, -0.0}, {+0.0, -0.0}, {true, true}},
+    {{+1.0, +0.0}, {+1.0, +0.0}, {false, true}},
+    {{+1.0, -0.0}, {+1.0, -0.0}, {false, true}},
+    {{-1.0, +0.0}, {+0.0, +1.0}, {true, false}},
+    {{-1.0, -0.0}, {+0.0, -1.0}, {true, false}},
+    {{+0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
+    {{+0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},
+    {{-0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
+    {{-0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},
+
+#if !FP_PRESET_FAST
+    {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
+    {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
+    {{-double_inf, +0.0}, {+0.0, +double_inf}, {true, true}},
+    {{-double_inf, -0.0}, {+0.0, -double_inf}, {true, true}},
+    {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
+    {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
+    {{-double_inf, +1.0}, {+0.0, +double_inf}, {true, true}},
+    {{-double_inf, -1.0}, {+0.0, -double_inf}, {true, true}},
+    {{+double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{+double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{-double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{-double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{+1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{+1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{-1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{-1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{+0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{+0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{-0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{-0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{+double_inf, +double_nan}, {+double_inf, +double_nan}, {true, true}},
+    {{+double_inf, -double_nan}, {+double_inf, -double_nan}, {true, true}},
+    {{-double_inf, +double_nan}, {+double_nan, +double_inf}, {true, true}},
+    {{-double_inf, -double_nan}, {+double_nan, -double_inf}, {true, true}},
+    {{+double_nan, +double_inf}, {+double_inf, +double_inf}, {true, true}},
+    {{+double_nan, -double_inf}, {+double_inf, -double_inf}, {true, true}},
+    {{+double_nan, +0.0}, {+double_nan, +double_nan}, {true, true}},
+    {{+double_nan, -0.0}, {+double_nan, -double_nan}, {true, true}},
+    {{+0.0, +double_nan}, {+double_nan, +double_nan}, {true, true}},
+    {{+0.0, -double_nan}, {+double_nan, -double_nan}, {true, true}},
+    {{+double_nan, +double_nan}, {+double_nan, +double_nan}, {true, true}},
+    {{+double_nan, -double_nan}, {+double_nan, -double_nan}, {true, true}},
+#endif // !FP_PRESET_FAST
+
+    // abs(z) overflows
+    {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.19435caffa9f8p+512, +0x1.d203138f6c828p+510}},
+    {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.0907dc1930691p+510, +0x1.ee8dd4748bf15p+511}},
+    {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp+511, -0x0.0000000000000p-1022}},
+
+    // norm(z) overflows
+    {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cc1033be914a7p+331, -0x1.7432f2f528ea0p+332}},
+    {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.00e0ed3ec75c3p+256, -0x1.56813c53b47afp+254}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    // abs(z) underflows
+    {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {+0x1.d203138f6c828p-539, +0x1.19435caffa9f9p-537}},
+    {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {+0x1.0000000000001p-512, +0x1.ffffffffffffep-513}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+
+    // abs(z) close to underflow
+    {{+0x1.4p-1022, +0x1p-1022}, {+0x1.31a33f3eb2fd9p-511, +0x1.acd8ff10ebe7ep-513}},
+
+    // norm(z) underflows
+    {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {+0x1.45f5e3f782563p-332, -0x1.33cb9c4327c54p-334}},
+    {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {+0x1.09220ecd9c241p-257, -0x1.09220ecd9c241p-256}},
+
+    // control flow edge cases
+    {{+0x1p-2, +0x1.fffffffffffffp-1}, {+0x1.99b96593b936dp-1, +0x1.3fe72a921c6f4p-1}},
+    {{+0x1p-2, +0x1.0000000000000p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f4p-1}},
+    {{+0x1p-2, +0x1.0000000000001p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f5p-1}},
+    {{+0x1p+0, +0x1p-54}, {+0x1.0000000000000p+0, +0x1.0000000000000p-55}},
+    {{+0x1p+0, +0x1p-53}, {+0x1.0000000000000p+0, +0x1.0000000000000p-54}},
+    {{+0x1p+0, +0x1p-52}, {+0x1.0000000000000p+0, +0x1.0000000000000p-53}},
+    {{+0x1p+0, +0x1.ffffffffffffep-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f720p-3}},
+    {{+0x1p+0, +0x1.fffffffffffffp-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
+    {{+0x1p+0, +0x1.0000000000000p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
+    {{+0x1p+0, +0x1.0000000000001p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f723p-3}},
+    {{+0x1.ffffffffffffep-970, +0x1.fffffffffffffp-970}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efddp-486}},
+    {{+0x1.fffffffffffffp-970, +0x1.0000000000000p-969}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efdep-486}},
+    {{+0x1.0000000000000p-969, +0x1.0000000000001p-969}, {+0x1.8dc42193d5c03p-485, +0x1.49852f983efdep-486}},
+    {{+0x1.ffffffffffffep-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88455p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.fffffffffffffp-971, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.0000000000000p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1cp-486}},
+    {{+0x1.fffffffffffffp-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.0000000000000p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.0000000000001p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
+    {{+0x1.0000000000000p-970, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.0000000000001p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
+    {{+0x1.0000000000002p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
+    {{+0x1p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
+    {{+0x1p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
+    {{+0x1p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x0.0000000000001p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
+    {{+0x0.0000000000001p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
+    {{+0x0.0000000000001p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+};
+
+constexpr complex_unary_test_case<float> sqrt_float_cases[] = {
+    // normal cases
+    {{+0x3p-0F, +0x4p-0F}, {+0x2p-0F, +0x1p-0F}},
+    {{+0x3p-0F, -0x4p-0F}, {+0x2p-0F, -0x1p-0F}},
+    {{-0x3p-0F, +0x4p-0F}, {+0x1p-0F, +0x2p-0F}},
+    {{-0x3p-0F, -0x4p-0F}, {+0x1p-0F, -0x2p-0F}},
+    {{+0x3p-2F, +0x4p-2F}, {+0x2p-1F, +0x1p-1F}},
+    {{+0x3p-2F, -0x4p-2F}, {+0x2p-1F, -0x1p-1F}},
+    {{-0x3p-2F, +0x4p-2F}, {+0x1p-1F, +0x2p-1F}},
+    {{-0x3p-2F, -0x4p-2F}, {+0x1p-1F, -0x2p-1F}},
+    {{+0x3p-4F, +0x4p-4F}, {+0x2p-2F, +0x1p-2F}},
+    {{+0x3p-4F, -0x4p-4F}, {+0x2p-2F, -0x1p-2F}},
+    {{-0x3p-4F, +0x4p-4F}, {+0x1p-2F, +0x2p-2F}},
+    {{-0x3p-4F, -0x4p-4F}, {+0x1p-2F, -0x2p-2F}},
+
+    // special cases
+    {{+0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
+    {{+0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
+    {{-0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
+    {{-0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
+    {{+1.0F, +0.0F}, {+1.0F, +0.0F}, {false, true}},
+    {{+1.0F, -0.0F}, {+1.0F, -0.0F}, {false, true}},
+    {{-1.0F, +0.0F}, {+0.0F, +1.0F}, {true, false}},
+    {{-1.0F, -0.0F}, {+0.0F, -1.0F}, {true, false}},
+    {{+0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
+    {{+0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},
+    {{-0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
+    {{-0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},
+
+#if !FP_PRESET_FAST
+    {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
+    {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
+    {{-float_inf, +0.0F}, {+0.0F, +float_inf}, {true, true}},
+    {{-float_inf, -0.0F}, {+0.0F, -float_inf}, {true, true}},
+    {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
+    {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
+    {{-float_inf, +1.0F}, {+0.0F, +float_inf}, {true, true}},
+    {{-float_inf, -1.0F}, {+0.0F, -float_inf}, {true, true}},
+    {{+float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{+float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{-float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{-float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{+1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{+1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{-1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{-1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{+0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{+0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{-0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{-0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{+float_inf, +float_nan}, {+float_inf, +float_nan}, {true, true}},
+    {{+float_inf, -float_nan}, {+float_inf, -float_nan}, {true, true}},
+    {{-float_inf, +float_nan}, {+float_nan, +float_inf}, {true, true}},
+    {{-float_inf, -float_nan}, {+float_nan, -float_inf}, {true, true}},
+    {{+float_nan, +float_inf}, {+float_inf, +float_inf}, {true, true}},
+    {{+float_nan, -float_inf}, {+float_inf, -float_inf}, {true, true}},
+    {{+float_nan, +0.0}, {+float_nan, +float_nan}, {true, true}},
+    {{+float_nan, -0.0}, {+float_nan, -float_nan}, {true, true}},
+    {{+0.0, +float_nan}, {+float_nan, +float_nan}, {true, true}},
+    {{+0.0, -float_nan}, {+float_nan, -float_nan}, {true, true}},
+    {{+float_nan, +float_nan}, {+float_nan, +float_nan}, {true, true}},
+    {{+float_nan, -float_nan}, {+float_nan, -float_nan}, {true, true}},
+#endif // !FP_PRESET_FAST
+
+    // abs(z) overflows
+    {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.19435cp+64F, +0x1.d20312p+62F}},
+    {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.0907dcp+62F, +0x1.ee8dd4p+63F}},
+    {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.fffffep+63F, -0x0.000000p-126F}},
+
+    // norm(z) overflows
+    {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.216970p+41F, -0x1.d4473ap+41F}},
+    {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.f9c31ep+31F, -0x1.512cbep+30F}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    // abs(z) underflows
+    {{-0x0.000002p-126F, +0x0.000002p-126F}, {+0x1.498530p-76F, +0x1.8dc422p-75F}},
+    {{+0x0.000002p-126F, +0x0.800000p-126F}, {+0x1.000002p-64F, +0x1.fffffcp-65F}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+
+    // abs(z) close to underflow
+    {{+0x1.4p-126F, +0x1p-126F}, {+0x1.31a340p-63F, +0x1.acd900p-65F}},
+
+    // norm(z) underflows
+    {{+0x1.ef2d1p-83F, -0x1.ef2d10p-84F}, {+0x1.0314d8p-41F, -0x1.e9495ep-44F}},
+    {{-0x1.622d7p-61F, -0x1.d83c94p-61F}, {+0x1.ebb76ep-32F, -0x1.ebb770p-31F}},
+
+    // control flow edge cases
+    {{+0x1p-2F, +0x1.fffffep-1F}, {+0x1.99b964p-1F, +0x1.3fe72ap-1F}},
+    {{+0x1p-2F, +0x1.000000p+0F}, {+0x1.99b966p-1F, +0x1.3fe72ap-1F}},
+    {{+0x1p-2F, +0x1.000002p+0F}, {+0x1.99b966p-1F, +0x1.3fe72cp-1F}},
+    {{+0x1p+0F, +0x1p-25F}, {+0x1.000000p+0F, +0x1.000000p-26F}},
+    {{+0x1p+0F, +0x1p-24F}, {+0x1.000000p+0F, +0x1.000000p-25F}},
+    {{+0x1p+0F, +0x1p-23F}, {+0x1.000000p+0F, +0x1.000000p-24F}},
+    {{+0x1p+0F, +0x1.fffffcp-2F}, {+0x1.077226p+0F, +0x1.f18770p-3F}},
+    {{+0x1p+0F, +0x1.fffffep-2F}, {+0x1.077226p+0F, +0x1.f18772p-3F}},
+    {{+0x1p+0F, +0x1.000000p-1F}, {+0x1.077226p+0F, +0x1.f18774p-3F}},
+    {{+0x1p+0F, +0x1.000002p-1F}, {+0x1.077226p+0F, +0x1.f18778p-3F}},
+    {{+0x1.fffffcp-103F, +0x1.fffffep-103F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
+    {{+0x1.fffffep-103F, +0x1.000000p-102F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
+    {{+0x1.000000p-102F, +0x1.000002p-102F}, {+0x1.19435ep-51F, +0x1.d20316p-53F}},
+    {{+0x1.fffffcp-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
+    {{+0x1.fffffep-104F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e02p-52F}},
+    {{+0x1.000000p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
+    {{+0x1.fffffep-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
+    {{+0x1.000000p-103F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
+    {{+0x1.000002p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
+    {{+0x1.000000p-103F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
+    {{+0x1.000002p-103F, +0x1.000000p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e00p-52F}},
+    {{+0x1.000004p-103F, +0x1.000002p-102F}, {+0x1.cc8536p-52F, +0x1.1c9e02p-52F}},
+    {{+0x1.000000p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e4p-52F}},
+    {{+0x1.000000p-126F, +0x1.000000p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e6p-52F}},
+    {{+0x1.000000p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x0.000002p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
+    {{+0x0.000002p-126F, +0x1.000000p-102F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
+    {{+0x0.000002p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+};
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
new file mode 100644
index 0000000000..363c3649f2
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <complex>
+
+#include "floating_point_utils.hpp"
+#include "sqrt_test_cases.hpp"
+
+using namespace std;
+using namespace fputil;
+
+void test_sqrt(const rounding_mode mode) {
+#if FP_PRESET_FAST
+    constexpr int ulp_tolerance = 4;
+#else // ^^^ fp:fast / not fp:fast vvv
+    const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2;
+#endif // ^^^ not fp:fast ^^^
+
+    const auto check_result = [&](const auto& result, const auto& test_case) {
+        const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance;
+        const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance;
+        return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance)
+               && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
+    };
+
+    for (const auto& c : sqrt_double_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return sqrt(c.input);
+        }();
+
+        assert(check_result(result, c));
+    }
+
+    for (const auto& c : sqrt_float_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return sqrt(c.input);
+        }();
+
+        assert(check_result(result, c));
+    }
+
+    for (const auto& c : sqrt_double_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return sqrt(static_cast<complex<long double>>(c.input));
+        }();
+
+        assert(check_result(static_cast<complex<double>>(result), c));
+    }
+}
+
+int main() {
+    for (const auto& mode : all_rounding_modes) {
+        test_sqrt(mode);
+    }
+}
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp
new file mode 100644
index 0000000000..681d0e4523
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.hpp
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <complex>
+#include <limits>
+
+#include "floating_point_utils.hpp"
+
+constexpr double double_inf = std::numeric_limits<double>::infinity();
+constexpr double double_nan = std::numeric_limits<double>::quiet_NaN();
+
+constexpr float float_inf = std::numeric_limits<float>::infinity();
+constexpr float float_nan = std::numeric_limits<float>::quiet_NaN();
+
+struct complex_result_exactness {
+    bool real = false;
+    bool imag = false;
+};
+
+template <typename T>
+struct complex_unary_test_case {
+    std::complex<T> input;
+    std::complex<T> expected_result;
+    complex_result_exactness result_exactness{};
+};
diff --git a/tests/utils/stl/test/config.py b/tests/utils/stl/test/config.py
index d9a6c04035..209aa4d9f3 100644
--- a/tests/utils/stl/test/config.py
+++ b/tests/utils/stl/test/config.py
@@ -129,6 +129,12 @@ def configure_features(self):
 
         if self.target_arch == 'x86':
             self.config.available_features.add('edg')
+            self.config.available_features.add('arch_ia32')
+            self.config.available_features.add('arch_avx2')
+        elif self.target_arch == 'x64':
+            self.config.available_features.add('arch_avx2')
+        elif self.target_arch == 'arm':
+            self.config.available_features.add('arch_vfpv4')
 
     def configure_test_source_root(self):
         test_source_root = self.get_lit_conf('test_source_root', None)
diff --git a/tests/utils/stl/test/tests.py b/tests/utils/stl/test/tests.py
index 6728132870..3d5cc2e013 100644
--- a/tests/utils/stl/test/tests.py
+++ b/tests/utils/stl/test/tests.py
@@ -41,6 +41,12 @@ def __init__(self, suite, path_in_suite, lit_config, test_config,
                 self.requires.append('clr') # TRANSITION, GH-797
             elif flag[1:] == 'BE':
                 self.requires.append('edg') # available for x86, see config.py
+            elif flag[1:] == 'arch:AVX2':
+                self.requires.append('arch_avx2') # available for x86 and x64, see config.py
+            elif flag[1:] == 'arch:IA32':
+                self.requires.append('arch_ia32') # available for x86, see config.py
+            elif flag[1:] == 'arch:VFPv4':
+                self.requires.append('arch_vfpv4') # available for arm, see config.py
 
     def getOutputDir(self):
         return Path(os.path.join(
@@ -135,6 +141,8 @@ def _configure_cxx(self, lit_config, envlst_entry, default_cxx):
         link_flags.extend(envlst_entry.getEnvVal('PM_LINK', '').split())
 
         if ('clang'.casefold() in os.path.basename(cxx).casefold()):
+            flags.extend(map(lambda x : '/clang:' + x, envlst_entry.getEnvVal('PM_CLANG', '').split()))
+
             target_arch = self.config.target_arch.casefold()
             if (target_arch == 'x64'.casefold()):
                 compile_flags.append('-m64')

From d79c9f39401b170547eff9b024929fca39d2390f Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sat, 8 Aug 2020 18:02:30 +0800
Subject: [PATCH 05/19] Add complex log test

---
 .../log_test_cases.hpp                        | 276 ++++++++++++++++++
 .../test.cpp                                  |  67 ++++-
 2 files changed, 342 insertions(+), 1 deletion(-)
 create mode 100644 tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp

diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
new file mode 100644
index 0000000000..29b349e440
--- /dev/null
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
@@ -0,0 +1,276 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <complex>
+
+#include "floating_point_utils.hpp"
+#include "test.hpp"
+
+template <typename T>
+constexpr T pi_over_4_v = T{0.7853981633974483};
+template <typename T>
+constexpr T pi_over_2_v = T{1.5707963267948966};
+template <typename T>
+constexpr T pi_3_over_4_v = T{2.356194490192345};
+template <typename T>
+constexpr T pi_v = T{3.141592653589793};
+
+constexpr complex_unary_test_case<double> log_double_cases[] = {
+    // normal cases
+    {{+0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.dac670561bb4fp-1}},
+    {{+0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.dac670561bb4fp-1}},
+    {{-0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.1b6e192ebbe44p+1}},
+    {{-0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.1b6e192ebbe44p+1}},
+    {{+0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.dac670561bb4fp-1}},
+    {{+0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.dac670561bb4fp-1}},
+    {{-0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.1b6e192ebbe44p+1}},
+    {{-0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.1b6e192ebbe44p+1}},
+    {{+0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.dac670561bb4fp-1}},
+    {{+0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.dac670561bb4fp-1}},
+    {{-0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.1b6e192ebbe44p+1}},
+    {{-0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.1b6e192ebbe44p+1}},
+
+    // special cases
+    {{+1.0, +0.0}, {0.0, +0.0}, {true, true}},
+    {{+1.0, -0.0}, {0.0, -0.0}, {true, true}},
+    {{+0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
+    {{+0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
+    {{-0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
+    {{-0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
+    {{-1.0, +0.0}, {0.0, +pi_v<double>}, {true, false}},
+    {{-1.0, -0.0}, {0.0, -pi_v<double>}, {true, false}},
+
+#if !FP_PRESET_FAST
+    {{+0.0, +0.0}, {-double_inf, +0.0}, {true, true}},
+    {{+0.0, -0.0}, {-double_inf, -0.0}, {true, true}},
+    {{-0.0, +0.0}, {-double_inf, +pi_v<double>}, {true, false}},
+    {{-0.0, -0.0}, {-double_inf, -pi_v<double>}, {true, false}},
+    {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
+    {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
+    {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
+    {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
+    {{+double_inf, +double_inf}, {+double_inf, +pi_over_4_v<double>}, {true, false}},
+    {{+double_inf, -double_inf}, {+double_inf, -pi_over_4_v<double>}, {true, false}},
+    {{+1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
+    {{+1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
+    {{+0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
+    {{+0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
+    {{-0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
+    {{-0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
+    {{-1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
+    {{-1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
+    {{-double_inf, +double_inf}, {+double_inf, +pi_3_over_4_v<double>}, {true, false}},
+    {{-double_inf, -double_inf}, {+double_inf, -pi_3_over_4_v<double>}, {true, false}},
+    {{-double_inf, +1.0}, {+double_inf, +pi_v<double>}, {true, false}},
+    {{-double_inf, -1.0}, {+double_inf, -pi_v<double>}, {true, false}},
+    {{-double_inf, +0.0}, {+double_inf, +pi_v<double>}, {true, false}},
+    {{-double_inf, -0.0}, {+double_inf, -pi_v<double>}, {true, false}},
+    {{+double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
+    {{-double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
+    {{double_nan, +double_inf}, {+double_inf, double_nan}, {true, true}},
+    {{double_nan, -double_inf}, {+double_inf, double_nan}, {true, true}},
+    {{double_nan, +0.0}, {double_nan, double_nan}, {true, true}},
+    {{+0.0, double_nan}, {double_nan, double_nan}, {true, true}},
+    {{double_nan, double_nan}, {double_nan, double_nan}, {true, true}},
+#endif // !FP_PRESET_FAST
+
+    // abs(z) overflows
+    {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.63108c75a1936p+9, +0x1.921fb54442d18p-1}},
+    {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.62e42fefa39efp+9, +0x1.4f1a6c638d03fp+1}},
+    {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.62e42fefa39efp+9, -0x0.0000000000000p-1022}},
+
+    // norm(z) overflows
+    {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cd525d6474bb8p+8, -0x1.0468a8ace4df6p+1}},
+    {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.6300e9ed15a44p+8, -0x1.4978fa3269ee1p-1}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    // abs(z) underflows
+    {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {-0x1.740bf7c0d927cp+9, +0x1.2d97c7f3321d2p+1}},
+    {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {-0x1.628b76e3a7b61p+9, +0x1.921fb54442d16p+0}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+
+    // abs(z) close to underflow
+    {{+0x1.4p-1022, +0x1p-1022}, {-0x1.61f684c577299p+9, +0x1.5977a5103ea92p-1}},
+
+    // norm(z) underflows
+    {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {-0x1.cbb65944f5e2bp+8, -0x1.dac670561bb4fp-2}},
+    {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {-0x1.62991d5d62a5ep+8, -0x1.1b6e192ebbe44p+1}},
+
+    // z close to 1
+    {{+0x1.0000000000001p+0, -0.0}, {+0x1.fffffffffffffp-53, -0.0}, {false, true}},
+    {{+0x1.fffffffffffffp-1, +0.0}, {-0x1.0000000000000p-53, +0.0}, {false, true}},
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x1.0000000000001p+0, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp-53, -0x0.0000000000001p-1022}},
+    {{+0x1.0000000000000p+0, +0x0.0000000000001p-1022}, {+0x0.0000000000000p-1022, +0x0.0000000000001p-1022}},
+    {{+0x1.fffffffffffffp-1, -0x0.0000000000001p-1022}, {-0x1.0000000000000p-53, -0x0.0000000000001p-1022}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x1.0000000000001p+0, +0x1p-1022}, {+0x1.fffffffffffffp-53, +0x0.fffffffffffffp-1022}},
+    {{+0x1.0000000000000p+0, -0x1p-1022}, {+0x0.0000000000000p-1022, -0x1.0000000000000p-1022}},
+    {{+0x1.fffffffffffffp-1, +0x1p-1022}, {-0x1.0000000000000p-53, +0x1.0000000000001p-1022}},
+    {{+0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.ffffffffffffep-53}},
+    {{+0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.0000000000000p-52}},
+    {{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}},
+    {{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}},
+
+    // z close to -1, i or -i
+    {{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}},
+    {{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}},
+    {{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}},
+    {{+0x1p-52, +0x1.0000000000001p+0}, {+0x1.0000000000000p-52, +0x1.921fb54442d17p+0}},
+    {{-0x1p-52, +0x1.0000000000000p+0}, {+0x1.0000000000000p-105, +0x1.921fb54442d19p+0}},
+    {{+0x1p-52, +0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, +0x1.921fb54442d17p+0}},
+    {{-0x1p-52, -0x1.0000000000001p+0}, {+0x1.0000000000000p-52, -0x1.921fb54442d19p+0}},
+    {{+0x1p-52, -0x1.0000000000000p+0}, {+0x1.0000000000000p-105, -0x1.921fb54442d17p+0}},
+    {{-0x1p-52, -0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d19p+0}},
+
+    // abs(z) close to 1
+    {{+0x1.6a09e667f3bccp-1, +0x1.6a09e667f3bccp-1}, {-0x1.98d4d0da05571p-54, +0x1.921fb54442d18p-1}},
+    {{+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}, {+0x1.3b3efbf5e2229p-54, -0x1.921fb54442d18p-1}},
+    {{-0x1.3333333333333p-1, -0x1.999999999999ap-1}, {+0x1.999999999999ap-56, -0x1.1b6e192ebbe44p+1}},
+    {{-0x1.3333333333333p-1, +0x1.9999999999999p-1}, {-0x1.3333333333333p-54, +0x1.1b6e192ebbe44p+1}},
+    {{+0x1.69fbe76c8b439p-1, +0x1.69fbe76c8b439p-1}, {-0x1.3cb7c059d6699p-13, +0x1.921fb54442d18p-1}},
+    {{-0x1.d89d89d89d89ep-1, +0x1.89d89d89d89d6p-2}, {-0x1.3b13b13b13b0cp-57, +0x1.5f97315254857p+1}},
+
+    // control flow edge cases
+    {{+0x1p-1, +0x1.fffffffffffffp-2}, {-0x1.62e42fefa39f0p-2, +0x1.921fb54442d18p-1}},
+    {{+0x1p-1, +0x1.0000000000000p-1}, {-0x1.62e42fefa39efp-2, +0x1.921fb54442d18p-1}},
+    {{+0x1p-1, +0x1.0000000000001p-1}, {-0x1.62e42fefa39edp-2, +0x1.921fb54442d19p-1}},
+    {{+0x1p-1, +0x1.a887293fd6f33p+0}, {+0x1.193ea7aad0309p-1, +0x1.4727f6d4d118cp+0}},
+    {{+0x1p-1, +0x1.a887293fd6f34p+0}, {+0x1.193ea7aad030ap-1, +0x1.4727f6d4d118dp+0}},
+    {{+0x1p-1, +0x1.a887293fd6f35p+0}, {+0x1.193ea7aad030cp-1, +0x1.4727f6d4d118dp+0}},
+    {{+6.703903964971297e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
+    {{+6.703903964971298e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
+    {{+1e-154, +1.4156865331029228e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
+    {{+1e-154, +1.415686533102923e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
+};
+
+constexpr complex_unary_test_case<float> log_float_cases[] = {
+    // normal cases
+    {{+0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.dac670p-1F}},
+    {{+0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.dac670p-1F}},
+    {{-0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.1b6e1ap+1F}},
+    {{-0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.1b6e1ap+1F}},
+    {{+0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.dac670p-1F}},
+    {{+0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.dac670p-1F}},
+    {{-0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.1b6e1ap+1F}},
+    {{-0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.1b6e1ap+1F}},
+    {{+0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.dac670p-1F}},
+    {{+0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.dac670p-1F}},
+    {{-0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.1b6e1ap+1F}},
+    {{-0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.1b6e1ap+1F}},
+
+    // special cases
+    {{+1.0F, +0.0F}, {0.0F, +0.0F}, {true, true}},
+    {{+1.0F, -0.0F}, {0.0F, -0.0F}, {true, true}},
+    {{+0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
+    {{+0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
+    {{-0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
+    {{-0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
+    {{-1.0F, +0.0F}, {0.0F, +pi_v<float>}, {true, false}},
+    {{-1.0F, -0.0F}, {0.0F, -pi_v<float>}, {true, false}},
+
+#if !FP_PRESET_FAST
+    {{+0.0F, +0.0F}, {-float_inf, +0.0F}, {true, true}},
+    {{+0.0F, -0.0F}, {-float_inf, -0.0F}, {true, true}},
+    {{-0.0F, +0.0F}, {-float_inf, +pi_v<float>}, {true, false}},
+    {{-0.0F, -0.0F}, {-float_inf, -pi_v<float>}, {true, false}},
+    {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
+    {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
+    {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
+    {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
+    {{+float_inf, +float_inf}, {+float_inf, +pi_over_4_v<float>}, {true, false}},
+    {{+float_inf, -float_inf}, {+float_inf, -pi_over_4_v<float>}, {true, false}},
+    {{+1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
+    {{+1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
+    {{+0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
+    {{+0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
+    {{-0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
+    {{-0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
+    {{-1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
+    {{-1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
+    {{-float_inf, +float_inf}, {+float_inf, +pi_3_over_4_v<float>}, {true, false}},
+    {{-float_inf, -float_inf}, {+float_inf, -pi_3_over_4_v<float>}, {true, false}},
+    {{-float_inf, +1.0F}, {+float_inf, +pi_v<float>}, {true, false}},
+    {{-float_inf, -1.0F}, {+float_inf, -pi_v<float>}, {true, false}},
+    {{-float_inf, +0.0F}, {+float_inf, +pi_v<float>}, {true, false}},
+    {{-float_inf, -0.0F}, {+float_inf, -pi_v<float>}, {true, false}},
+    {{+float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
+    {{-float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
+    {{float_nan, +float_inf}, {+float_inf, float_nan}, {true, true}},
+    {{float_nan, -float_inf}, {+float_inf, float_nan}, {true, true}},
+    {{float_nan, +0.0F}, {float_nan, float_nan}, {true, true}},
+    {{+0.0F, float_nan}, {float_nan, float_nan}, {true, true}},
+    {{float_nan, float_nan}, {float_nan, float_nan}, {true, true}},
+#endif // !FP_PRESET_FAST
+
+    // abs(z) overflows
+    {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.644714p+6F, +0x1.921fb6p-1F}},
+    {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.62e430p+6F, +0x1.4f1a6cp+1F}},
+    {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.62e430p+6F, -0x0.000000p-126F}},
+
+    // norm(z) overflows
+    {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.d2f46cp+5F, -0x1.0468a8p+1F}},
+    {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.6389c2p+5F, -0x1.4978fap-1F}},
+
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    // abs(z) underflows
+    {{-0x0.000002p-126F, +0x0.000002p-126F}, {-0x1.9bbabcp+6F, +0x1.2d97c8p+1F}},
+    {{+0x0.000002p-126F, +0x0.800000p-126F}, {-0x1.601e68p+6F, +0x1.921fb2p+0F}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+
+    // abs(z) close to underflow
+    {{+0x1.4p-126F, +0x1p-126F}, {-0x1.5b76d6p+6F, +0x1.5977a6p-1F}},
+
+    // norm(z) underflows
+    {{+0x1.ef2d10p-83F, -0x1.ef2d10p-84F}, {-0x1.c6144ap+5F, -0x1.dac670p-2F}},
+    {{-0x1.622d70p-61F, -0x1.d83c94p-61F}, {-0x1.4b9280p+5F, -0x1.1b6e1ap+1F}},
+
+    // z close to 1
+    {{+0x1.000002p+0F, -0.0F}, {+0x1.fffffep-24F, -0.0F}, {false, true}},
+    {{+0x1.fffffep-1F, +0.0F}, {-0x1.000000p-24F, +0.0F}, {false, true}},
+#if !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x1.000002p+0F, -0x0.000002p-126F}, {+0x1.fffffep-24F, -0x0.000002p-126F}},
+    {{+0x1.000000p+0F, +0x0.000002p-126F}, {+0x0.000000p-126F, +0x0.000002p-126F}},
+    {{+0x1.fffffep-1F, -0x0.000002p-126F}, {-0x1.000000p-24F, -0x0.000002p-126F}},
+#endif // !WITH_FP_ABRUPT_UNDERFLOW
+    {{+0x1.000002p+0F, +0x1.000000p-126F}, {+0x1.fffffep-24F, +0x0.fffffep-126F}},
+    {{+0x1.000000p+0F, -0x1.000000p-126F}, {+0x0.000000p+0F, -0x1.000000p-126F}},
+    {{+0x1.fffffep-1F, +0x1.000000p-126F}, {-0x1.000000p-24F, +0x1.000002p-126F}},
+    {{+0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.fffffcp-24F}},
+    {{+0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.000000p-23F}},
+    {{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}},
+    {{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}},
+
+    // z close to -1, i or -i
+    {{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}},
+    {{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}},
+    {{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}},
+    {{+0x1.000000p-23F, +0x1.000002p+0F}, {+0x1.000000p-23F, +0x1.921fb4p+0F}},
+    {{-0x1.000000p-23F, +0x1.000000p+0F}, {+0x1.000000p-47F, +0x1.921fb8p+0F}},
+    {{+0x1.000000p-23F, +0x1.fffffep-1F}, {-0x1.fffffcp-25F, +0x1.921fb4p+0F}},
+    {{-0x1.000000p-23F, -0x1.000002p+0F}, {+0x1.000000p-23F, -0x1.921fb8p+0F}},
+    {{+0x1.000000p-23F, -0x1.000000p+0F}, {+0x1.000000p-47F, -0x1.921fb4p+0F}},
+    {{-0x1.000000p-23F, -0x1.fffffep-1F}, {-0x1.fffffcp-25F, -0x1.921fb8p+0F}},
+
+    // abs(z) close to 1
+    {{+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}, {-0x1.26055cp-26F, +0x1.921fb6p-1F}},
+    {{+0x1.6a09e8p-1F, -0x1.6a09e8p-1F}, {+0x1.20888ep-24F, -0x1.921fb6p-1F}},
+    {{-0x1.333334p-1F, -0x1.99999ap-1F}, {+0x1.99999ap-26F, -0x1.1b6e1ap+1F}},
+    {{-0x1.333332p-1F, +0x1.99999ap-1F}, {-0x1.999998p-27F, +0x1.1b6e18p+1F}},
+    {{+0x1.69fbe8p-1F, +0x1.69fbe8p-1F}, {-0x1.3caab8p-13F, +0x1.921fb6p-1F}},
+    {{-0x1.d89d8ap-1F, +0x1.89d89ep-2F}, {+0x1.d89d8ap-28F, +0x1.5f9732p+1F}},
+
+    // control flow edge cases
+    {{+0x1p-1F, +0x1.fffffep-2F}, {-0x1.62e432p-2F, +0x1.921fb4p-1F}},
+    {{+0x1p-1F, +0x1.000000p-1F}, {-0x1.62e430p-2F, +0x1.921fb6p-1F}},
+    {{+0x1p-1F, +0x1.000002p-1F}, {-0x1.62e42cp-2F, +0x1.921fb8p-1F}},
+    {{+0x1p-1F, +0x1.a88728p+0F}, {+0x1.193ea6p-1F, +0x1.4727f6p+0F}},
+    {{+0x1p-1F, +0x1.a8872ap+0F}, {+0x1.193ea8p-1F, +0x1.4727f6p+0F}},
+    {{+0x1p-1F, +0x1.a8872cp+0F}, {+0x1.193eaap-1F, +0x1.4727f8p+0F}},
+    {{+9.223371e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
+    {{+9.2233715e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
+    {{+7e-20F, +4.440892e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
+    {{+7e-20F, +4.4408926e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
+};
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
index 363c3649f2..ffce4d5ec2 100644
--- a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
@@ -5,6 +5,7 @@
 #include <complex>
 
 #include "floating_point_utils.hpp"
+#include "log_test_cases.hpp"
 #include "sqrt_test_cases.hpp"
 
 using namespace std;
@@ -14,7 +15,7 @@ void test_sqrt(const rounding_mode mode) {
 #if FP_PRESET_FAST
     constexpr int ulp_tolerance = 4;
 #else // ^^^ fp:fast / not fp:fast vvv
-    const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2;
+    const int ulp_tolerance                          = is_directed_rounding_mode(mode) ? 3 : 2;
 #endif // ^^^ not fp:fast ^^^
 
     const auto check_result = [&](const auto& result, const auto& test_case) {
@@ -52,8 +53,72 @@ void test_sqrt(const rounding_mode mode) {
     }
 }
 
+void test_log(const rounding_mode mode) {
+#if FP_PRESET_FAST
+    constexpr int ulp_tolerance = 4;
+    // under /fp:fast, allow inaccurate real(log(z)) when |z| is close to 1
+    constexpr double real_absolute_epsilon_tolerance = 4;
+#else // ^^^ fp:fast / not fp:fast vvv
+    const int ulp_tolerance                          = is_directed_rounding_mode(mode) ? 3 : 2;
+    constexpr double real_absolute_epsilon_tolerance = 0;
+#endif // ^^^ not fp:fast ^^^
+
+    const auto check_result = [&](const auto& result, const auto& test_case) {
+        using Float = decltype(result.real());
+
+        constexpr auto epsilon            = static_cast<double>(numeric_limits<Float>::epsilon());
+        const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance;
+        const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance;
+        const double case_real_absolute_tolerance =
+            test_case.result_exactness.real ? 0.0 : real_absolute_epsilon_tolerance * epsilon;
+
+        // TRANSITION: under rounding toward negative mode, log(1.0) returns +0.0 on x86, -0.0 on x64
+        const auto is_mod_exactly_one = [](const auto& z) {
+            // no other complex<binary_floating_point> has mod of exactly 1
+            return (abs(real(z)) == 1 && imag(z) == 0) || (real(z) == 0 && abs(imag(z)) == 1);
+        };
+
+        if (mode == rounding_mode::toward_negative && is_mod_exactly_one(test_case.input)) {
+            return abs(result.real()) <= case_real_absolute_tolerance
+                   && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
+        }
+
+        return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance,
+                   case_real_absolute_tolerance)
+               && near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
+    };
+
+    for (const auto& c : log_double_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return log(c.input);
+        }();
+
+        assert(check_result(result, c));
+    }
+
+    for (const auto& c : log_float_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return log(c.input);
+        }();
+
+        assert(check_result(result, c));
+    }
+
+    for (const auto& c : log_double_cases) {
+        const auto result = [&] {
+            rounding_guard guard(mode);
+            return log(static_cast<complex<long double>>(c.input));
+        }();
+
+        assert(check_result(static_cast<complex<double>>(result), c));
+    }
+}
+
 int main() {
     for (const auto& mode : all_rounding_modes) {
         test_sqrt(mode);
+        test_log(mode);
     }
 }

From b33c29eeab300cb21aee91634a5edf07ffbda9ae Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sat, 8 Aug 2020 18:07:00 +0800
Subject: [PATCH 06/19] Remove internal header file from CMake source file list

---
 stl/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt
index 6c5e1d445a..c7cfc34446 100644
--- a/stl/CMakeLists.txt
+++ b/stl/CMakeLists.txt
@@ -240,7 +240,6 @@ endforeach()
 # Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib.
 set(IMPLIB_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/src/float_multi_prec.hpp
     ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp

From e25900933e2b1bff5a2fd28ab8703deadde444ff Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sat, 8 Aug 2020 18:09:22 +0800
Subject: [PATCH 07/19] Add new file to MSBuild project

---
 stl/msbuild/stl_base/stl.files.settings.targets | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets
index 5fd1b26310..f1ef5cc9c9 100644
--- a/stl/msbuild/stl_base/stl.files.settings.targets
+++ b/stl/msbuild/stl_base/stl.files.settings.targets
@@ -171,6 +171,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
         <BuildFiles Include="
             $(CrtRoot)\github\stl\src\filesystem.cpp;
             $(CrtRoot)\github\stl\src\locale0_implib.cpp;
+            $(CrtRoot)\github\stl\src\math_algorithms.cpp;
             $(CrtRoot)\github\stl\src\nothrow.cpp;
             $(CrtRoot)\github\stl\src\sharedmutex.cpp;
             $(CrtRoot)\github\stl\src\syserror_import_lib.cpp;

From ccf5cdc44db72cafebc146ce3efcc25ec4db03f6 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sat, 8 Aug 2020 20:05:01 +0800
Subject: [PATCH 08/19] Fix log(complex{1, tiny}) incorrectly returning -0
 under FE_DOWNWARD

---
 stl/src/math_algorithms.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp
index dd28c435d4..8530a558ad 100644
--- a/stl/src/math_algorithms.cpp
+++ b/stl/src/math_algorithms.cpp
@@ -118,7 +118,13 @@ namespace {
             constexpr _Ty _Norm_big   = _Ty{3.0};
 
             const _Ty _Bv_sqr = _Bv * _Bv;
-            const _Ty _Norm   = _Av * _Av + _Bv_sqr;
+
+            if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows
+                                   // _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode
+                return _Logp1(_Bv_sqr) * _Ty{0.5};
+            }
+
+            const _Ty _Norm = _Av * _Av + _Bv_sqr;
 
             if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
                 return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};

From 5bcece44ae1251777921372b2a6d2dee3d81de21 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sat, 8 Aug 2020 20:23:36 +0800
Subject: [PATCH 09/19] Use hardware FMA on arm64

---
 stl/src/float_multi_prec.hpp | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
index 6599616df5..452a039245 100644
--- a/stl/src/float_multi_prec.hpp
+++ b/stl/src/float_multi_prec.hpp
@@ -91,6 +91,17 @@ namespace _Float_multi_prec {
     }
 #endif // defined(_M_IX86) || defined(_M_X64)
 
+#if defined(_M_ARM64)
+    _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
+        const float64x1_t _Mx      = vld1_f64(&_Xval);
+        const float64x1_t _Mprod0  = vld1_f64(&_Prod0);
+        const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx);
+        double _Result;
+        vst1_f64(&_Result, _Mresult);
+        return _Result;
+    }
+#endif // defined(_M_ARM64)
+
     _NODISCARD inline constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
         const double _Prod0 = _Xval * _Xval;
 
@@ -103,9 +114,13 @@ namespace _Float_multi_prec {
             } else {
                 return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
             }
-#else // ^^^ x86, x64 / arm, arm64 vvv
+#elif defined(_M_ARM64) // ^^^ x86, x64 / arm64 vvv
+            // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
+            // Both floating-point and NEON support are presumed to be present in hardware.
+            return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)};
+#else // ^^^ arm64 / arm vvv
             return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
-#endif // ^^^ arm, arm64 ^^^
+#endif // ^^^ arm ^^^
         }
     }
 } // namespace _Float_multi_prec

From 09e805a38d054c0b420f1ca9620c2f574373125c Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Fri, 14 Aug 2020 21:05:16 +0800
Subject: [PATCH 10/19] Fix shadowing

---
 stl/inc/complex | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/stl/inc/complex b/stl/inc/complex
index 60fa9c911c..f2aeeed5c7 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -1624,7 +1624,7 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return mag
 
 // FUNCTION TEMPLATE log
 template <class _Ty>
-_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept {
+_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types
     return static_cast<_Ty>(
         __std_math_log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
 };
@@ -1635,9 +1635,9 @@ _NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
 
 template <class _Ty>
 _NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
-    const _Ty _Log_abs = _STD _Log_abs(_Left); // get logarithm of magnitude
-    const _Ty _Theta   = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase
-    return complex<_Ty>(_Log_abs, _Theta);
+    const _Ty _Log_abs_v = _STD _Log_abs(_Left); // get logarithm of magnitude
+    const _Ty _Theta     = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase
+    return complex<_Ty>(_Log_abs_v, _Theta);
 }
 
 // FUNCTION TEMPLATE pow

From d1e2ab811b71fa63bed21dbd5cf8eeb397b60af4 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 16 Aug 2020 20:54:33 +0800
Subject: [PATCH 11/19] Add comments

---
 stl/src/float_multi_prec.hpp | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
index 452a039245..5dfc8f1a25 100644
--- a/stl/src/float_multi_prec.hpp
+++ b/stl/src/float_multi_prec.hpp
@@ -30,11 +30,21 @@ namespace _Float_multi_prec {
     template <class _Ty>
     struct _Fmp_t<_Ty, 2> {
         static_assert(_STD is_floating_point_v<_Ty>);
-        _Ty _Val0;
-        _Ty _Val1;
+        _Ty _Val0; // most significant numeric_limis<_Ty>::precision bits
+        _Ty _Val1; // least significant numeric_limis<_Ty>::precision bits
     };
 
     // addition
+
+    // 1x precision + 1x precision -> 2x precision
+    // the result is exact when:
+    // 1) the result doesn't overflow
+    // 2) either underflow is gradual, or no internal underflow occurs
+    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
+    // 4) parameters and local variables do not retain extra intermediate precision
+    // 5) rounding mode is rounding to nearest
+    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
+    // violation of other conditions could lead to worse results
     template <class _Ty>
     _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
         const _Ty _Sum0 = _Xval + _Yval;
@@ -45,7 +55,17 @@ namespace _Float_multi_prec {
         return {_Sum0, _Xerr + _Yerr};
     }
 
+    // 1x precision + 1x precision -> 2x precision
     // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
+    // the result is exact when:
+    // 0) the requirement above is satisfied
+    // 1) no internal overflow occurs
+    // 2) either underflow is gradual, or no internal underflow occurs
+    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
+    // 4) parameters and local variables do not retain extra intermediate precision
+    // 5) rounding mode is rounding to nearest
+    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
+    // violation of other conditions could lead to worse results
     template <class _Ty>
     _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
         const _Ty _Sum0 = _Xval + _Yval;
@@ -54,6 +74,7 @@ namespace _Float_multi_prec {
         return {_Sum0, _Yerr};
     }
 
+    // 1x precision + 2x precision -> 2x precision
     // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
     template <class _Ty>
     _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
@@ -61,6 +82,7 @@ namespace _Float_multi_prec {
         return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
     }
 
+    // 2x precision + 2x precision -> 1x precision
     template <class _Ty>
     _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
         const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
@@ -68,12 +90,19 @@ namespace _Float_multi_prec {
     }
 
     // multiplication
+
+    // round to 26 significant bits, ties to zero
     _NODISCARD inline constexpr double _High_half(const double _Val) {
         const auto _Bits           = _STD _Bit_cast<unsigned long long>(_Val);
         const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;
         return _STD _Bit_cast<double>(_High_half_bits);
     }
 
+    // _Xval * _Xval - _Prod0
+    // the result is exact when:
+    // 1) _Prod0 is _Xval^2 faithfully rounded
+    // 2) no internal overflow or underflow occurs
+    // violation of condition 1 could lead to relative error on the order of epsilon
     _NODISCARD inline constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
         const double _Xhigh = _High_half(_Xval);
         const double _Xlow  = _Xval - _Xhigh;
@@ -102,6 +131,8 @@ namespace _Float_multi_prec {
     }
 #endif // defined(_M_ARM64)
 
+    // square(1x precision) -> 2x precision
+    // the result is exact when no internal overflow or underflow occurs
     _NODISCARD inline constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
         const double _Prod0 = _Xval * _Xval;
 

From 0b281c7d972b1e9a043239acc8d645bfd90e0f4b Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Thu, 27 Aug 2020 15:26:33 +0800
Subject: [PATCH 12/19] Fix calling nonexistent ::signbit

---
 stl/inc/complex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/inc/complex b/stl/inc/complex
index f2aeeed5c7..054333ecf5 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -115,7 +115,7 @@ public:
     }
 
     static bool _Signbit(_Ty _Left) {
-        return _CSTD signbit(static_cast<double>(_Left));
+        return (_STD signbit)(static_cast<double>(_Left));
     }
 
     static _Ty _Sinh(_Ty _Left, _Ty _Right) { // return sinh(_Left) * _Right

From 5853238e712d504869f1575b50681f9400e7e363 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Fri, 28 Aug 2020 01:24:13 +0800
Subject: [PATCH 13/19] Fix comments

---
 stl/src/float_multi_prec.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
index 5dfc8f1a25..d3dbd2f55c 100644
--- a/stl/src/float_multi_prec.hpp
+++ b/stl/src/float_multi_prec.hpp
@@ -91,7 +91,7 @@ namespace _Float_multi_prec {
 
     // multiplication
 
-    // round to 26 significant bits, ties to zero
+    // round to 26 significant bits, ties toward zero
     _NODISCARD inline constexpr double _High_half(const double _Val) {
         const auto _Bits           = _STD _Bit_cast<unsigned long long>(_Val);
         const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;

From ad16ad0f9ecb300f504e79c387fa7e44c68f4112 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 6 Sep 2020 19:46:46 +0800
Subject: [PATCH 14/19] Code review comments

Co-authored-by: Curtis J Bezault <curtbezault@gmail.com>
---
 .../env.lst                                   | 30 +----------------
 .../std/tests/floating_point_model_matrix.lst | 32 +++++++++++++++++++
 2 files changed, 33 insertions(+), 29 deletions(-)
 create mode 100644 tests/std/tests/floating_point_model_matrix.lst

diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
index 918098b2eb..a5fd2defde 100644
--- a/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/env.lst
@@ -1,32 +1,4 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-RUNALL_INCLUDE ..\prefix.lst
-RUNALL_CROSSLIST
-PM_CL="/FIfenv_prefix.hpp"
-RUNALL_CROSSLIST
-PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
-RUNALL_CROSSLIST
-PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2"
-PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
-PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-"
-RUNALL_CROSSLIST
-PM_CL=""
-PM_CL="/arch:IA32"
-PM_CL="/arch:AVX2"
-PM_CL="/arch:VFPv4"
-RUNALL_CROSSLIST
-PM_CL="/fp:strict  /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1"
-PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1"
-PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0"
-PM_CL="/fp:fast    /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0"
-RUNALL_CROSSLIST
-PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0"
-PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj"
-RUNALL_CROSSLIST
-PM_CL="/DFP_CONTRACT_MODE=0" PM_CLANG="-ffp-contract=off"
-PM_CL="/DFP_CONTRACT_MODE=1" PM_CLANG="-ffp-contract=on"
-PM_CL="/DFP_CONTRACT_MODE=2" PM_CLANG="-ffp-contract=fast"
+RUNALL_INCLUDE ..\floating_point_model_matrix.lst
diff --git a/tests/std/tests/floating_point_model_matrix.lst b/tests/std/tests/floating_point_model_matrix.lst
new file mode 100644
index 0000000000..63e5f1eecb
--- /dev/null
+++ b/tests/std/tests/floating_point_model_matrix.lst
@@ -0,0 +1,32 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE .\prefix.lst
+RUNALL_CROSSLIST
+PM_CL="/FIfenv_prefix.hpp"
+RUNALL_CROSSLIST
+PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
+RUNALL_CROSSLIST
+PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2"
+PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
+PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-"
+RUNALL_CROSSLIST
+PM_CL=""
+PM_CL="/arch:IA32"
+PM_CL="/arch:AVX2"
+PM_CL="/arch:VFPv4"
+RUNALL_CROSSLIST
+PM_CL="/fp:strict  /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1"
+PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1"
+PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0"
+PM_CL="/fp:fast    /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0"
+RUNALL_CROSSLIST
+PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0"
+PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj"
+RUNALL_CROSSLIST
+PM_CL="/DFP_CONTRACT_MODE=0 /clang:-ffp-contract=off"
+PM_CL="/DFP_CONTRACT_MODE=1 /clang:-ffp-contract=on"
+PM_CL="/DFP_CONTRACT_MODE=2 /clang:-ffp-contract=fast"

From be1431b2e3f92bb28788a9191c6ddcaf5a26b398 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 6 Sep 2020 19:51:21 +0800
Subject: [PATCH 15/19] Remove PM_CLANG

---
 tests/utils/stl/test/tests.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/utils/stl/test/tests.py b/tests/utils/stl/test/tests.py
index 3d5cc2e013..6da478208a 100644
--- a/tests/utils/stl/test/tests.py
+++ b/tests/utils/stl/test/tests.py
@@ -141,8 +141,6 @@ def _configure_cxx(self, lit_config, envlst_entry, default_cxx):
         link_flags.extend(envlst_entry.getEnvVal('PM_LINK', '').split())
 
         if ('clang'.casefold() in os.path.basename(cxx).casefold()):
-            flags.extend(map(lambda x : '/clang:' + x, envlst_entry.getEnvVal('PM_CLANG', '').split()))
-
             target_arch = self.config.target_arch.casefold()
             if (target_arch == 'x64'.casefold()):
                 compile_flags.append('-m64')

From 2b39e7b2b93ce84cfeab3158fc385e89efa54172 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Wed, 4 Nov 2020 02:29:17 -0800
Subject: [PATCH 16/19] Code review feedback.

---
 stl/inc/complex                                | 10 +++++-----
 stl/src/float_multi_prec.hpp                   | 16 ++++++++--------
 tests/std/include/fenv_prefix.hpp              |  2 +-
 .../floating_point_utils.hpp                   | 18 +++++++++---------
 .../log_test_cases.hpp                         |  4 ++--
 .../test.cpp                                   |  1 +
 .../std/tests/floating_point_model_matrix.lst  |  6 +++---
 7 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/stl/inc/complex b/stl/inc/complex
index 91e2d7c53a..0c2f51d9f4 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -1545,10 +1545,10 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) {
 
 // FUNCTION TEMPLATE _Fabs
 template <class _Ty>
-_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // used by sqrt(), return magnitude and scale factor.
-                                                   // returns a non-zero even integer in *_Pexp when _Left is finite
+_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return magnitude and scale factor.
+                                                   // Returns a non-zero even integer in *_Pexp when _Left is finite
                                                    // and non-zero.
-                                                   // returns 0 in *_Pexp when _Left is zero, infinity or NaN.
+                                                   // Returns 0 in *_Pexp when _Left is zero, infinity, or NaN.
     *_Pexp  = 0;
     _Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left));
     _Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left));
@@ -1614,11 +1614,11 @@ template <class _Ty>
 _NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types
     return static_cast<_Ty>(
         __std_math_log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
-};
+}
 
 _NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
     return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left));
-};
+}
 
 template <class _Ty>
 _NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
index d3dbd2f55c..7239519f68 100644
--- a/stl/src/float_multi_prec.hpp
+++ b/stl/src/float_multi_prec.hpp
@@ -29,9 +29,9 @@ namespace _Float_multi_prec {
 
     template <class _Ty>
     struct _Fmp_t<_Ty, 2> {
-        static_assert(_STD is_floating_point_v<_Ty>);
-        _Ty _Val0; // most significant numeric_limis<_Ty>::precision bits
-        _Ty _Val1; // least significant numeric_limis<_Ty>::precision bits
+        static_assert(is_floating_point_v<_Ty>);
+        _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits
+        _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits
     };
 
     // addition
@@ -92,10 +92,10 @@ namespace _Float_multi_prec {
     // multiplication
 
     // round to 26 significant bits, ties toward zero
-    _NODISCARD inline constexpr double _High_half(const double _Val) {
-        const auto _Bits           = _STD _Bit_cast<unsigned long long>(_Val);
+    _NODISCARD constexpr double _High_half(const double _Val) noexcept {
+        const auto _Bits           = _Bit_cast<unsigned long long>(_Val);
         const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;
-        return _STD _Bit_cast<double>(_High_half_bits);
+        return _Bit_cast<double>(_High_half_bits);
     }
 
     // _Xval * _Xval - _Prod0
@@ -103,7 +103,7 @@ namespace _Float_multi_prec {
     // 1) _Prod0 is _Xval^2 faithfully rounded
     // 2) no internal overflow or underflow occurs
     // violation of condition 1 could lead to relative error on the order of epsilon
-    _NODISCARD inline constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
+    _NODISCARD constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
         const double _Xhigh = _High_half(_Xval);
         const double _Xlow  = _Xval - _Xhigh;
         return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow;
@@ -133,7 +133,7 @@ namespace _Float_multi_prec {
 
     // square(1x precision) -> 2x precision
     // the result is exact when no internal overflow or underflow occurs
-    _NODISCARD inline constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
+    _NODISCARD constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
         const double _Prod0 = _Xval * _Xval;
 
         if (_STD is_constant_evaluated()) {
diff --git a/tests/std/include/fenv_prefix.hpp b/tests/std/include/fenv_prefix.hpp
index ab1a82fe54..bd3055a41f 100644
--- a/tests/std/include/fenv_prefix.hpp
+++ b/tests/std/include/fenv_prefix.hpp
@@ -37,7 +37,7 @@
 #endif // ^^^ invalid FP_CONTRACT_MODE ^^^
 
 #endif // ^^^ MSVC ^^^
-#endif // defined(FP_CONTRACT_MODE) && !defined(__clang__)
+#endif // defined(FP_CONTRACT_MODE)
 
 #include <cassert>
 #include <float.h>
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
index 22c2ad18f7..0688d763ea 100644
--- a/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/floating_point_utils.hpp
@@ -36,13 +36,13 @@ namespace fputil {
     _INLINE_VAR constexpr float_bits_t<T> infinity_bits_v = exponent_mask_v<T>;
 
     // not affected by abrupt underflow
-    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
     constexpr bool iszero(const T& x) {
         return _STD _Float_abs_bits(x) == 0;
     }
 
     // not affected by /fp:fast
-    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
     constexpr bool signbit(const T& x) {
         const auto bits = std::_Bit_cast<float_bits_t<T>>(x);
         return (bits & sign_mask_v<T>) != 0;
@@ -126,7 +126,7 @@ namespace fputil {
 
     // compares whether two floating point values are equal
     // all NaNs are equal, +0.0 and -0.0 are not equal
-    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
     bool precise_equal(const T& actual, const T& expected) {
         if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
             return _STD _Is_nan(actual) == _STD _Is_nan(expected);
@@ -137,13 +137,13 @@ namespace fputil {
 
     namespace detail {
         // 0x80...00 = zero, 0x80...01 = numeric_limits<T>::denorm_min(), 0x7f...ff = -numeric_limits<T>::denorm_min()
-        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
         float_bits_t<T> offset_representation(const T& x) {
             const float_bits_t<T> abs_bits = _STD _Float_abs_bits(x);
             return fputil::signbit(x) ? sign_mask_v<T> - abs_bits : sign_mask_v<T> + abs_bits;
         }
 
-        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
         float_bits_t<T> is_offset_value_subnormal_or_zero(const float_bits_t<T> offset_value) {
             constexpr float_bits_t<T> positive_norm_min_offset = sign_mask_v<T> + norm_min_bits_v<T>;
             constexpr float_bits_t<T> negative_norm_min_offset = sign_mask_v<T> - norm_min_bits_v<T>;
@@ -152,7 +152,7 @@ namespace fputil {
         }
 
         // number of ulps above zero, if we count [0, numeric_limits<T>::min()) as 1 ulp
-        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
         double abrupt_underflow_ulp(const float_bits_t<T> offset_value) {
             using bits_type = float_bits_t<T>;
 
@@ -170,7 +170,7 @@ namespace fputil {
             }
         }
 
-        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
         bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) {
             if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
                 return _STD _Is_nan(actual) == _STD _Is_nan(expected);
@@ -213,7 +213,7 @@ namespace fputil {
             return false;
         }
 
-        template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+        template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
         bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) {
             return _STD _Is_finite(actual) && _STD _Is_finite(expected)
                    && std::abs(actual - expected) <= absolute_tolerance;
@@ -221,7 +221,7 @@ namespace fputil {
     } // namespace detail
 
     // returns whether floating point result is nearly equal to the expected value
-    template <typename T, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
+    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
     bool near_equal(
         const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) {
         if (precise_equal(actual, expected)) {
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
index 29b349e440..d67e12dd1b 100644
--- a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
@@ -114,7 +114,7 @@ constexpr complex_unary_test_case<double> log_double_cases[] = {
     {{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}},
     {{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}},
 
-    // z close to -1, i or -i
+    // z close to -1, i, or -i
     {{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}},
     {{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}},
     {{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}},
@@ -243,7 +243,7 @@ constexpr complex_unary_test_case<float> log_float_cases[] = {
     {{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}},
     {{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}},
 
-    // z close to -1, i or -i
+    // z close to -1, i, or -i
     {{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}},
     {{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}},
     {{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}},
diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
index ffce4d5ec2..dc4b03cd6a 100644
--- a/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
+++ b/tests/std/tests/GH_000935_complex_numerical_accuracy/test.cpp
@@ -3,6 +3,7 @@
 
 #include <cassert>
 #include <complex>
+#include <limits>
 
 #include "floating_point_utils.hpp"
 #include "log_test_cases.hpp"
diff --git a/tests/std/tests/floating_point_model_matrix.lst b/tests/std/tests/floating_point_model_matrix.lst
index 63e5f1eecb..0aba02fbe4 100644
--- a/tests/std/tests/floating_point_model_matrix.lst
+++ b/tests/std/tests/floating_point_model_matrix.lst
@@ -7,9 +7,9 @@ PM_CL="/FIfenv_prefix.hpp"
 RUNALL_CROSSLIST
 PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
 RUNALL_CROSSLIST
-PM_CL="/Od /MDd /D_ITERATOR_DEBUG_LEVEL=2"
-PM_CL="/O2 /MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
-PM_CL="/O2 /MT /D_ITERATOR_DEBUG_LEVEL=0 /GL"
+PM_CL="/Od /MDd"
+PM_CL="/O2 /MD /permissive-"
+PM_CL="/O2 /MT /GL"
 PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd"
 PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT"
 PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-"

From 984c8a42b66c6acd6eef26ff960c138e43ef05e5 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Thu, 5 Nov 2020 23:01:12 -0800
Subject: [PATCH 17/19] Add -Wno-unused-command-line-argument for internal
 tests

---
 tests/std/tests/floating_point_model_matrix.lst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/std/tests/floating_point_model_matrix.lst b/tests/std/tests/floating_point_model_matrix.lst
index 0aba02fbe4..8a9b369c3b 100644
--- a/tests/std/tests/floating_point_model_matrix.lst
+++ b/tests/std/tests/floating_point_model_matrix.lst
@@ -10,9 +10,10 @@ RUNALL_CROSSLIST
 PM_CL="/Od /MDd"
 PM_CL="/O2 /MD /permissive-"
 PM_CL="/O2 /MT /GL"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /Od /MTd"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MT"
-PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing /O2 /MD /Oi-"
+# TRANSITION, -Wno-unused-command-line-argument is needed for the internal test harness
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /Od /MTd"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MT"
+PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MD /Oi-"
 RUNALL_CROSSLIST
 PM_CL=""
 PM_CL="/arch:IA32"

From 5a089e9036a268cebe7d5d0471e56d120dbdda1a Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Sun, 8 Nov 2020 18:16:22 -0800
Subject: [PATCH 18/19] Use header-only code to fix /clr:pure.

---
 stl/CMakeLists.txt                            |   1 -
 stl/inc/complex                               | 317 +++++++++++++++++-
 stl/inc/xutility                              |   1 -
 .../stl_base/stl.files.settings.targets       |   1 -
 stl/src/float_multi_prec.hpp                  | 158 ---------
 stl/src/math_algorithms.cpp                   | 157 ---------
 6 files changed, 310 insertions(+), 325 deletions(-)
 delete mode 100644 stl/src/float_multi_prec.hpp
 delete mode 100644 stl/src/math_algorithms.cpp

diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt
index a7ada94f13..394f28cd06 100644
--- a/stl/CMakeLists.txt
+++ b/stl/CMakeLists.txt
@@ -245,7 +245,6 @@ endforeach()
 set(IMPLIB_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/src/math_algorithms.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp
diff --git a/stl/inc/complex b/stl/inc/complex
index 0c2f51d9f4..3eafcb9602 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -10,9 +10,27 @@
 #if _STL_COMPILER_PREPROCESSOR
 #include <cmath>
 #include <cstdint>
+#include <limits>
 #include <sstream>
+#include <type_traits>
+#include <xutility>
 #include <ymath.h>
 
+#ifdef _M_CEE_PURE
+// no intrinsics for /clr:pure
+#elif defined(__clang__)
+// TRANSITION, not using FMA intrinsics for Clang yet
+#elif defined(_M_IX86) || defined(_M_X64)
+#define _FMP_USING_X86_X64_INTRINSICS
+#include <emmintrin.h>
+#include <isa_availability.h>
+extern "C" int __isa_available;
+extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
+#elif defined(_M_ARM64)
+#define _FMP_USING_ARM64_INTRINSICS
+#include <arm64/arm64_neon.h>
+#endif // ^^^ defined(_M_ARM64) ^^^
+
 #pragma pack(push, _CRT_PACKING)
 #pragma warning(push, _STL_WARNING_LEVEL)
 #pragma warning(disable : _STL_DISABLED_WARNINGS)
@@ -40,12 +58,297 @@ struct _C_ldouble_complex {
 #define _RE 0
 #define _IM 1
 
-_EXTERN_C
-_NODISCARD double __stdcall __std_math_log_hypot(double, double) noexcept;
-_NODISCARD float __stdcall __std_math_log_hypotf(float, float) noexcept;
-_END_EXTERN_C
-
 _STD_BEGIN
+
+// implements multi-precision floating point arithmetic for numerical algorithms
+#pragma float_control(precise, on, push)
+namespace _Float_multi_prec {
+    // multi-precision floating point types
+    template <class _Ty, int _Prec>
+    struct _Fmp_t;
+
+    template <class _Ty>
+    struct _Fmp_t<_Ty, 2> {
+        static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
+        _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits
+        _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits
+    };
+
+    // addition
+
+    // 1x precision + 1x precision -> 2x precision
+    // the result is exact when:
+    // 1) the result doesn't overflow
+    // 2) either underflow is gradual, or no internal underflow occurs
+    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
+    // 4) parameters and local variables do not retain extra intermediate precision
+    // 5) rounding mode is rounding to nearest
+    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
+    // violation of other conditions could lead to worse results
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
+        const _Ty _Sum0 = _Xval + _Yval;
+        const _Ty _Ymod = _Sum0 - _Xval;
+        const _Ty _Xmod = _Sum0 - _Ymod;
+        const _Ty _Yerr = _Yval - _Ymod;
+        const _Ty _Xerr = _Xval - _Xmod;
+        return {_Sum0, _Xerr + _Yerr};
+    }
+
+    // 1x precision + 1x precision -> 2x precision
+    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
+    // the result is exact when:
+    // 0) the requirement above is satisfied
+    // 1) no internal overflow occurs
+    // 2) either underflow is gradual, or no internal underflow occurs
+    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
+    // 4) parameters and local variables do not retain extra intermediate precision
+    // 5) rounding mode is rounding to nearest
+    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
+    // violation of other conditions could lead to worse results
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
+        const _Ty _Sum0 = _Xval + _Yval;
+        const _Ty _Ymod = _Sum0 - _Xval;
+        const _Ty _Yerr = _Yval - _Ymod;
+        return {_Sum0, _Yerr};
+    }
+
+    // 1x precision + 2x precision -> 2x precision
+    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
+    template <class _Ty>
+    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
+        const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0);
+        return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
+    }
+
+    // 2x precision + 2x precision -> 1x precision
+    template <class _Ty>
+    _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
+        const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
+        return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1));
+    }
+
+    // multiplication
+
+    // round to 26 significant bits, ties toward zero
+    _NODISCARD _CONSTEXPR_BIT_CAST double _High_half(const double _Val) noexcept {
+        const auto _Bits           = _Bit_cast<unsigned long long>(_Val);
+        const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;
+        return _Bit_cast<double>(_High_half_bits);
+    }
+
+    // _Xval * _Xval - _Prod0
+    // the result is exact when:
+    // 1) _Prod0 is _Xval^2 faithfully rounded
+    // 2) no internal overflow or underflow occurs
+    // violation of condition 1 could lead to relative error on the order of epsilon
+    _NODISCARD _CONSTEXPR_BIT_CAST double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
+        const double _Xhigh = _High_half(_Xval);
+        const double _Xlow  = _Xval - _Xhigh;
+        return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow;
+    }
+
+#ifdef _FMP_USING_X86_X64_INTRINSICS
+    _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept {
+        const __m128d _Mx      = _mm_set_sd(_Xval);
+        const __m128d _Mprod0  = _mm_set_sd(_Prod0);
+        const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0);
+        double _Result;
+        _mm_store_sd(&_Result, _Mresult);
+        return _Result;
+    }
+#endif // _FMP_USING_X86_X64_INTRINSICS
+
+#ifdef _FMP_USING_ARM64_INTRINSICS
+    _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
+        const float64x1_t _Mx      = vld1_f64(&_Xval);
+        const float64x1_t _Mprod0  = vld1_f64(&_Prod0);
+        const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx);
+        double _Result;
+        vst1_f64(&_Result, _Mresult);
+        return _Result;
+    }
+#endif // _FMP_USING_ARM64_INTRINSICS
+
+    // square(1x precision) -> 2x precision
+    // the result is exact when no internal overflow or underflow occurs
+    _NODISCARD inline _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
+        const double _Prod0 = _Xval * _Xval;
+
+#if defined(_FMP_USING_X86_X64_INTRINSICS)
+
+#ifdef __AVX2__
+        return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
+#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
+        const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2;
+        if (_Definitely_have_fma) {
+            return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
+        } else {
+            return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
+        }
+#endif // ^^^ !defined(__AVX2__) ^^^
+
+#elif defined(_FMP_USING_ARM64_INTRINSICS)
+        // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
+        // Both floating-point and NEON support are presumed to be present in hardware.
+        return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)};
+#else // ^^^ defined(_FMP_USING_ARM64_INTRINSICS) / not using intrinsics vvv
+        return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
+#endif // ^^^ not using intrinsics ^^^
+    }
+} // namespace _Float_multi_prec
+#pragma float_control(pop)
+
+#undef _FMP_USING_X86_X64_INTRINSICS
+#undef _FMP_USING_ARM64_INTRINSICS
+
+#define _FMP ::std::_Float_multi_prec::
+
+// implements numerical algorithms for <complex>
+namespace _Math_algorithms {
+    // TRANSITION: sqrt() isn't constexpr
+    // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)());
+    // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon());
+    template <class _Ty>
+    struct _Hypot_leg_huge_helper {
+        static constexpr _Ty value{6.703903964971298e+153};
+    };
+    template <>
+    struct _Hypot_leg_huge_helper<float> {
+        static constexpr float value{9.2233715e+18f};
+    };
+    template <class _Ty>
+    _INLINE_VAR constexpr _Ty _Hypot_leg_huge = _Hypot_leg_huge_helper<_Ty>::value;
+
+    template <class _Ty>
+    struct _Hypot_leg_tiny_helper {
+        static constexpr _Ty value{1.4156865331029228e-146};
+    };
+    template <>
+    struct _Hypot_leg_tiny_helper<float> {
+        static constexpr float value{4.440892e-16f};
+    };
+    template <class _Ty>
+    _INLINE_VAR constexpr _Ty _Hypot_leg_tiny = _Hypot_leg_tiny_helper<_Ty>::value;
+
+    template <class _Ty>
+    _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept {
+        // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12
+        // returns _Xval * _Xval + _Yval * _Yval - 1
+        const _FMP _Fmp_t<_Ty, 2> _Xsqr    = _FMP _Sqr_x2(_Xval);
+        const _FMP _Fmp_t<_Ty, 2> _Ysqr    = _FMP _Sqr_x2(_Yval);
+        const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr);
+        return _Add_x1(_Xsqr_m1, _Ysqr);
+    }
+
+    _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept {
+        const auto _Dx = static_cast<double>(_Xval);
+        const auto _Dy = static_cast<double>(_Yval);
+        return static_cast<float>((_Dx * _Dx - 1.0) + _Dy * _Dy);
+    }
+
+    // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes
+    template <class _Ty>
+    _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval)
+        static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
+
+        if (_Is_nan(_Xval)) { // NaN
+            return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN
+        }
+
+        if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate
+            if (_Xval == (numeric_limits<_Ty>::max)()) { // avoid overflow
+                return _STD log(_Xval);
+            }
+
+            return _STD log(_Ty{1.0} + _Xval);
+        }
+
+        const _Ty _Xabs = _Float_abs(_Xval);
+        if (_Xabs < numeric_limits<_Ty>::epsilon()) { // zero or tiny
+            if (_Xval == _Ty{0.0}) {
+                return _Xval;
+            }
+
+            // honor rounding mode, raise FE_INEXACT
+            return _Xval - _Ty{0.5} * _Xval * _Xval;
+        }
+
+        // compute log(1 + _Xval) with fixup for small _Xval
+        const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval);
+        return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0;
+    }
+
+    template <class _Ty>
+    _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval))
+        static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
+
+        if (!_Is_finite(_Xval) || !_Is_finite(_Yval)) { // Inf or NaN
+            // raise FE_INVALID and return NaN if at least one of them is a signaling NaN
+            if (_Is_signaling_nan(_Xval) || _Is_signaling_nan(_Yval)) {
+                return _Xval + _Yval;
+            }
+
+            // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN
+            if (_Is_inf(_Xval)) {
+                return _Float_abs(_Xval);
+            }
+
+            if (_Is_inf(_Yval)) {
+                return _Float_abs(_Yval);
+            }
+
+            // at least one of them is a quiet NaN, and the other is not an infinity
+            return _Xval + _Yval;
+        }
+
+        _Ty _Av = _Float_abs(_Xval);
+        _Ty _Bv = _Float_abs(_Yval);
+
+        if (_Av < _Bv) { // ensure that _Bv <= _Av
+            _STD swap(_Av, _Bv);
+        }
+
+        if (_Bv == 0) {
+            return _STD log(_Av);
+        }
+
+        if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow
+            constexpr _Ty _Norm_small = _Ty{0.5};
+            constexpr _Ty _Norm_big   = _Ty{3.0};
+
+            const _Ty _Bv_sqr = _Bv * _Bv;
+
+            if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows
+                                   // _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode
+                return _Logp1(_Bv_sqr) * _Ty{0.5};
+            }
+
+            const _Ty _Norm = _Av * _Av + _Bv_sqr;
+
+            if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
+                return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};
+            } else {
+                return _STD log(_Norm) * _Ty{0.5};
+            }
+        } else { // use 1 1/2 precision to preserve bits
+            constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L};
+            constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L};
+
+            const int _Exponent      = _STD ilogb(_Av);
+            const _Ty _Av_scaled     = _STD scalbn(_Av, -_Exponent);
+            const _Ty _Bv_scaled     = _STD scalbn(_Bv, -_Exponent);
+            const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled;
+            const _Ty _Norm_scaled   = _Av_scaled * _Av_scaled + _Bv_scaled_sqr;
+            const _Ty _Real_shifted  = _STD log(_Norm_scaled) * _Ty{0.5};
+            return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
+        }
+    }
+} // namespace _Math_algorithms
+
+#undef _FMP
+
 using _Dcomplex_value = _CSTD _C_double_complex;
 using _Fcomplex_value = _CSTD _C_float_complex;
 using _Lcomplex_value = _CSTD _C_ldouble_complex;
@@ -1613,11 +1916,11 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return mag
 template <class _Ty>
 _NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types
     return static_cast<_Ty>(
-        __std_math_log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
+        _Math_algorithms::_Log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
 }
 
 _NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
-    return __std_math_log_hypotf(_STD real(_Left), _STD imag(_Left));
+    return _Math_algorithms::_Log_hypot(_STD real(_Left), _STD imag(_Left));
 }
 
 template <class _Ty>
diff --git a/stl/inc/xutility b/stl/inc/xutility
index 2714892919..ca2499bd1c 100644
--- a/stl/inc/xutility
+++ b/stl/inc/xutility
@@ -6205,7 +6205,6 @@ struct _Nontrivial_dummy_type {
 _STL_INTERNAL_STATIC_ASSERT(!is_trivially_default_constructible_v<_Nontrivial_dummy_type>);
 
 _STD_END
-#undef _CONSTEXPR_BIT_CAST
 #pragma pop_macro("new")
 _STL_RESTORE_CLANG_WARNINGS
 #pragma warning(pop)
diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets
index f1ef5cc9c9..5fd1b26310 100644
--- a/stl/msbuild/stl_base/stl.files.settings.targets
+++ b/stl/msbuild/stl_base/stl.files.settings.targets
@@ -171,7 +171,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
         <BuildFiles Include="
             $(CrtRoot)\github\stl\src\filesystem.cpp;
             $(CrtRoot)\github\stl\src\locale0_implib.cpp;
-            $(CrtRoot)\github\stl\src\math_algorithms.cpp;
             $(CrtRoot)\github\stl\src\nothrow.cpp;
             $(CrtRoot)\github\stl\src\sharedmutex.cpp;
             $(CrtRoot)\github\stl\src\syserror_import_lib.cpp;
diff --git a/stl/src/float_multi_prec.hpp b/stl/src/float_multi_prec.hpp
deleted file mode 100644
index 7239519f68..0000000000
--- a/stl/src/float_multi_prec.hpp
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// implements multi-precision floating point arithmetic for numerical algorithms
-
-// This must be as small as possible, because its contents are
-// injected into the msvcprt.lib and msvcprtd.lib import libraries.
-// Do not include or define anything else here.
-// In particular, basic_string must not be included here.
-
-#pragma once
-
-#if defined(_M_FP_FAST) || defined(__FAST_MATH__)
-#error float_multi_prec.hpp should not be compiled with /fp:fast or -ffast-math
-#endif // defined(_M_FP_FAST) || defined(__FAST_MATH__)
-
-#include <intrin.h>
-#include <isa_availability.h>
-#include <type_traits>
-#include <xutility>
-
-extern "C" int __isa_available;
-
-_STD_BEGIN
-namespace _Float_multi_prec {
-    // multi-precision floating point types
-    template <class _Ty, int _Prec>
-    struct _Fmp_t;
-
-    template <class _Ty>
-    struct _Fmp_t<_Ty, 2> {
-        static_assert(is_floating_point_v<_Ty>);
-        _Ty _Val0; // most significant numeric_limits<_Ty>::precision bits
-        _Ty _Val1; // least significant numeric_limits<_Ty>::precision bits
-    };
-
-    // addition
-
-    // 1x precision + 1x precision -> 2x precision
-    // the result is exact when:
-    // 1) the result doesn't overflow
-    // 2) either underflow is gradual, or no internal underflow occurs
-    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
-    // 4) parameters and local variables do not retain extra intermediate precision
-    // 5) rounding mode is rounding to nearest
-    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
-    // violation of other conditions could lead to worse results
-    template <class _Ty>
-    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
-        const _Ty _Sum0 = _Xval + _Yval;
-        const _Ty _Ymod = _Sum0 - _Xval;
-        const _Ty _Xmod = _Sum0 - _Ymod;
-        const _Ty _Yerr = _Yval - _Ymod;
-        const _Ty _Xerr = _Xval - _Xmod;
-        return {_Sum0, _Xerr + _Yerr};
-    }
-
-    // 1x precision + 1x precision -> 2x precision
-    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
-    // the result is exact when:
-    // 0) the requirement above is satisfied
-    // 1) no internal overflow occurs
-    // 2) either underflow is gradual, or no internal underflow occurs
-    // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
-    // 4) parameters and local variables do not retain extra intermediate precision
-    // 5) rounding mode is rounding to nearest
-    // violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
-    // violation of other conditions could lead to worse results
-    template <class _Ty>
-    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
-        const _Ty _Sum0 = _Xval + _Yval;
-        const _Ty _Ymod = _Sum0 - _Xval;
-        const _Ty _Yerr = _Yval - _Ymod;
-        return {_Sum0, _Yerr};
-    }
-
-    // 1x precision + 2x precision -> 2x precision
-    // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
-    template <class _Ty>
-    _NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
-        const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0);
-        return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
-    }
-
-    // 2x precision + 2x precision -> 1x precision
-    template <class _Ty>
-    _NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
-        const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
-        return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1));
-    }
-
-    // multiplication
-
-    // round to 26 significant bits, ties toward zero
-    _NODISCARD constexpr double _High_half(const double _Val) noexcept {
-        const auto _Bits           = _Bit_cast<unsigned long long>(_Val);
-        const auto _High_half_bits = (_Bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL;
-        return _Bit_cast<double>(_High_half_bits);
-    }
-
-    // _Xval * _Xval - _Prod0
-    // the result is exact when:
-    // 1) _Prod0 is _Xval^2 faithfully rounded
-    // 2) no internal overflow or underflow occurs
-    // violation of condition 1 could lead to relative error on the order of epsilon
-    _NODISCARD constexpr double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
-        const double _Xhigh = _High_half(_Xval);
-        const double _Xlow  = _Xval - _Xhigh;
-        return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow;
-    }
-
-#if defined(_M_IX86) || defined(_M_X64)
-    _NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept {
-        const __m128d _Mx      = _mm_set_sd(_Xval);
-        const __m128d _Mprod0  = _mm_set_sd(_Prod0);
-        const __m128d _Mresult = _mm_fmsub_sd(_Mx, _Mx, _Mprod0);
-        double _Result;
-        _mm_store_sd(&_Result, _Mresult);
-        return _Result;
-    }
-#endif // defined(_M_IX86) || defined(_M_X64)
-
-#if defined(_M_ARM64)
-    _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
-        const float64x1_t _Mx      = vld1_f64(&_Xval);
-        const float64x1_t _Mprod0  = vld1_f64(&_Prod0);
-        const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx);
-        double _Result;
-        vst1_f64(&_Result, _Mresult);
-        return _Result;
-    }
-#endif // defined(_M_ARM64)
-
-    // square(1x precision) -> 2x precision
-    // the result is exact when no internal overflow or underflow occurs
-    _NODISCARD constexpr _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
-        const double _Prod0 = _Xval * _Xval;
-
-        if (_STD is_constant_evaluated()) {
-            return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
-        } else {
-#if defined(_M_IX86) || defined(_M_X64)
-            if (__isa_available >= __ISA_AVAILABLE_AVX2) {
-                return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
-            } else {
-                return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
-            }
-#elif defined(_M_ARM64) // ^^^ x86, x64 / arm64 vvv
-            // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
-            // Both floating-point and NEON support are presumed to be present in hardware.
-            return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)};
-#else // ^^^ arm64 / arm vvv
-            return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
-#endif // ^^^ arm ^^^
-        }
-    }
-} // namespace _Float_multi_prec
-_STD_END
diff --git a/stl/src/math_algorithms.cpp b/stl/src/math_algorithms.cpp
deleted file mode 100644
index 8530a558ad..0000000000
--- a/stl/src/math_algorithms.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// implements numerical algorithms for <complex>
-
-// This must be as small as possible, because its contents are
-// injected into the msvcprt.lib and msvcprtd.lib import libraries.
-// Do not include or define anything else here.
-// In particular, basic_string must not be included here.
-
-#include <cmath>
-#include <limits>
-#include <type_traits>
-#include <xutility>
-
-#include "float_multi_prec.hpp"
-
-#define _FMP ::std::_Float_multi_prec::
-
-namespace {
-    // TRANSITION: sqrt() isn't constexpr
-    // _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)());
-    // _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon());
-    template <class _Ty>
-    inline constexpr _Ty _Hypot_leg_huge = _Ty{6.703903964971298e+153};
-    template <>
-    inline constexpr float _Hypot_leg_huge<float> = 9.2233715e+18f;
-
-    template <class _Ty>
-    inline constexpr _Ty _Hypot_leg_tiny = _Ty{1.4156865331029228e-146};
-    template <>
-    inline constexpr float _Hypot_leg_tiny<float> = 4.440892e-16f;
-
-    template <class _Ty>
-    _NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept {
-        // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12
-        // returns _Xval * _Xval + _Yval * _Yval - 1
-        const _FMP _Fmp_t<_Ty, 2> _Xsqr    = _FMP _Sqr_x2(_Xval);
-        const _FMP _Fmp_t<_Ty, 2> _Ysqr    = _FMP _Sqr_x2(_Yval);
-        const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr);
-        return _Add_x1(_Xsqr_m1, _Ysqr);
-    }
-
-    _NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept {
-        const auto _Dx = static_cast<double>(_Xval);
-        const auto _Dy = static_cast<double>(_Yval);
-        return static_cast<float>((_Dx * _Dx - 1.0) + _Dy * _Dy);
-    }
-
-    // TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes
-    template <class _Ty>
-    _NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval)
-        static_assert(_STD is_floating_point_v<_Ty>);
-
-        if (_STD _Is_nan(_Xval)) { // NaN
-            return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN
-        }
-
-        if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate
-            if (_Xval == (_STD numeric_limits<_Ty>::max)()) { // avoid overflow
-                return _STD log(_Xval);
-            }
-
-            return _STD log(_Ty{1.0} + _Xval);
-        }
-
-        const _Ty _Xabs = _STD _Float_abs(_Xval);
-        if (_Xabs < _STD numeric_limits<_Ty>::epsilon()) { // zero or tiny
-            if (_Xval == _Ty{0.0}) {
-                return _Xval;
-            }
-
-            // honor rounding mode, raise FE_INEXACT
-            return _Xval - _Ty{0.5} * _Xval * _Xval;
-        }
-
-        // compute log(1 + _Xval) with fixup for small _Xval
-        const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval);
-        return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0;
-    }
-
-    template <class _Ty>
-    _NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval))
-        static_assert(_STD is_floating_point_v<_Ty>);
-
-        if (!_STD _Is_finite(_Xval) || !_STD _Is_finite(_Yval)) { // Inf or NaN
-            // raise FE_INVALID and return NaN if at least one of them is a signaling NaN
-            if (_STD _Is_signaling_nan(_Xval) || _STD _Is_signaling_nan(_Yval)) {
-                return _Xval + _Yval;
-            }
-
-            // return +Inf if at least one of them is an infinity, even when the other is a quiet NaN
-            if (_STD _Is_inf(_Xval)) {
-                return _STD _Float_abs(_Xval);
-            }
-
-            if (_STD _Is_inf(_Yval)) {
-                return _STD _Float_abs(_Yval);
-            }
-
-            // at least one of them is a quiet NaN, and the other is not an infinity
-            return _Xval + _Yval;
-        }
-
-        _Ty _Av = _STD _Float_abs(_Xval);
-        _Ty _Bv = _STD _Float_abs(_Yval);
-
-        if (_Av < _Bv) { // ensure that _Bv <= _Av
-            _STD swap(_Av, _Bv);
-        }
-
-        if (_Bv == 0) {
-            return _STD log(_Av);
-        }
-
-        if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow
-            constexpr _Ty _Norm_small = _Ty{0.5};
-            constexpr _Ty _Norm_big   = _Ty{3.0};
-
-            const _Ty _Bv_sqr = _Bv * _Bv;
-
-            if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows
-                                   // _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode
-                return _Logp1(_Bv_sqr) * _Ty{0.5};
-            }
-
-            const _Ty _Norm = _Av * _Av + _Bv_sqr;
-
-            if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
-                return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};
-            } else {
-                return _STD log(_Norm) * _Ty{0.5};
-            }
-        } else { // use 1 1/2 precision to preserve bits
-            constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L};
-            constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L};
-
-            const int _Exponent      = _STD ilogb(_Av);
-            const _Ty _Av_scaled     = _STD scalbn(_Av, -_Exponent);
-            const _Ty _Bv_scaled     = _STD scalbn(_Bv, -_Exponent);
-            const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled;
-            const _Ty _Norm_scaled   = _Av_scaled * _Av_scaled + _Bv_scaled_sqr;
-            const _Ty _Real_shifted  = _STD log(_Norm_scaled) * _Ty{0.5};
-            return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
-        }
-    }
-} // unnamed namespace
-
-_EXTERN_C
-_NODISCARD double __stdcall __std_math_log_hypot(const double _Xval, const double _Yval) noexcept {
-    return _Log_hypot(_Xval, _Yval);
-}
-
-_NODISCARD float __stdcall __std_math_log_hypotf(const float _Xval, const float _Yval) noexcept {
-    return _Log_hypot(_Xval, _Yval);
-}
-_END_EXTERN_C

From ba88342dd6cde2bfe1605554c7aaecf0eace6273 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Sun, 8 Nov 2020 20:24:59 -0800
Subject: [PATCH 19/19] Include arm64_neon.h, no arm64 subdirectory.

---
 stl/inc/complex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/inc/complex b/stl/inc/complex
index 3eafcb9602..60b017fdb9 100644
--- a/stl/inc/complex
+++ b/stl/inc/complex
@@ -28,7 +28,7 @@ extern "C" int __isa_available;
 extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
 #elif defined(_M_ARM64)
 #define _FMP_USING_ARM64_INTRINSICS
-#include <arm64/arm64_neon.h>
+#include <arm64_neon.h>
 #endif // ^^^ defined(_M_ARM64) ^^^
 
 #pragma pack(push, _CRT_PACKING)