Fix incorrect result of complex log/log10/pow on ARM64 (microsoft#2870)

Co-authored-by: Stephan T. Lavavej <[email protected]>
fsb4000 · Aug 13, 2022 · 9ce0bd2 · 9ce0bd2
1 parent bcdaf57
commit 9ce0bd2
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 24 deletions.
diff --git a/stl/inc/complex b/stl/inc/complex
@@ -18,18 +18,19 @@
 
 #ifdef _M_CEE_PURE
 // no intrinsics for /clr:pure
-#elif defined(__clang__)
-// TRANSITION, not using FMA intrinsics for Clang yet
-#elif defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+// https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#base-requirements
+// Both floating-point and NEON support are presumed to be present in hardware.
+#define _FMP_USING_STD_FMA
+#elif defined(__clang__) // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) ^^^
+// TRANSITION, not using x86/x64 FMA intrinsics for Clang yet
+#elif defined(_M_IX86) || defined(_M_X64)
 #define _FMP_USING_X86_X64_INTRINSICS
 #include <emmintrin.h>
 #include <isa_availability.h>
 extern "C" int __isa_available;
 extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
-#elif defined(_M_ARM64) || defined(_M_ARM64EC)
-#define _FMP_USING_ARM64_INTRINSICS
-#include <arm64_neon.h>
-#endif // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) ^^^
+#endif // ^^^ defined(_M_IX86) || defined(_M_X64) ^^^
 
 #pragma pack(push, _CRT_PACKING)
 #pragma warning(push, _STL_WARNING_LEVEL)
@@ -78,7 +79,7 @@ namespace _Float_multi_prec {
 
     // 1x precision + 1x precision -> 2x precision
     // the result is exact when:
-    // 1) the result doesn't overflow
+    // 1) no internal overflow occurs
     // 2) either underflow is gradual, or no internal underflow occurs
     // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
     // 4) parameters and local variables do not retain extra intermediate precision
@@ -99,7 +100,7 @@ namespace _Float_multi_prec {
     // requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
     // the result is exact when:
     // 0) the requirement above is satisfied
-    // 1) no internal overflow occurs
+    // 1) the result doesn't overflow
     // 2) either underflow is gradual, or no internal underflow occurs
     // 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
     // 4) parameters and local variables do not retain extra intermediate precision
@@ -160,16 +161,11 @@ namespace _Float_multi_prec {
     }
 #endif // _FMP_USING_X86_X64_INTRINSICS
 
-#ifdef _FMP_USING_ARM64_INTRINSICS
-    _NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
-        const float64x1_t _Mx      = vld1_f64(&_Xval);
-        const float64x1_t _Mprod0  = vld1_f64(&_Prod0);
-        const float64x1_t _Mresult = vfma_f64(vneg_f64(_Mprod0), _Mx, _Mx);
-        double _Result;
-        vst1_f64(&_Result, _Mresult);
-        return _Result;
+#ifdef _FMP_USING_STD_FMA
+    _NODISCARD inline double _Sqr_error_std_fma(const double _Xval, const double _Prod0) noexcept {
+        return _STD fma(_Xval, _Xval, -_Prod0);
     }
-#endif // _FMP_USING_ARM64_INTRINSICS
+#endif // _FMP_USING_STD_FMA
 
     // square(1x precision) -> 2x precision
     // the result is exact when no internal overflow or underflow occurs
@@ -189,19 +185,17 @@ namespace _Float_multi_prec {
         }
 #endif // ^^^ !defined(__AVX2__) ^^^
 
-#elif defined(_FMP_USING_ARM64_INTRINSICS)
-        // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
-        // Both floating-point and NEON support are presumed to be present in hardware.
-        return {_Prod0, _Sqr_error_arm64_neon(_Xval, _Prod0)};
-#else // ^^^ defined(_FMP_USING_ARM64_INTRINSICS) / not using intrinsics vvv
+#elif defined(_FMP_USING_STD_FMA)
+        return {_Prod0, _Sqr_error_std_fma(_Xval, _Prod0)};
+#else // ^^^ defined(_FMP_USING_STD_FMA) / not using intrinsics vvv
         return {_Prod0, _Sqr_error_fallback(_Xval, _Prod0)};
 #endif // ^^^ not using intrinsics ^^^
     }
 } // namespace _Float_multi_prec
 #pragma float_control(pop)
 
 #undef _FMP_USING_X86_X64_INTRINSICS
-#undef _FMP_USING_ARM64_INTRINSICS
+#undef _FMP_USING_STD_FMA
 
 #define _FMP _STD _Float_multi_prec::
 

diff --git a/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp b/tests/std/tests/GH_000935_complex_numerical_accuracy/log_test_cases.hpp
@@ -32,6 +32,10 @@ constexpr complex_unary_test_case<double> log_double_cases[] = {
     {{-0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.1b6e192ebbe44p+1}},
     {{-0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.1b6e192ebbe44p+1}},
 
+    // DevCom-10088405: Incorrect result for std::complex operations on ARM64 platform
+    {{0.1, 1.2}, {0.18578177821624148, 1.4876550949064553}},
+    {{-1.1698230349239351, 0.46519593659281616}, {0.23025850929940467, 2.763102111592855}},
+
     // special cases
     {{+1.0, +0.0}, {0.0, +0.0}, {true, true}},
     {{+1.0, -0.0}, {0.0, -0.0}, {true, true}},