From ae6a88dc50b9a74a6db47817397a40a151d92ac7 Mon Sep 17 00:00:00 2001 From: Geotale Date: Sun, 8 Sep 2024 16:14:21 -0500 Subject: [PATCH 1/3] Initial Change For Fixes Matches hardware in rounding paired singles after move operations!! Move operations consist of operations which only transfer direct bits This also accounts for ps_rsqrte, because it has a similar quirk Specifically in hardware they're rounded in accordance to their slot: - PS0 rounds *only the mantissa* in accordance to the set rounding mode - PS1 truncates the mantissa ps_rsqrte also truncates for PS0 for some reason ^^; This has all been tested on hardware, along with a few edge case tests Co-Authored-By: JosJuice --- .../Interpreter/Interpreter_Paired.cpp | 101 +++++++++++++++--- 1 file changed, 84 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index 69048d453037..abb5f129b96f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -10,6 +10,66 @@ #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h" #include "Core/PowerPC/PowerPC.h" +// Instructions which move data without performing operations round a bit weirdly +// Specifically they rounding the mantissa to be like that of a 32-bit float, +// going as far as to focus on the rounding mode, but never actually care about +// making sure the exponent becomes 32-bit +// Either this, or they'll truncate the mantissa down, which will always happen to +// PS1 OR PS0 in ps_rsqrte +inline u64 TruncateMantissaBits(u64 bits) +{ + // Truncation can be done by simply cutting off the mantissa bits that don't + // exist in a single precision float + constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH; + constexpr u64 remove_mask = (1 << remove_bits) - 1; + return bits & ~remove_mask; +} + +inline double TruncateMantissa(double value) +{ + u64 bits = std::bit_cast(value); + u64 trunc_bits = TruncateMantissaBits(bits); + return std::bit_cast(trunc_bits); +} + +inline u64 RoundMantissaBits(u64 bits) +{ + // Checking if the value is non-finite + if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP) + { + // For infinite and NaN values, the mantissa is simply truncated + return TruncateMantissaBits(bits); + } + + const u64 replacement_exp = 0x4000000000000000ull; + + // To round only the mantissa, we assume the CPU can change the rounding mode, + // create new double with an exponent that won't cause issues, round to a single, + // and convert back to a double while restoring the original exponent again! + // The removing the exponent is done via subtraction instead of bitwise + // operations due to the possibility that the rounding will cause an overflow + // into the exponent + u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp; + + float rounded_float = static_cast(std::bit_cast(resized_bits)); + double extended_float = static_cast(rounded_float); + u64 rounded_bits = std::bit_cast(extended_float); + + u64 orig_exp_bits = bits & Common::DOUBLE_EXP; + rounded_bits = (rounded_bits - replacement_exp) | orig_exp_bits; + + return rounded_bits; +} + +inline double RoundMantissa(double value) +{ + // The double version of the function just converts to and from bits again + // This would be a necessary step anyways, so it just simplifies code + u64 bits = std::bit_cast(value); + u64 rounded_bits = RoundMantissaBits(bits); + return std::bit_cast(rounded_bits); +} + // These "binary instructions" do not alter FPSCR. void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst) { @@ -18,8 +78,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst) const auto& b = ppc_state.ps[inst.FB]; const auto& c = ppc_state.ps[inst.FC]; - ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(), - a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble()); + double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(); + double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble(); + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1)); if (inst.Rc) ppc_state.UpdateCR1(); @@ -30,8 +91,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63), - b.PS1AsU64() ^ (UINT64_C(1) << 63)); + u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63); + u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63); + ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); if (inst.Rc) ppc_state.UpdateCR1(); @@ -40,7 +102,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst) void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; - ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB]; + const auto& b = ppc_state.ps[inst.FB]; + + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble())); if (inst.Rc) ppc_state.UpdateCR1(); @@ -51,8 +115,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63), - b.PS1AsU64() | (UINT64_C(1) << 63)); + u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63); + u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63); + ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); if (inst.Rc) ppc_state.UpdateCR1(); @@ -63,8 +128,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63), - b.PS1AsU64() & ~(UINT64_C(1) << 63)); + u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63); + u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63); + ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); if (inst.Rc) ppc_state.UpdateCR1(); @@ -77,7 +143,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble()); + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble())); if (inst.Rc) ppc_state.UpdateCR1(); @@ -89,7 +155,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble()); + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble())); if (inst.Rc) ppc_state.UpdateCR1(); @@ -101,7 +167,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble()); + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble())); if (inst.Rc) ppc_state.UpdateCR1(); @@ -113,7 +179,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble()); + ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble())); if (inst.Rc) ppc_state.UpdateCR1(); @@ -191,8 +257,9 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) SetFPException(ppc_state, FPSCR_VXSNAN); - const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0)); - const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1)); + // For some reason ps0 is also truncated for this operation rather than rounded + const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0)); + const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1)); ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1); ppc_state.UpdateFPRFSingle(dst_ps0); @@ -359,7 +426,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst) const float ps0 = ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value); - const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble()); + const double ps1 = TruncateMantissa(c.PS1AsDouble()); ppc_state.ps[inst.FD].SetBoth(ps0, ps1); ppc_state.UpdateFPRFSingle(ps0); @@ -375,7 +442,7 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst) const auto& b = ppc_state.ps[inst.FB]; const auto& c = ppc_state.ps[inst.FC]; - const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble()); + const double ps0 = RoundMantissa(c.PS0AsDouble()); const float ps1 = ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value); From f0e6a1363f689a137eac29012e41703e160542bd Mon Sep 17 00:00:00 2001 From: Geotale Date: Wed, 2 Oct 2024 01:09:23 -0500 Subject: [PATCH 2/3] Fix To Match Hwtests Also changes >= 0 to > 0.0 This technically leads to fewer branches taken ^^; More importantly it looks/feels nicer to me Fixes the approximate reciprocal function - Currently not optimized - Considering rewrite for cleanliness Moves PS rounding to FloatUtils - Done because it's used in more places now Changes TruncateMantissa to occur on read - This is to account for reciprocal cases Adds PS1 getting function for reciprocals Fixes ps_sum1 edge case with rounding TODO: Test what ops can set PS1 edge case - ps_merge is known to be able to --- Source/Core/Common/FloatUtils.cpp | 76 ++++++++----- Source/Core/Common/FloatUtils.h | 85 +++++++++++++- .../Interpreter/Interpreter_FloatingPoint.cpp | 2 +- .../Interpreter/Interpreter_Paired.cpp | 106 +++++------------- Source/Core/Core/PowerPC/PowerPC.cpp | 20 +++- Source/Core/Core/PowerPC/PowerPC.h | 10 +- 6 files changed, 190 insertions(+), 109 deletions(-) diff --git a/Source/Core/Common/FloatUtils.cpp b/Source/Core/Common/FloatUtils.cpp index a671eb278ce6..6fd271d4d435 100644 --- a/Source/Core/Common/FloatUtils.cpp +++ b/Source/Core/Common/FloatUtils.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "Common/FloatUtils.h" +#include "Core/PowerPC/Gekko.h" #include #include @@ -98,7 +99,7 @@ double ApproximateReciprocalSquareRoot(double val) } // Special case NaN-ish numbers - if (exponent == (0x7FFLL << 52)) + if (exponent == DOUBLE_EXP) { if (mantissa == 0) { @@ -123,7 +124,7 @@ double ApproximateReciprocalSquareRoot(double val) exponent -= 1LL << 52; mantissa <<= 1; } while (!(mantissa & (1LL << 52))); - mantissa &= (1LL << 52) - 1; + mantissa &= DOUBLE_FRAC; exponent += 1LL << 52; } @@ -139,51 +140,70 @@ double ApproximateReciprocalSquareRoot(double val) } const std::array fres_expected = {{ - {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313}, - {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261}, - {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5}, - {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b}, - {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143}, - {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a}, - {0x041c00, 0x108}, {0x020c00, 0x106}, + {0xfff000, -0x3e1}, {0xf07000, -0x3a7}, {0xe1d400, -0x371}, {0xd41000, -0x340}, + {0xc71000, -0x313}, {0xbac400, -0x2ea}, {0xaf2000, -0x2c4}, {0xa41000, -0x2a0}, + {0x999000, -0x27f}, {0x8f9400, -0x261}, {0x861000, -0x245}, {0x7d0000, -0x22a}, + {0x745800, -0x212}, {0x6c1000, -0x1fb}, {0x642800, -0x1e5}, {0x5c9400, -0x1d1}, + {0x555000, -0x1be}, {0x4e5800, -0x1ac}, {0x47ac00, -0x19b}, {0x413c00, -0x18b}, + {0x3b1000, -0x17c}, {0x352000, -0x16e}, {0x2f5c00, -0x15b}, {0x29f000, -0x15b}, + {0x248800, -0x143}, {0x1f7c00, -0x143}, {0x1a7000, -0x12d}, {0x15bc00, -0x12d}, + {0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106}, }}; // Used by fres and ps_res. -double ApproximateReciprocal(double val) +double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val) { - s64 integral = std::bit_cast(val); - const s64 mantissa = integral & ((1LL << 52) - 1); - const s64 sign = integral & (1ULL << 63); - s64 exponent = integral & (0x7FFLL << 52); + u64 integral = std::bit_cast(val); + + // Convert into a float when possible + u64 signless = integral & ~DOUBLE_SIGN; + const u32 mantissa = + static_cast((integral & DOUBLE_FRAC) >> (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH)); + const u32 sign = static_cast((integral >> 32) & FLOAT_SIGN); + s32 exponent = static_cast((integral & DOUBLE_EXP) >> DOUBLE_FRAC_WIDTH) - 0x380; + + // The largest floats possible just return 0 + const u64 huge_float = fpscr.NI ? 0x47d0000000000000ULL : 0x4940000000000000ULL; // Special case 0 - if (mantissa == 0 && exponent == 0) + if (signless == 0) return std::copysign(std::numeric_limits::infinity(), val); - // Special case NaN-ish numbers - if (exponent == (0x7FFLL << 52)) + // Special case huge or NaN-ish numbers + if (signless >= huge_float) { - if (mantissa == 0) + if (!std::isnan(val)) return std::copysign(0.0, val); return 0.0 + val; } // Special case small inputs - if (exponent < (895LL << 52)) + if (exponent < -1) return std::copysign(std::numeric_limits::max(), val); - // Special case large inputs - if (exponent >= (1149LL << 52)) - return std::copysign(0.0, val); - - exponent = (0x7FDLL << 52) - exponent; + exponent = 253 - exponent; - const int i = static_cast(mantissa >> 37); + const u32 i = static_cast(mantissa >> 8); const auto& entry = fres_expected[i / 1024]; - integral = sign | exponent; - integral |= static_cast(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29; + const u32 new_mantissa = static_cast(entry.m_base + entry.m_dec * (i % 1024)) / 2; - return std::bit_cast(integral); + u32 result = sign | (static_cast(exponent) << FLOAT_FRAC_WIDTH) | new_mantissa; + if (exponent <= 0) + { + // Result is subnormal so format it properly! + if (fpscr.NI) + { + // Flush to 0 if inexact + result = sign; + } + else + { + // Shift by the exponent amount + u32 shift = 1 + static_cast(-exponent); + result = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift); + } + } + return static_cast(std::bit_cast(result)); } } // namespace Common diff --git a/Source/Core/Common/FloatUtils.h b/Source/Core/Common/FloatUtils.h index 0456a7665eea..46636577c199 100644 --- a/Source/Core/Common/FloatUtils.h +++ b/Source/Core/Common/FloatUtils.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include "Core/PowerPC/Gekko.h" namespace Common { @@ -96,6 +97,88 @@ extern const std::array fres_expected; // PowerPC approximation algorithms double ApproximateReciprocalSquareRoot(double val); -double ApproximateReciprocal(double val); +double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val); + +// Instructions which move data without performing operations round a bit weirdly +// Specifically they rounding the mantissa to be like that of a 32-bit float, +// going as far as to focus on the rounding mode, but never actually care about +// making sure the exponent becomes 32-bit +// Either this, or they'll truncate the mantissa down, which will always happen to +// PS1 OR PS0 in ps_rsqrte +inline u64 TruncateMantissaBits(u64 bits) +{ + // Truncation can be done by simply cutting off the mantissa bits that don't + // exist in a single precision float + constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH; + constexpr u64 remove_mask = (1 << remove_bits) - 1; + return bits & ~remove_mask; +} + +inline double TruncateMantissa(double value) +{ + u64 bits = std::bit_cast(value); + u64 trunc_bits = TruncateMantissaBits(bits); + return std::bit_cast(trunc_bits); +} + +inline u64 RoundMantissaBitsFinite(u64 bits) +{ + const u64 replacement_exp = 0x4000000000000000ull; + + // To round only the mantissa, we assume the CPU can change the rounding mode, + // create new double with an exponent that won't cause issues, round to a single, + // and convert back to a double while restoring the original exponent again! + // The removing the exponent is done via subtraction instead of bitwise + // operations due to the possibility that the rounding will cause an overflow + // into the exponent + u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp; + + float rounded_float = static_cast(std::bit_cast(resized_bits)); + double extended_float = static_cast(rounded_float); + u64 rounded_bits = std::bit_cast(extended_float); + + u64 orig_exp_bits = bits & Common::DOUBLE_EXP; + + if (orig_exp_bits == 0) + { + // The exponent isn't incremented for double subnormals + return rounded_bits & ~Common::DOUBLE_EXP; + } + + // Handle the change accordingly otherwise! + rounded_bits = (rounded_bits - replacement_exp) + orig_exp_bits; + return rounded_bits; +} + +inline u64 RoundMantissaBits(u64 bits) +{ + // Checking if the value is non-finite + if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP) + { + // For infinite and NaN values, the mantissa is simply truncated + return TruncateMantissaBits(bits); + } + + return RoundMantissaBitsFinite(bits); +} + +inline double RoundMantissaFinite(double value) +{ + // This function is only ever used by ps_sum1, because + // for some reason it assumes that ps0 should be rounded with + // finite values rather than checking if they might be infinite + u64 bits = std::bit_cast(value); + u64 rounded_bits = RoundMantissaBitsFinite(bits); + return std::bit_cast(rounded_bits); +} + +inline double RoundMantissa(double value) +{ + // The double version of the function just converts to and from bits again + // This would be a necessary step anyways, so it just simplifies code + u64 bits = std::bit_cast(value); + u64 rounded_bits = RoundMantissaBits(bits); + return std::bit_cast(rounded_bits); +} } // namespace Common diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index b1dbbf0cc108..ddc023fb96ee 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -515,7 +515,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst) const double b = ppc_state.ps[inst.FB].PS0AsDouble(); const auto compute_result = [&ppc_state, inst](double value) { - const double result = Common::ApproximateReciprocal(value); + const double result = Common::ApproximateReciprocal(ppc_state.fpscr, value); ppc_state.ps[inst.FD].Fill(result); ppc_state.UpdateFPRFSingle(float(result)); }; diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index abb5f129b96f..a040c6297ea0 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -10,66 +10,6 @@ #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h" #include "Core/PowerPC/PowerPC.h" -// Instructions which move data without performing operations round a bit weirdly -// Specifically they rounding the mantissa to be like that of a 32-bit float, -// going as far as to focus on the rounding mode, but never actually care about -// making sure the exponent becomes 32-bit -// Either this, or they'll truncate the mantissa down, which will always happen to -// PS1 OR PS0 in ps_rsqrte -inline u64 TruncateMantissaBits(u64 bits) -{ - // Truncation can be done by simply cutting off the mantissa bits that don't - // exist in a single precision float - constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH; - constexpr u64 remove_mask = (1 << remove_bits) - 1; - return bits & ~remove_mask; -} - -inline double TruncateMantissa(double value) -{ - u64 bits = std::bit_cast(value); - u64 trunc_bits = TruncateMantissaBits(bits); - return std::bit_cast(trunc_bits); -} - -inline u64 RoundMantissaBits(u64 bits) -{ - // Checking if the value is non-finite - if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP) - { - // For infinite and NaN values, the mantissa is simply truncated - return TruncateMantissaBits(bits); - } - - const u64 replacement_exp = 0x4000000000000000ull; - - // To round only the mantissa, we assume the CPU can change the rounding mode, - // create new double with an exponent that won't cause issues, round to a single, - // and convert back to a double while restoring the original exponent again! - // The removing the exponent is done via subtraction instead of bitwise - // operations due to the possibility that the rounding will cause an overflow - // into the exponent - u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp; - - float rounded_float = static_cast(std::bit_cast(resized_bits)); - double extended_float = static_cast(rounded_float); - u64 rounded_bits = std::bit_cast(extended_float); - - u64 orig_exp_bits = bits & Common::DOUBLE_EXP; - rounded_bits = (rounded_bits - replacement_exp) | orig_exp_bits; - - return rounded_bits; -} - -inline double RoundMantissa(double value) -{ - // The double version of the function just converts to and from bits again - // This would be a necessary step anyways, so it just simplifies code - u64 bits = std::bit_cast(value); - u64 rounded_bits = RoundMantissaBits(bits); - return std::bit_cast(rounded_bits); -} - // These "binary instructions" do not alter FPSCR. void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst) { @@ -80,7 +20,7 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst) double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(); double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble(); - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1)); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissa(ps0), ps1); if (inst.Rc) ppc_state.UpdateCR1(); @@ -93,7 +33,7 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst) u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63); u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63); - ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1); if (inst.Rc) ppc_state.UpdateCR1(); @@ -104,7 +44,7 @@ void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst) auto& ppc_state = interpreter.m_ppc_state; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble())); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissa(b.PS0AsDouble()), b.PS1AsDouble()); if (inst.Rc) ppc_state.UpdateCR1(); @@ -117,7 +57,7 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst) u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63); u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63); - ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1); if (inst.Rc) ppc_state.UpdateCR1(); @@ -130,7 +70,7 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst) u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63); u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63); - ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1)); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1); if (inst.Rc) ppc_state.UpdateCR1(); @@ -143,7 +83,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble())); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS0AsU64()), b.PS0AsU64()); if (inst.Rc) ppc_state.UpdateCR1(); @@ -155,7 +95,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble())); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS0AsU64()), b.PS1AsU64()); if (inst.Rc) ppc_state.UpdateCR1(); @@ -167,7 +107,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble())); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS1AsU64()), b.PS0AsU64()); if (inst.Rc) ppc_state.UpdateCR1(); @@ -179,7 +119,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst) const auto& a = ppc_state.ps[inst.FA]; const auto& b = ppc_state.ps[inst.FB]; - ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble())); + ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS1AsU64()), b.PS1AsU64()); if (inst.Rc) ppc_state.UpdateCR1(); @@ -209,7 +149,11 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst) // this code is based on the real hardware tests auto& ppc_state = interpreter.m_ppc_state; const double a = ppc_state.ps[inst.FB].PS0AsDouble(); - const double b = ppc_state.ps[inst.FB].PS1AsDouble(); + const double b = ppc_state.ps[inst.FB].PS1AsReciprocalDouble(); + + // The entire process of conditionally handling b doesn't matter + // for ps_res, becauase it never reads the bottom bits of a double + // when doing operations a standard value (not 0, NaN, infinity) if (a == 0.0 || b == 0.0) { @@ -223,8 +167,8 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst) if (Common::IsSNAN(a) || Common::IsSNAN(b)) SetFPException(ppc_state, FPSCR_VXSNAN); - const double ps0 = Common::ApproximateReciprocal(a); - const double ps1 = Common::ApproximateReciprocal(b); + const double ps0 = Common::TruncateMantissa(Common::ApproximateReciprocal(ppc_state.fpscr, a)); + const double ps1 = Common::ApproximateReciprocal(ppc_state.fpscr, b); ppc_state.ps[inst.FD].SetBoth(ps0, ps1); ppc_state.UpdateFPRFSingle(float(ps0)); @@ -237,7 +181,14 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst) { auto& ppc_state = interpreter.m_ppc_state; const double ps0 = ppc_state.ps[inst.FB].PS0AsDouble(); - const double ps1 = ppc_state.ps[inst.FB].PS1AsDouble(); + double ps1 = ppc_state.ps[inst.FB].PS1AsReciprocalDouble(); + + if (ps1 > 0.0) + { + // If ps1 is <0, we want the result to remain NaN even for + // the smallest of subnormals which would be truncated to 0 + ps1 = Common::TruncateMantissa(ps1); + } if (ps0 == 0.0 || ps1 == 0.0) { @@ -258,8 +209,8 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst) SetFPException(ppc_state, FPSCR_VXSNAN); // For some reason ps0 is also truncated for this operation rather than rounded - const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0)); - const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1)); + const double dst_ps0 = Common::TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0)); + const double dst_ps1 = Common::ApproximateReciprocalSquareRoot(ps1); ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1); ppc_state.UpdateFPRFSingle(dst_ps0); @@ -426,7 +377,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst) const float ps0 = ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value); - const double ps1 = TruncateMantissa(c.PS1AsDouble()); + const double ps1 = c.PS1AsDouble(); ppc_state.ps[inst.FD].SetBoth(ps0, ps1); ppc_state.UpdateFPRFSingle(ps0); @@ -442,7 +393,8 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst) const auto& b = ppc_state.ps[inst.FB]; const auto& c = ppc_state.ps[inst.FC]; - const double ps0 = RoundMantissa(c.PS0AsDouble()); + // Rounds assuming ps0 is finite for some reason + const double ps0 = Common::RoundMantissaFinite(c.PS0AsDouble()); const float ps1 = ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index e97db77fc710..9d0c7f99ba29 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -40,7 +40,25 @@ double PairedSingle::PS0AsDouble() const double PairedSingle::PS1AsDouble() const { - return std::bit_cast(ps1); + return Common::TruncateMantissa(std::bit_cast(ps1)); +} + +// If ps1 would get truncated to 0 if read as a raw value, set the sign +// of the input for reciprocal operations +// It's not exactly clear why this happens, but that's also why PS1 is +// truncated on read rather than on write +double PairedSingle::PS1AsReciprocalDouble() const +{ + constexpr u64 trunc_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH; + constexpr u64 trunc_mask = (1 << trunc_bits) - 1; + + u64 bits = ps1; + if ((ps1 & ~(trunc_mask | Common::DOUBLE_SIGN)) == 0 && (ps1 & trunc_mask) != 0) + { + bits |= Common::DOUBLE_SIGN; + } + + return std::bit_cast(bits); } void PairedSingle::SetPS0(double value) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 662507697e29..b820bafa2e4e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -11,6 +11,7 @@ #include #include "Common/CommonTypes.h" +#include "Common/FloatUtils.h" #include "Core/CPUThreadConfigCallback.h" #include "Core/Debugger/BranchWatch.h" @@ -70,14 +71,21 @@ struct TLBEntry struct PairedSingle { + // By default, truncate PS1 + // Due to reciprocal operations having a quirk in which the sign + // of the input PS1 is set if the value in it beforehand would + // be truncated to 0, setting PS1 then only truncating it on read + // operations simply works easier than creating an entire flag + // for this specific case u64 PS0AsU64() const { return ps0; } - u64 PS1AsU64() const { return ps1; } + u64 PS1AsU64() const { return Common::TruncateMantissaBits(ps1); } u32 PS0AsU32() const { return static_cast(ps0); } u32 PS1AsU32() const { return static_cast(ps1); } double PS0AsDouble() const; double PS1AsDouble() const; + double PS1AsReciprocalDouble() const; void SetPS0(u64 value) { ps0 = value; } void SetPS0(double value); From c512ae13f3de0eb0c6316a42090120604b3cd28c Mon Sep 17 00:00:00 2001 From: Geotale Date: Wed, 2 Oct 2024 01:32:40 -0500 Subject: [PATCH 3/3] Update JITs and Tests Assume NI Set For Unit Tests This does *not* match x86-64, which properly handles any weird values using a function call This should hopefully pass tests though, which is important before fixing that issue I had forgotten that the JITs would use the same modified base and pair tables ^^; Also fixes call for complex inputs in x86 This saves an instruction on both x86-64 and ARM64!! TODO: Due to fixes with interpreter, ARM64 JIT likely doesn't match x86 JIT which calls a fallback on weird inputs --- .../Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp | 10 +++++----- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 5 ++--- Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp | 6 +++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 94e7f99423e1..4a68fb5d1f8f 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -254,18 +254,17 @@ void CommonAsmRoutines::GenFres() IMUL(32, RSCRATCH, MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec))); - ADD(32, R(RSCRATCH), Imm8(1)); - SHR(32, R(RSCRATCH), Imm8(1)); MOV(32, R(RSCRATCH2), MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base))); - SUB(32, R(RSCRATCH2), R(RSCRATCH)); + ADD(32, R(RSCRATCH2), R(RSCRATCH)); + SHR(32, R(RSCRATCH2), Imm8(1)); SHL(64, R(RSCRATCH2), Imm8(29)); POP(RSCRATCH_EXTRA); - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - - // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)((u64)(fres_expected_base[i / 1024] + + // (fres_expected_dec[i / 1024] * (i % 1024)) / 2)) // << 29 MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); @@ -279,6 +278,7 @@ void CommonAsmRoutines::GenFres() SetJumpTarget(complex); ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + LEA(64, ABI_PARAM1, PPCSTATE(fpscr)); ABI_CallFunction(Common::ApproximateReciprocal); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a398..4ad97606e948 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -292,11 +292,10 @@ void JitArm64::GenerateFres() ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3)); UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0); - MOVI2R(ARM64Reg::W4, 1); - MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4); - SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1)); + MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2); AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64)); + LSR(ARM64Reg::W1, ARM64Reg::W1, 1); ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29)); RET(); diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp index fcfc8577a2b8..749bbc152e3c 100644 --- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp +++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp @@ -40,6 +40,7 @@ class TestFres : public JitArm64 MOV(ARM64Reg::X1, ARM64Reg::X0); m_float_emit.FMOV(ARM64Reg::D0, ARM64Reg::X0); m_float_emit.FRECPE(ARM64Reg::D0, ARM64Reg::D0); + m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0); BL(raw_fres); MOV(ARM64Reg::X30, ARM64Reg::X15); MOV(PPC_REG, ARM64Reg::X14); @@ -58,11 +59,14 @@ TEST(JitArm64, Fres) TestFres test(Core::System::GetInstance()); + // FPSCR with NI set + const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004); + for (const u64 ivalue : double_test_values) { const double dvalue = std::bit_cast(ivalue); - const u64 expected = std::bit_cast(Common::ApproximateReciprocal(dvalue)); + const u64 expected = std::bit_cast(Common::ApproximateReciprocal(fpscr, dvalue)); const u64 actual = test.fres(ivalue); if (expected != actual)