From ae6a88dc50b9a74a6db47817397a40a151d92ac7 Mon Sep 17 00:00:00 2001
From: Geotale <Minemaker2006@gmail.com>
Date: Sun, 8 Sep 2024 16:14:21 -0500
Subject: [PATCH 1/3] Initial Change For Fixes

Matches hardware in rounding paired singles after move operations!!
Move operations consist of operations which only transfer direct bits
This also accounts for ps_rsqrte, because it has a similar quirk
Specifically in hardware they're rounded in accordance to their slot:
- PS0 rounds *only the mantissa* in accordance to the set rounding mode
- PS1 truncates the mantissa
ps_rsqrte also truncates for PS0 for some reason ^^;
This has all been tested on hardware, along with a few edge case tests

Co-Authored-By: JosJuice <josjuice@gmail.com>
---
 .../Interpreter/Interpreter_Paired.cpp        | 101 +++++++++++++++---
 1 file changed, 84 insertions(+), 17 deletions(-)
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
index 69048d453037..abb5f129b96f 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -10,6 +10,66 @@
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
 #include "Core/PowerPC/PowerPC.h"
 
+// Instructions which move data without performing operations round a bit weirdly
+// Specifically they rounding the mantissa to be like that of a 32-bit float,
+// going as far as to focus on the rounding mode, but never actually care about
+// making sure the exponent becomes 32-bit
+// Either this, or they'll truncate the mantissa down, which will always happen to
+// PS1 OR PS0 in ps_rsqrte
+inline u64 TruncateMantissaBits(u64 bits)
+{
+  // Truncation can be done by simply cutting off the mantissa bits that don't
+  // exist in a single precision float
+  constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;
+  constexpr u64 remove_mask = (1 << remove_bits) - 1;
+  return bits & ~remove_mask;
+}
+
+inline double TruncateMantissa(double value)
+{
+  u64 bits = std::bit_cast<u64>(value);
+  u64 trunc_bits = TruncateMantissaBits(bits);
+  return std::bit_cast<double>(trunc_bits);
+}
+
+inline u64 RoundMantissaBits(u64 bits)
+{
+  // Checking if the value is non-finite
+  if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP)
+  {
+    // For infinite and NaN values, the mantissa is simply truncated
+    return TruncateMantissaBits(bits);
+  }
+
+  const u64 replacement_exp = 0x4000000000000000ull;
+
+  // To round only the mantissa, we assume the CPU can change the rounding mode,
+  // create new double with an exponent that won't cause issues, round to a single,
+  // and convert back to a double while restoring the original exponent again!
+  // The removing the exponent is done via subtraction instead of bitwise
+  // operations due to the possibility that the rounding will cause an overflow
+  // into the exponent
+  u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp;
+
+  float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
+  double extended_float = static_cast<double>(rounded_float);
+  u64 rounded_bits = std::bit_cast<u64>(extended_float);
+
+  u64 orig_exp_bits = bits & Common::DOUBLE_EXP;
+  rounded_bits = (rounded_bits - replacement_exp) | orig_exp_bits;
+
+  return rounded_bits;
+}
+
+inline double RoundMantissa(double value)
+{
+  // The double version of the function just converts to and from bits again
+  // This would be a necessary step anyways, so it just simplifies code
+  u64 bits = std::bit_cast<u64>(value);
+  u64 rounded_bits = RoundMantissaBits(bits);
+  return std::bit_cast<double>(rounded_bits);
+}
+
 // These "binary instructions" do not alter FPSCR.
 void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
 {
@@ -18,8 +78,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& b = ppc_state.ps[inst.FB];
   const auto& c = ppc_state.ps[inst.FC];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
-                                a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
+  double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
+  double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -30,8 +91,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63),
-                                b.PS1AsU64() ^ (UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -40,7 +102,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
 void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
 {
   auto& ppc_state = interpreter.m_ppc_state;
-  ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB];
+  const auto& b = ppc_state.ps[inst.FB];
+
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -51,8 +115,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63),
-                                b.PS1AsU64() | (UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -63,8 +128,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63),
-                                b.PS1AsU64() & ~(UINT64_C(1) << 63));
+  u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
+  u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -77,7 +143,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble()));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -89,7 +155,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -101,7 +167,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble()));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -113,7 +179,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
+  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble()));
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -191,8 +257,9 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
   if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
     SetFPException(ppc_state, FPSCR_VXSNAN);
 
-  const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0));
-  const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1));
+  // For some reason ps0 is also truncated for this operation rather than rounded
+  const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0));
+  const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1));
 
   ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
   ppc_state.UpdateFPRFSingle(dst_ps0);
@@ -359,7 +426,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)
 
   const float ps0 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
-  const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble());
+  const double ps1 = TruncateMantissa(c.PS1AsDouble());
 
   ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
   ppc_state.UpdateFPRFSingle(ps0);
@@ -375,7 +442,7 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& b = ppc_state.ps[inst.FB];
   const auto& c = ppc_state.ps[inst.FC];
 
-  const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble());
+  const double ps0 = RoundMantissa(c.PS0AsDouble());
   const float ps1 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
 

From f0e6a1363f689a137eac29012e41703e160542bd Mon Sep 17 00:00:00 2001
From: Geotale <Minemaker2006@gmail.com>
Date: Wed, 2 Oct 2024 01:09:23 -0500
Subject: [PATCH 2/3] Fix To Match Hwtests

Also changes >= 0 to > 0.0
This technically leads to fewer branches taken ^^;
More importantly it looks/feels nicer to me

Fixes the approximate reciprocal function
- Currently not optimized
- Considering rewrite for cleanliness
Moves PS rounding to FloatUtils
- Done because it's used in more places now
Changes TruncateMantissa to occur on read
- This is to account for reciprocal cases
Adds PS1 getting function for reciprocals
Fixes ps_sum1 edge case with rounding
TODO: Test what ops can set PS1 edge case
- ps_merge is known to be able to
---
 Source/Core/Common/FloatUtils.cpp             |  76 ++++++++-----
 Source/Core/Common/FloatUtils.h               |  85 +++++++++++++-
 .../Interpreter/Interpreter_FloatingPoint.cpp |   2 +-
 .../Interpreter/Interpreter_Paired.cpp        | 106 +++++-------------
 Source/Core/Core/PowerPC/PowerPC.cpp          |  20 +++-
 Source/Core/Core/PowerPC/PowerPC.h            |  10 +-
 6 files changed, 190 insertions(+), 109 deletions(-)

diff --git a/Source/Core/Common/FloatUtils.cpp b/Source/Core/Common/FloatUtils.cpp
index a671eb278ce6..6fd271d4d435 100644
--- a/Source/Core/Common/FloatUtils.cpp
+++ b/Source/Core/Common/FloatUtils.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "Common/FloatUtils.h"
+#include "Core/PowerPC/Gekko.h"
 
 #include <bit>
 #include <cmath>
@@ -98,7 +99,7 @@ double ApproximateReciprocalSquareRoot(double val)
   }
 
   // Special case NaN-ish numbers
-  if (exponent == (0x7FFLL << 52))
+  if (exponent == DOUBLE_EXP)
   {
     if (mantissa == 0)
     {
@@ -123,7 +124,7 @@ double ApproximateReciprocalSquareRoot(double val)
       exponent -= 1LL << 52;
       mantissa <<= 1;
     } while (!(mantissa & (1LL << 52)));
-    mantissa &= (1LL << 52) - 1;
+    mantissa &= DOUBLE_FRAC;
     exponent += 1LL << 52;
   }
 
@@ -139,51 +140,70 @@ double ApproximateReciprocalSquareRoot(double val)
 }
 
 const std::array<BaseAndDec, 32> fres_expected = {{
-    {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313},
-    {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261},
-    {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5},
-    {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b},
-    {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143},
-    {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a},
-    {0x041c00, 0x108}, {0x020c00, 0x106},
+    {0xfff000, -0x3e1}, {0xf07000, -0x3a7}, {0xe1d400, -0x371}, {0xd41000, -0x340},
+    {0xc71000, -0x313}, {0xbac400, -0x2ea}, {0xaf2000, -0x2c4}, {0xa41000, -0x2a0},
+    {0x999000, -0x27f}, {0x8f9400, -0x261}, {0x861000, -0x245}, {0x7d0000, -0x22a},
+    {0x745800, -0x212}, {0x6c1000, -0x1fb}, {0x642800, -0x1e5}, {0x5c9400, -0x1d1},
+    {0x555000, -0x1be}, {0x4e5800, -0x1ac}, {0x47ac00, -0x19b}, {0x413c00, -0x18b},
+    {0x3b1000, -0x17c}, {0x352000, -0x16e}, {0x2f5c00, -0x15b}, {0x29f000, -0x15b},
+    {0x248800, -0x143}, {0x1f7c00, -0x143}, {0x1a7000, -0x12d}, {0x15bc00, -0x12d},
+    {0x110800, -0x11a}, {0x0ca000, -0x11a}, {0x083800, -0x108}, {0x041800, -0x106},
 }};
 
 // Used by fres and ps_res.
-double ApproximateReciprocal(double val)
+double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val)
 {
-  s64 integral = std::bit_cast<s64>(val);
-  const s64 mantissa = integral & ((1LL << 52) - 1);
-  const s64 sign = integral & (1ULL << 63);
-  s64 exponent = integral & (0x7FFLL << 52);
+  u64 integral = std::bit_cast<u64>(val);
+
+  // Convert into a float when possible
+  u64 signless = integral & ~DOUBLE_SIGN;
+  const u32 mantissa =
+      static_cast<u32>((integral & DOUBLE_FRAC) >> (DOUBLE_FRAC_WIDTH - FLOAT_FRAC_WIDTH));
+  const u32 sign = static_cast<u32>((integral >> 32) & FLOAT_SIGN);
+  s32 exponent = static_cast<s32>((integral & DOUBLE_EXP) >> DOUBLE_FRAC_WIDTH) - 0x380;
+
+  // The largest floats possible just return 0
+  const u64 huge_float = fpscr.NI ? 0x47d0000000000000ULL : 0x4940000000000000ULL;
 
   // Special case 0
-  if (mantissa == 0 && exponent == 0)
+  if (signless == 0)
     return std::copysign(std::numeric_limits<double>::infinity(), val);
 
-  // Special case NaN-ish numbers
-  if (exponent == (0x7FFLL << 52))
+  // Special case huge or NaN-ish numbers
+  if (signless >= huge_float)
   {
-    if (mantissa == 0)
+    if (!std::isnan(val))
       return std::copysign(0.0, val);
     return 0.0 + val;
   }
 
   // Special case small inputs
-  if (exponent < (895LL << 52))
+  if (exponent < -1)
     return std::copysign(std::numeric_limits<float>::max(), val);
 
-  // Special case large inputs
-  if (exponent >= (1149LL << 52))
-    return std::copysign(0.0, val);
-
-  exponent = (0x7FDLL << 52) - exponent;
+  exponent = 253 - exponent;
 
-  const int i = static_cast<int>(mantissa >> 37);
+  const u32 i = static_cast<u32>(mantissa >> 8);
   const auto& entry = fres_expected[i / 1024];
-  integral = sign | exponent;
-  integral |= static_cast<s64>(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29;
+  const u32 new_mantissa = static_cast<u32>(entry.m_base + entry.m_dec * (i % 1024)) / 2;
 
-  return std::bit_cast<double>(integral);
+  u32 result = sign | (static_cast<u32>(exponent) << FLOAT_FRAC_WIDTH) | new_mantissa;
+  if (exponent <= 0)
+  {
+    // Result is subnormal so format it properly!
+    if (fpscr.NI)
+    {
+      // Flush to 0 if inexact
+      result = sign;
+    }
+    else
+    {
+      // Shift by the exponent amount
+      u32 shift = 1 + static_cast<u32>(-exponent);
+      result = sign | (((1 << FLOAT_FRAC_WIDTH) | new_mantissa) >> shift);
+    }
+  }
+  return static_cast<double>(std::bit_cast<float>(result));
 }
 
 }  // namespace Common
diff --git a/Source/Core/Common/FloatUtils.h b/Source/Core/Common/FloatUtils.h
index 0456a7665eea..46636577c199 100644
--- a/Source/Core/Common/FloatUtils.h
+++ b/Source/Core/Common/FloatUtils.h
@@ -8,6 +8,7 @@
 #include <limits>
 
 #include "Common/CommonTypes.h"
+#include "Core/PowerPC/Gekko.h"
 
 namespace Common
 {
@@ -96,6 +97,88 @@ extern const std::array<BaseAndDec, 32> fres_expected;
 
 // PowerPC approximation algorithms
 double ApproximateReciprocalSquareRoot(double val);
-double ApproximateReciprocal(double val);
+double ApproximateReciprocal(const UReg_FPSCR& fpscr, double val);
+
+// Instructions which move data without performing operations round a bit weirdly
+// Specifically they rounding the mantissa to be like that of a 32-bit float,
+// going as far as to focus on the rounding mode, but never actually care about
+// making sure the exponent becomes 32-bit
+// Either this, or they'll truncate the mantissa down, which will always happen to
+// PS1 OR PS0 in ps_rsqrte
+inline u64 TruncateMantissaBits(u64 bits)
+{
+  // Truncation can be done by simply cutting off the mantissa bits that don't
+  // exist in a single precision float
+  constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;
+  constexpr u64 remove_mask = (1 << remove_bits) - 1;
+  return bits & ~remove_mask;
+}
+
+inline double TruncateMantissa(double value)
+{
+  u64 bits = std::bit_cast<u64>(value);
+  u64 trunc_bits = TruncateMantissaBits(bits);
+  return std::bit_cast<double>(trunc_bits);
+}
+
+inline u64 RoundMantissaBitsFinite(u64 bits)
+{
+  const u64 replacement_exp = 0x4000000000000000ull;
+
+  // To round only the mantissa, we assume the CPU can change the rounding mode,
+  // create new double with an exponent that won't cause issues, round to a single,
+  // and convert back to a double while restoring the original exponent again!
+  // The removing the exponent is done via subtraction instead of bitwise
+  // operations due to the possibility that the rounding will cause an overflow
+  // into the exponent
+  u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp;
+
+  float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
+  double extended_float = static_cast<double>(rounded_float);
+  u64 rounded_bits = std::bit_cast<u64>(extended_float);
+
+  u64 orig_exp_bits = bits & Common::DOUBLE_EXP;
+
+  if (orig_exp_bits == 0)
+  {
+    // The exponent isn't incremented for double subnormals
+    return rounded_bits & ~Common::DOUBLE_EXP;
+  }
+
+  // Handle the change accordingly otherwise!
+  rounded_bits = (rounded_bits - replacement_exp) + orig_exp_bits;
+  return rounded_bits;
+}
+
+inline u64 RoundMantissaBits(u64 bits)
+{
+  // Checking if the value is non-finite
+  if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP)
+  {
+    // For infinite and NaN values, the mantissa is simply truncated
+    return TruncateMantissaBits(bits);
+  }
+
+  return RoundMantissaBitsFinite(bits);
+}
+
+inline double RoundMantissaFinite(double value)
+{
+  // This function is only ever used by ps_sum1, because
+  // for some reason it assumes that ps0 should be rounded with
+  // finite values rather than checking if they might be infinite
+  u64 bits = std::bit_cast<u64>(value);
+  u64 rounded_bits = RoundMantissaBitsFinite(bits);
+  return std::bit_cast<double>(rounded_bits);
+}
+
+inline double RoundMantissa(double value)
+{
+  // The double version of the function just converts to and from bits again
+  // This would be a necessary step anyways, so it just simplifies code
+  u64 bits = std::bit_cast<u64>(value);
+  u64 rounded_bits = RoundMantissaBits(bits);
+  return std::bit_cast<double>(rounded_bits);
+}
 
 }  // namespace Common
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
index b1dbbf0cc108..ddc023fb96ee 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp
@@ -515,7 +515,7 @@ void Interpreter::fresx(Interpreter& interpreter, UGeckoInstruction inst)
   const double b = ppc_state.ps[inst.FB].PS0AsDouble();
 
   const auto compute_result = [&ppc_state, inst](double value) {
-    const double result = Common::ApproximateReciprocal(value);
+    const double result = Common::ApproximateReciprocal(ppc_state.fpscr, value);
     ppc_state.ps[inst.FD].Fill(result);
     ppc_state.UpdateFPRFSingle(float(result));
   };
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
index abb5f129b96f..a040c6297ea0 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
@@ -10,66 +10,6 @@
 #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
 #include "Core/PowerPC/PowerPC.h"
 
-// Instructions which move data without performing operations round a bit weirdly
-// Specifically they rounding the mantissa to be like that of a 32-bit float,
-// going as far as to focus on the rounding mode, but never actually care about
-// making sure the exponent becomes 32-bit
-// Either this, or they'll truncate the mantissa down, which will always happen to
-// PS1 OR PS0 in ps_rsqrte
-inline u64 TruncateMantissaBits(u64 bits)
-{
-  // Truncation can be done by simply cutting off the mantissa bits that don't
-  // exist in a single precision float
-  constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;
-  constexpr u64 remove_mask = (1 << remove_bits) - 1;
-  return bits & ~remove_mask;
-}
-
-inline double TruncateMantissa(double value)
-{
-  u64 bits = std::bit_cast<u64>(value);
-  u64 trunc_bits = TruncateMantissaBits(bits);
-  return std::bit_cast<double>(trunc_bits);
-}
-
-inline u64 RoundMantissaBits(u64 bits)
-{
-  // Checking if the value is non-finite
-  if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP)
-  {
-    // For infinite and NaN values, the mantissa is simply truncated
-    return TruncateMantissaBits(bits);
-  }
-
-  const u64 replacement_exp = 0x4000000000000000ull;
-
-  // To round only the mantissa, we assume the CPU can change the rounding mode,
-  // create new double with an exponent that won't cause issues, round to a single,
-  // and convert back to a double while restoring the original exponent again!
-  // The removing the exponent is done via subtraction instead of bitwise
-  // operations due to the possibility that the rounding will cause an overflow
-  // into the exponent
-  u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp;
-
-  float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
-  double extended_float = static_cast<double>(rounded_float);
-  u64 rounded_bits = std::bit_cast<u64>(extended_float);
-
-  u64 orig_exp_bits = bits & Common::DOUBLE_EXP;
-  rounded_bits = (rounded_bits - replacement_exp) | orig_exp_bits;
-
-  return rounded_bits;
-}
-
-inline double RoundMantissa(double value)
-{
-  // The double version of the function just converts to and from bits again
-  // This would be a necessary step anyways, so it just simplifies code
-  u64 bits = std::bit_cast<u64>(value);
-  u64 rounded_bits = RoundMantissaBits(bits);
-  return std::bit_cast<double>(rounded_bits);
-}
-
 // These "binary instructions" do not alter FPSCR.
 void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
 {
@@ -80,7 +20,7 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
 
   double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
   double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissa(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -93,7 +33,7 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
 
   u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
   u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -104,7 +44,7 @@ void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
   auto& ppc_state = interpreter.m_ppc_state;
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissa(b.PS0AsDouble()), b.PS1AsDouble());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -117,7 +57,7 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
 
   u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
   u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -130,7 +70,7 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
 
   u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
   u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(ps0), ps1);
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -143,7 +83,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble()));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS0AsU64()), b.PS0AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -155,7 +95,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS0AsU64()), b.PS1AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -167,7 +107,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble()));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS1AsU64()), b.PS0AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -179,7 +119,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& a = ppc_state.ps[inst.FA];
   const auto& b = ppc_state.ps[inst.FB];
 
-  ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble()));
+  ppc_state.ps[inst.FD].SetBoth(Common::RoundMantissaBits(a.PS1AsU64()), b.PS1AsU64());
 
   if (inst.Rc)
     ppc_state.UpdateCR1();
@@ -209,7 +149,11 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
   // this code is based on the real hardware tests
   auto& ppc_state = interpreter.m_ppc_state;
   const double a = ppc_state.ps[inst.FB].PS0AsDouble();
-  const double b = ppc_state.ps[inst.FB].PS1AsDouble();
+  const double b = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
+
+  // The entire process of conditionally handling b doesn't matter
+  // for ps_res, becauase it never reads the bottom bits of a double
+  // when doing operations a standard value (not 0, NaN, infinity)
 
   if (a == 0.0 || b == 0.0)
   {
@@ -223,8 +167,8 @@ void Interpreter::ps_res(Interpreter& interpreter, UGeckoInstruction inst)
   if (Common::IsSNAN(a) || Common::IsSNAN(b))
     SetFPException(ppc_state, FPSCR_VXSNAN);
 
-  const double ps0 = Common::ApproximateReciprocal(a);
-  const double ps1 = Common::ApproximateReciprocal(b);
+  const double ps0 = Common::TruncateMantissa(Common::ApproximateReciprocal(ppc_state.fpscr, a));
+  const double ps1 = Common::ApproximateReciprocal(ppc_state.fpscr, b);
 
   ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
   ppc_state.UpdateFPRFSingle(float(ps0));
@@ -237,7 +181,14 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
 {
   auto& ppc_state = interpreter.m_ppc_state;
   const double ps0 = ppc_state.ps[inst.FB].PS0AsDouble();
-  const double ps1 = ppc_state.ps[inst.FB].PS1AsDouble();
+  double ps1 = ppc_state.ps[inst.FB].PS1AsReciprocalDouble();
+
+  if (ps1 > 0.0)
+  {
+    // If ps1 is <0, we want the result to remain NaN even for
+    // the smallest of subnormals which would be truncated to 0
+    ps1 = Common::TruncateMantissa(ps1);
+  }
 
   if (ps0 == 0.0 || ps1 == 0.0)
   {
@@ -258,8 +209,8 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
     SetFPException(ppc_state, FPSCR_VXSNAN);
 
   // For some reason ps0 is also truncated for this operation rather than rounded
-  const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0));
-  const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1));
+  const double dst_ps0 = Common::TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0));
+  const double dst_ps1 = Common::ApproximateReciprocalSquareRoot(ps1);
 
   ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
   ppc_state.UpdateFPRFSingle(dst_ps0);
@@ -426,7 +377,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)
 
   const float ps0 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
-  const double ps1 = TruncateMantissa(c.PS1AsDouble());
+  const double ps1 = c.PS1AsDouble();
 
   ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
   ppc_state.UpdateFPRFSingle(ps0);
@@ -442,7 +393,8 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
   const auto& b = ppc_state.ps[inst.FB];
   const auto& c = ppc_state.ps[inst.FC];
 
-  const double ps0 = RoundMantissa(c.PS0AsDouble());
+  // Rounds assuming ps0 is finite for some reason
+  const double ps0 = Common::RoundMantissaFinite(c.PS0AsDouble());
   const float ps1 =
       ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
 
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index e97db77fc710..9d0c7f99ba29 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -40,7 +40,25 @@ double PairedSingle::PS0AsDouble() const
 
 double PairedSingle::PS1AsDouble() const
 {
-  return std::bit_cast<double>(ps1);
+  return Common::TruncateMantissa(std::bit_cast<double>(ps1));
+}
+
+// If ps1 would get truncated to 0 if read as a raw value, set the sign
+// of the input for reciprocal operations
+// It's not exactly clear why this happens, but that's also why PS1 is
+// truncated on read rather than on write
+double PairedSingle::PS1AsReciprocalDouble() const
+{
+  constexpr u64 trunc_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;
+  constexpr u64 trunc_mask = (1 << trunc_bits) - 1;
+
+  u64 bits = ps1;
+  if ((ps1 & ~(trunc_mask | Common::DOUBLE_SIGN)) == 0 && (ps1 & trunc_mask) != 0)
+  {
+    bits |= Common::DOUBLE_SIGN;
+  }
+
+  return std::bit_cast<double>(bits);
 }
 
 void PairedSingle::SetPS0(double value)
diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h
index 662507697e29..b820bafa2e4e 100644
--- a/Source/Core/Core/PowerPC/PowerPC.h
+++ b/Source/Core/Core/PowerPC/PowerPC.h
@@ -11,6 +11,7 @@
 #include <vector>
 
 #include "Common/CommonTypes.h"
+#include "Common/FloatUtils.h"
 
 #include "Core/CPUThreadConfigCallback.h"
 #include "Core/Debugger/BranchWatch.h"
@@ -70,14 +71,21 @@ struct TLBEntry
 
 struct PairedSingle
 {
+  // By default, truncate PS1
+  // Due to reciprocal operations having a quirk in which the sign
+  // of the input PS1 is set if the value in it beforehand would
+  // be truncated to 0, setting PS1 then only truncating it on read
+  // operations simply works easier than creating an entire flag
+  // for this specific case
   u64 PS0AsU64() const { return ps0; }
-  u64 PS1AsU64() const { return ps1; }
+  u64 PS1AsU64() const { return Common::TruncateMantissaBits(ps1); }
 
   u32 PS0AsU32() const { return static_cast<u32>(ps0); }
   u32 PS1AsU32() const { return static_cast<u32>(ps1); }
 
   double PS0AsDouble() const;
   double PS1AsDouble() const;
+  double PS1AsReciprocalDouble() const;
 
   void SetPS0(u64 value) { ps0 = value; }
   void SetPS0(double value);

From c512ae13f3de0eb0c6316a42090120604b3cd28c Mon Sep 17 00:00:00 2001
From: Geotale <Minemaker2006@gmail.com>
Date: Wed, 2 Oct 2024 01:32:40 -0500
Subject: [PATCH 3/3] Update JITs and Tests

Assume NI Set For Unit Tests

This does *not* match x86-64, which properly handles any weird values using a function call
This should hopefully pass tests though, which is important before fixing that issue

I had forgotten that the JITs would use the same modified base and pair tables ^^;
Also fixes call for complex inputs in x86
This saves an instruction on both x86-64 and ARM64!!
TODO: Due to fixes with interpreter, ARM64 JIT likely doesn't match x86 JIT which calls a fallback on weird inputs
---
 .../Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp   | 10 +++++-----
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp           |  5 ++---
 Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp        |  6 +++++-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 94e7f99423e1..4a68fb5d1f8f 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -254,18 +254,17 @@ void CommonAsmRoutines::GenFres()
 
   IMUL(32, RSCRATCH,
        MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
-  ADD(32, R(RSCRATCH), Imm8(1));
-  SHR(32, R(RSCRATCH), Imm8(1));
 
   MOV(32, R(RSCRATCH2),
       MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
-  SUB(32, R(RSCRATCH2), R(RSCRATCH));
+  ADD(32, R(RSCRATCH2), R(RSCRATCH));
+  SHR(32, R(RSCRATCH2), Imm8(1));
   SHL(64, R(RSCRATCH2), Imm8(29));
 
   POP(RSCRATCH_EXTRA);
 
-  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));  // vali |= (s64)(fres_expected_base[i / 1024] -
-                                            // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
+  OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));  // vali |= (s64)((u64)(fres_expected_base[i / 1024] +
+                                            // (fres_expected_dec[i / 1024] * (i % 1024)) / 2))
                                             // << 29
   MOVQ_xmm(XMM0, R(RSCRATCH2));
   RET();
@@ -279,6 +278,7 @@ void CommonAsmRoutines::GenFres()
 
   SetJumpTarget(complex);
   ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
+  LEA(64, ABI_PARAM1, PPCSTATE(fpscr));
   ABI_CallFunction(Common::ApproximateReciprocal);
   ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
   RET();
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 56c26739a398..4ad97606e948 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -292,11 +292,10 @@ void JitArm64::GenerateFres()
   ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
   UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10);  // Grab lower part of mantissa
   LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
-  MOVI2R(ARM64Reg::W4, 1);
-  MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
-  SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
+  MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2);
   AND(ARM64Reg::X0, ARM64Reg::X0,
       LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
+  LSR(ARM64Reg::W1, ARM64Reg::W1, 1);
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
   RET();
 
diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
index fcfc8577a2b8..749bbc152e3c 100644
--- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
+++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
@@ -40,6 +40,7 @@ class TestFres : public JitArm64
     MOV(ARM64Reg::X1, ARM64Reg::X0);
     m_float_emit.FMOV(ARM64Reg::D0, ARM64Reg::X0);
     m_float_emit.FRECPE(ARM64Reg::D0, ARM64Reg::D0);
+    m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
     BL(raw_fres);
     MOV(ARM64Reg::X30, ARM64Reg::X15);
     MOV(PPC_REG, ARM64Reg::X14);
@@ -58,11 +59,14 @@ TEST(JitArm64, Fres)
 
   TestFres test(Core::System::GetInstance());
 
+  // FPSCR with NI set
+  const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
+
   for (const u64 ivalue : double_test_values)
   {
     const double dvalue = std::bit_cast<double>(ivalue);
 
-    const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
+    const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(fpscr, dvalue));
     const u64 actual = test.fres(ivalue);
 
     if (expected != actual)