Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly Truncate Paired Single Move Instructions #13059

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 84 additions & 17 deletions Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,66 @@
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
#include "Core/PowerPC/PowerPC.h"

// Instructions which move data without performing operations round a bit weirdly.
// Specifically, they round the mantissa to be like that of a 32-bit float,
// going as far as to honor the rounding mode, but never actually care about
// making sure the exponent fits in a 32-bit float.
// Either this, or they'll truncate the mantissa down, which always happens to
// PS1, and to PS0 as well in ps_rsqrte
// Clears the low-order mantissa bits of a double (given as raw bits) that have no
// counterpart in a single precision mantissa. Sign, exponent and the remaining
// mantissa bits are returned unchanged.
inline u64 TruncateMantissaBits(u64 bits)
{
  // Truncation can be done by simply cutting off the mantissa bits that don't
  // exist in a single precision float
  constexpr u64 remove_bits = Common::DOUBLE_FRAC_WIDTH - Common::FLOAT_FRAC_WIDTH;
  // Build the mask in 64-bit arithmetic: a plain `1` is an int, so the shift would be
  // performed at int width before being widened to u64.
  constexpr u64 remove_mask = (u64{1} << remove_bits) - 1;
  return bits & ~remove_mask;
}

inline double TruncateMantissa(double value)
{
u64 bits = std::bit_cast<u64>(value);
u64 trunc_bits = TruncateMantissaBits(bits);
return std::bit_cast<double>(trunc_bits);
}

// Rounds the mantissa of a double (given as raw bits) to the width of a single precision
// mantissa while keeping the double's own exponent. Non-finite inputs (infinities and
// NaNs) only have their mantissa truncated.
inline u64 RoundMantissaBits(u64 bits)
{
  // Checking if the value is non-finite
  if ((bits & Common::DOUBLE_EXP) == Common::DOUBLE_EXP)
  {
    // For infinite and NaN values, the mantissa is simply truncated
    return TruncateMantissaBits(bits);
  }

  // Stand-in exponent (biased 0x400, i.e. magnitudes in [2, 4)) which is comfortably
  // representable as a float, so the conversion below only ever affects the mantissa.
  constexpr u64 replacement_exp = 0x4000000000000000ull;

  // To round only the mantissa, we assume the CPU can change the rounding mode,
  // create a new double with an exponent that won't cause issues, round to a single,
  // and convert back to a double while restoring the original exponent again!
  const u64 resized_bits = (bits & (Common::DOUBLE_FRAC | Common::DOUBLE_SIGN)) | replacement_exp;

  const float rounded_float = static_cast<float>(std::bit_cast<double>(resized_bits));
  const double extended_float = static_cast<double>(rounded_float);
  const u64 rounded_bits = std::bit_cast<u64>(extended_float);

  // Removing the stand-in exponent is done via subtraction instead of bitwise operations
  // because rounding up may carry out of the mantissa into the exponent field. For the
  // same reason the original exponent has to be restored with an addition: a bitwise OR
  // would swallow that carry whenever the original exponent's lowest bit is already set,
  // turning e.g. "just below 2.0" into 1.0 instead of 2.0.
  // The input is finite here, so the exponent field is at most 0x7FE and the carry can
  // never spill into the sign bit.
  const u64 orig_exp_bits = bits & Common::DOUBLE_EXP;
  return (rounded_bits - replacement_exp) + orig_exp_bits;
}

inline double RoundMantissa(double value)
{
// The double version of the function just converts to and from bits again
// This would be a necessary step anyways, so it just simplifies code
u64 bits = std::bit_cast<u64>(value);
u64 rounded_bits = RoundMantissaBits(bits);
return std::bit_cast<double>(rounded_bits);
}

// These "binary instructions" do not alter FPSCR.
void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
{
Expand All @@ -18,8 +78,9 @@ void Interpreter::ps_sel(Interpreter& interpreter, UGeckoInstruction inst)
const auto& b = ppc_state.ps[inst.FB];
const auto& c = ppc_state.ps[inst.FC];

ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(),
a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble());
double ps0 = a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble();
double ps1 = a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble();
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(ps0), TruncateMantissa(ps1));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -30,8 +91,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63),
b.PS1AsU64() ^ (UINT64_C(1) << 63));
u64 ps0 = b.PS0AsU64() ^ (UINT64_C(1) << 63);
u64 ps1 = b.PS1AsU64() ^ (UINT64_C(1) << 63);
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -40,7 +102,9 @@ void Interpreter::ps_neg(Interpreter& interpreter, UGeckoInstruction inst)
void Interpreter::ps_mr(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
ppc_state.ps[inst.FD] = ppc_state.ps[inst.FB];
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(RoundMantissa(b.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -51,8 +115,9 @@ void Interpreter::ps_nabs(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63),
b.PS1AsU64() | (UINT64_C(1) << 63));
u64 ps0 = b.PS0AsU64() | (UINT64_C(1) << 63);
u64 ps1 = b.PS1AsU64() | (UINT64_C(1) << 63);
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -63,8 +128,9 @@ void Interpreter::ps_abs(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63),
b.PS1AsU64() & ~(UINT64_C(1) << 63));
u64 ps0 = b.PS0AsU64() & ~(UINT64_C(1) << 63);
u64 ps1 = b.PS1AsU64() & ~(UINT64_C(1) << 63);
ppc_state.ps[inst.FD].SetBoth(RoundMantissaBits(ps0), TruncateMantissaBits(ps1));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -77,7 +143,7 @@ void Interpreter::ps_merge00(Interpreter& interpreter, UGeckoInstruction inst)
const auto& a = ppc_state.ps[inst.FA];
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS0AsDouble());
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS0AsDouble()));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -89,7 +155,7 @@ void Interpreter::ps_merge01(Interpreter& interpreter, UGeckoInstruction inst)
const auto& a = ppc_state.ps[inst.FA];
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(a.PS0AsDouble(), b.PS1AsDouble());
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS0AsDouble()), TruncateMantissa(b.PS1AsDouble()));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -101,7 +167,7 @@ void Interpreter::ps_merge10(Interpreter& interpreter, UGeckoInstruction inst)
const auto& a = ppc_state.ps[inst.FA];
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS0AsDouble());
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS0AsDouble()));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand All @@ -113,7 +179,7 @@ void Interpreter::ps_merge11(Interpreter& interpreter, UGeckoInstruction inst)
const auto& a = ppc_state.ps[inst.FA];
const auto& b = ppc_state.ps[inst.FB];

ppc_state.ps[inst.FD].SetBoth(a.PS1AsDouble(), b.PS1AsDouble());
ppc_state.ps[inst.FD].SetBoth(RoundMantissa(a.PS1AsDouble()), TruncateMantissa(b.PS1AsDouble()));

if (inst.Rc)
ppc_state.UpdateCR1();
Expand Down Expand Up @@ -191,8 +257,9 @@ void Interpreter::ps_rsqrte(Interpreter& interpreter, UGeckoInstruction inst)
if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1))
SetFPException(ppc_state, FPSCR_VXSNAN);

const float dst_ps0 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps0));
const float dst_ps1 = ForceSingle(ppc_state.fpscr, Common::ApproximateReciprocalSquareRoot(ps1));
// For some reason ps0 is also truncated for this operation rather than rounded
const double dst_ps0 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps0));
const double dst_ps1 = TruncateMantissa(Common::ApproximateReciprocalSquareRoot(ps1));

ppc_state.ps[inst.FD].SetBoth(dst_ps0, dst_ps1);
ppc_state.UpdateFPRFSingle(dst_ps0);
Expand Down Expand Up @@ -359,7 +426,7 @@ void Interpreter::ps_sum0(Interpreter& interpreter, UGeckoInstruction inst)

const float ps0 =
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);
const float ps1 = ForceSingle(ppc_state.fpscr, c.PS1AsDouble());
const double ps1 = TruncateMantissa(c.PS1AsDouble());

ppc_state.ps[inst.FD].SetBoth(ps0, ps1);
ppc_state.UpdateFPRFSingle(ps0);
Expand All @@ -375,7 +442,7 @@ void Interpreter::ps_sum1(Interpreter& interpreter, UGeckoInstruction inst)
const auto& b = ppc_state.ps[inst.FB];
const auto& c = ppc_state.ps[inst.FC];

const float ps0 = ForceSingle(ppc_state.fpscr, c.PS0AsDouble());
const double ps0 = RoundMantissa(c.PS0AsDouble());
const float ps1 =
ForceSingle(ppc_state.fpscr, NI_add(ppc_state, a.PS0AsDouble(), b.PS1AsDouble()).value);

Expand Down