diff --git a/Source/Core/Core/GeckoCode.cpp b/Source/Core/Core/GeckoCode.cpp index eb90eda5a0..ae3d0f66f5 100644 --- a/Source/Core/Core/GeckoCode.cpp +++ b/Source/Core/Core/GeckoCode.cpp @@ -272,8 +272,8 @@ void RunCodeHandler() // Registers FPR0->13 are volatile for (int i = 0; i < 14; ++i) { - PowerPC::HostWrite_U64(riPS0(i), SP + 24 + 2 * i * sizeof(u64)); - PowerPC::HostWrite_U64(riPS1(i), SP + 24 + (2 * i + 1) * sizeof(u64)); + PowerPC::HostWrite_U64(rPS(i).PS0AsU64(), SP + 24 + 2 * i * sizeof(u64)); + PowerPC::HostWrite_U64(rPS(i).PS1AsU64(), SP + 24 + (2 * i + 1) * sizeof(u64)); } DEBUG_LOG(ACTIONREPLAY, "GeckoCodes: Initiating phantom branch-and-link. " diff --git a/Source/Core/Core/HLE/HLE_Misc.cpp b/Source/Core/Core/HLE/HLE_Misc.cpp index bfba82ed24..27cb3012a7 100644 --- a/Source/Core/Core/HLE/HLE_Misc.cpp +++ b/Source/Core/Core/HLE/HLE_Misc.cpp @@ -64,8 +64,8 @@ void GeckoReturnTrampoline() PowerPC::ExpandCR(PowerPC::HostRead_U32(SP + 20)); for (int i = 0; i < 14; ++i) { - riPS0(i) = PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)); - riPS1(i) = PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64)); + rPS(i).SetBoth(PowerPC::HostRead_U64(SP + 24 + 2 * i * sizeof(u64)), + PowerPC::HostRead_U64(SP + 24 + (2 * i + 1) * sizeof(u64))); } } } diff --git a/Source/Core/Core/HLE/HLE_VarArgs.cpp b/Source/Core/Core/HLE/HLE_VarArgs.cpp index 0517fa8637..e7e6a9099e 100644 --- a/Source/Core/Core/HLE/HLE_VarArgs.cpp +++ b/Source/Core/Core/HLE/HLE_VarArgs.cpp @@ -15,7 +15,7 @@ u32 HLE::SystemVABI::VAList::GetGPR(u32 gpr) const double HLE::SystemVABI::VAList::GetFPR(u32 fpr) const { - return rPS0(fpr); + return rPS(fpr).PS0AsDouble(); } HLE::SystemVABI::VAListStruct::VAListStruct(u32 address) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index 8f912cd73b..8a26ade061 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -88,8 +88,10 @@ static void Trace(UGeckoInstruction& inst) std::string fregs = ""; for (int i = 0; i < 32; i++) { - fregs += StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, PowerPC::ppcState.ps[i][0], - PowerPC::ppcState.ps[i][1]); + const auto& ps = PowerPC::ppcState.ps[i]; + + fregs += + StringFromFormat("f%02d: %08" PRIx64 " %08" PRIx64 " ", i, ps.PS0AsU64(), ps.PS1AsU64()); } const std::string ppc_inst = Common::GekkoDisassembler::Disassemble(inst.hex, PC); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp index 3a68997bf0..e39c39e1fa 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_FloatingPoint.cpp @@ -27,7 +27,7 @@ enum class RoundingMode // The Programming Environments Manual for 32 and 64-bit Microprocessors void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); u32 value; bool exception_occurred = false; @@ -111,9 +111,11 @@ void ConvertToInteger(UGeckoInstruction inst, RoundingMode rounding_mode) { // Based on HW tests // FPRF is not affected - riPS0(inst.FD) = 0xfff8000000000000ull | value; + u64 result = 0xfff8000000000000ull | value; if (value == 0 && std::signbit(b)) - riPS0(inst.FD) |= 0x100000000ull; + result |= 0x100000000ull; + + rPS(inst.FD).SetPS0(result); } if (inst.Rc) @@ -198,12 +200,18 @@ void Interpreter::Helper_FloatCompareUnordered(UGeckoInstruction inst, double fa void Interpreter::fcmpo(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::fcmpu(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::fctiwx(UGeckoInstruction inst) @@ -218,7 +226,7 @@ void Interpreter::fctiwzx(UGeckoInstruction inst) void Interpreter::fmrx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64()); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -227,7 +235,7 @@ void Interpreter::fmrx(UGeckoInstruction inst) void Interpreter::fabsx(UGeckoInstruction inst) { - rPS0(inst.FD) = fabs(rPS0(inst.FB)); + rPS(inst.FD).SetPS0(fabs(rPS(inst.FB).PS0AsDouble())); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -236,7 +244,7 @@ void Interpreter::fabsx(UGeckoInstruction inst) void Interpreter::fnabsx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() | (UINT64_C(1) << 63)); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -245,7 +253,7 @@ void Interpreter::fnabsx(UGeckoInstruction inst) void Interpreter::fnegx(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63); + rPS(inst.FD).SetPS0(rPS(inst.FB).PS0AsU64() ^ (UINT64_C(1) << 63)); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -254,7 +262,11 @@ void Interpreter::fnegx(UGeckoInstruction inst) void Interpreter::fselx(UGeckoInstruction inst) { - rPS0(inst.FD) = (rPS0(inst.FA) >= -0.0) ? rPS0(inst.FC) : rPS0(inst.FB); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + rPS(inst.FD).SetPS0((a.PS0AsDouble() >= -0.0) ? c.PS0AsDouble() : b.PS0AsDouble()); // This is a binary instruction. Does not alter FPSCR if (inst.Rc) @@ -266,7 +278,7 @@ void Interpreter::fselx(UGeckoInstruction inst) // PS1 is said to be undefined void Interpreter::frspx(UGeckoInstruction inst) // round to single { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const double rounded = ForceSingle(b); if (std::isnan(b)) @@ -278,8 +290,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single if (!is_snan || FPSCR.VE == 0) { - rPS0(inst.FD) = rounded; - rPS1(inst.FD) = rounded; + rPS(inst.FD).Fill(rounded); PowerPC::UpdateFPRF(b); } @@ -290,8 +301,7 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single SetFI(b != rounded); FPSCR.FR = fabs(rounded) > fabs(b); PowerPC::UpdateFPRF(rounded); - rPS0(inst.FD) = rounded; - rPS1(inst.FD) = rounded; + rPS(inst.FD).Fill(rounded); } if (inst.Rc) @@ -300,13 +310,16 @@ void Interpreter::frspx(UGeckoInstruction inst) // round to single void Interpreter::fmulx(UGeckoInstruction inst) { - const FPResult product = NI_mul(rPS0(inst.FA), rPS0(inst.FC)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_mul(a.PS0AsDouble(), c.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); FPSCR.FI = 0; // are these flags important? FPSCR.FR = 0; PowerPC::UpdateFPRF(result); @@ -317,14 +330,17 @@ void Interpreter::fmulx(UGeckoInstruction inst) } void Interpreter::fmulsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult d_value = NI_mul(rPS0(inst.FA), c_value); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult d_value = NI_mul(a.PS0AsDouble(), c_value); if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { const double result = ForceSingle(d_value.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); FPSCR.FI = 0; FPSCR.FR = 0; PowerPC::UpdateFPRF(result); @@ -336,12 +352,15 @@ void Interpreter::fmulsx(UGeckoInstruction inst) void Interpreter::fmaddx(UGeckoInstruction inst) { - const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -351,14 +370,18 @@ void Interpreter::fmaddx(UGeckoInstruction inst) void Interpreter::fmaddsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult d_value = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult d_value = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || d_value.HasNoInvalidExceptions()) { const double result = ForceSingle(d_value.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); FPSCR.FI = d_value.value != result; FPSCR.FR = 0; PowerPC::UpdateFPRF(result); @@ -370,12 +393,15 @@ void Interpreter::fmaddsx(UGeckoInstruction inst) void Interpreter::faddx(UGeckoInstruction inst) { - const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) { const double result = ForceDouble(sum.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -384,12 +410,15 @@ void Interpreter::faddx(UGeckoInstruction inst) } void Interpreter::faddsx(UGeckoInstruction inst) { - const FPResult sum = NI_add(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult sum = NI_add(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || sum.HasNoInvalidExceptions()) { const double result = ForceSingle(sum.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -399,14 +428,17 @@ void Interpreter::faddsx(UGeckoInstruction inst) void Interpreter::fdivx(UGeckoInstruction inst) { - const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble()); const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX; const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions(); if (not_divide_by_zero && not_invalid) { const double result = ForceDouble(quotient.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -416,14 +448,17 @@ void Interpreter::fdivx(UGeckoInstruction inst) } void Interpreter::fdivsx(UGeckoInstruction inst) { - const FPResult quotient = NI_div(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult quotient = NI_div(a.PS0AsDouble(), b.PS0AsDouble()); const bool not_divide_by_zero = FPSCR.ZE == 0 || quotient.exception != FPSCR_ZX; const bool not_invalid = FPSCR.VE == 0 || quotient.HasNoInvalidExceptions(); if (not_divide_by_zero && not_invalid) { const double result = ForceSingle(quotient.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -434,11 +469,11 @@ void Interpreter::fdivsx(UGeckoInstruction inst) // Single precision only. void Interpreter::fresx(UGeckoInstruction inst) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const auto compute_result = [inst](double value) { const double result = Common::ApproximateReciprocal(value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); }; @@ -472,11 +507,11 @@ void Interpreter::fresx(UGeckoInstruction inst) void Interpreter::frsqrtex(UGeckoInstruction inst) { - const double b = rPS0(inst.FB); + const double b = rPS(inst.FB).PS0AsDouble(); const auto compute_result = [inst](double value) { const double result = Common::ApproximateReciprocalSquareRoot(value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); }; @@ -518,12 +553,16 @@ void Interpreter::frsqrtex(UGeckoInstruction inst) void Interpreter::fmsubx(UGeckoInstruction inst) { - const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceDouble(product.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -533,13 +572,17 @@ void Interpreter::fmsubx(UGeckoInstruction inst) void Interpreter::fmsubsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } @@ -549,13 +592,19 @@ void Interpreter::fmsubsx(UGeckoInstruction inst) void Interpreter::fnmaddx(UGeckoInstruction inst) { - const FPResult product = NI_madd(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_madd(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceDouble(product.value); - rPS0(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceDouble(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).SetPS0(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -564,14 +613,20 @@ void Interpreter::fnmaddx(UGeckoInstruction inst) void Interpreter::fnmaddsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_madd(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_madd(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceSingle(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).Fill(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -580,13 +635,19 @@ void Interpreter::fnmaddsx(UGeckoInstruction inst) void Interpreter::fnmsubx(UGeckoInstruction inst) { - const FPResult product = NI_msub(rPS0(inst.FA), rPS0(inst.FC), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const FPResult product = NI_msub(a.PS0AsDouble(), c.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceDouble(product.value); - rPS0(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceDouble(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).SetPS0(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -595,14 +656,20 @@ void Interpreter::fnmsubx(UGeckoInstruction inst) void Interpreter::fnmsubsx(UGeckoInstruction inst) { - const double c_value = Force25Bit(rPS0(inst.FC)); - const FPResult product = NI_msub(rPS0(inst.FA), c_value, rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c_value = Force25Bit(c.PS0AsDouble()); + const FPResult product = NI_msub(a.PS0AsDouble(), c_value, b.PS0AsDouble()); if (FPSCR.VE == 0 || product.HasNoInvalidExceptions()) { - const double result = ForceSingle(product.value); - rPS0(inst.FD) = rPS1(inst.FD) = std::isnan(result) ? result : -result; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double tmp = ForceSingle(product.value); + const double result = std::isnan(tmp) ? tmp : -tmp; + + rPS(inst.FD).Fill(result); + PowerPC::UpdateFPRF(result); } if (inst.Rc) @@ -611,12 +678,15 @@ void Interpreter::fnmsubsx(UGeckoInstruction inst) void Interpreter::fsubx(UGeckoInstruction inst) { - const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) { const double result = ForceDouble(difference.value); - rPS0(inst.FD) = result; + rPS(inst.FD).SetPS0(result); PowerPC::UpdateFPRF(result); } @@ -626,12 +696,15 @@ void Interpreter::fsubx(UGeckoInstruction inst) void Interpreter::fsubsx(UGeckoInstruction inst) { - const FPResult difference = NI_sub(rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const FPResult difference = NI_sub(a.PS0AsDouble(), b.PS0AsDouble()); if (FPSCR.VE == 0 || difference.HasNoInvalidExceptions()) { const double result = ForceSingle(difference.value); - rPS0(inst.FD) = rPS1(inst.FD) = result; + rPS(inst.FD).Fill(result); PowerPC::UpdateFPRF(result); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index e9eb2820b8..8e270dfc9f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -72,7 +72,7 @@ void Interpreter::lfd(UGeckoInstruction inst) const u64 temp = PowerPC::Read_U64(address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); } void Interpreter::lfdu(UGeckoInstruction inst) @@ -89,7 +89,7 @@ void Interpreter::lfdu(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); rGPR[inst.RA] = address; } } @@ -108,7 +108,7 @@ void Interpreter::lfdux(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); rGPR[inst.RA] = address; } } @@ -126,7 +126,7 @@ void Interpreter::lfdx(UGeckoInstruction inst) const u64 temp = PowerPC::Read_U64(address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) - riPS0(inst.FD) = temp; + rPS(inst.FD).SetPS0(temp); } void Interpreter::lfs(UGeckoInstruction inst) @@ -144,8 +144,7 @@ void Interpreter::lfs(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); } } @@ -164,8 +163,7 @@ void Interpreter::lfsu(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); rGPR[inst.RA] = address; } } @@ -184,9 +182,8 @@ void Interpreter::lfsux(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { - u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + const u64 value = ConvertToDouble(temp); + rPS(inst.FD).Fill(value); rGPR[inst.RA] = address; } } @@ -206,8 +203,7 @@ void Interpreter::lfsx(UGeckoInstruction inst) if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { const u64 value = ConvertToDouble(temp); - riPS0(inst.FD) = value; - riPS1(inst.FD) = value; + rPS(inst.FD).Fill(value); } } @@ -355,7 +351,7 @@ void Interpreter::stfd(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); } void Interpreter::stfdu(UGeckoInstruction inst) @@ -368,7 +364,7 @@ void Interpreter::stfdu(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -385,7 +381,7 @@ void Interpreter::stfs(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); } void Interpreter::stfsu(UGeckoInstruction inst) @@ -398,7 +394,7 @@ void Interpreter::stfsu(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -761,7 +757,7 @@ void Interpreter::stfdux(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -778,7 +774,7 @@ void Interpreter::stfdx(UGeckoInstruction inst) return; } - PowerPC::Write_U64(riPS0(inst.FS), address); + PowerPC::Write_U64(rPS(inst.FS).PS0AsU64(), address); } // Stores Floating points into Integers indeXed @@ -792,7 +788,7 @@ void Interpreter::stfiwx(UGeckoInstruction inst) return; } - PowerPC::Write_U32((u32)riPS0(inst.FS), address); + PowerPC::Write_U32(rPS(inst.FS).PS0AsU32(), address); } void Interpreter::stfsux(UGeckoInstruction inst) @@ -805,7 +801,7 @@ void Interpreter::stfsux(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) { rGPR[inst.RA] = address; @@ -822,7 +818,7 @@ void Interpreter::stfsx(UGeckoInstruction inst) return; } - PowerPC::Write_U32(ConvertToSingle(riPS0(inst.FS)), address); + PowerPC::Write_U32(ConvertToSingle(rPS(inst.FS).PS0AsU64()), address); } void Interpreter::sthbrx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index c6fc5f507b..af96d59df7 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -176,8 +176,8 @@ void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW) const EQuantizeType stType = gqr.st_type; const unsigned int stScale = gqr.st_scale; - const double ps0 = rPS0(instRS); - const double ps1 = rPS1(instRS); + const double ps0 = rPS(instRS).PS0AsDouble(); + const double ps1 = rPS(instRS).PS1AsDouble(); switch (stType) { @@ -301,8 +301,7 @@ void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW) return; } - rPS0(instRD) = ps0; - rPS1(instRD) = ps1; + rPS(instRD).SetBoth(ps0, ps1); } void Interpreter::psq_l(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp index 02df9993a9..81054866ac 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Paired.cpp @@ -13,8 +13,12 @@ // These "binary instructions" do not alter FPSCR. void Interpreter::ps_sel(UGeckoInstruction inst) { - rPS0(inst.FD) = rPS0(inst.FA) >= -0.0 ? rPS0(inst.FC) : rPS0(inst.FB); - rPS1(inst.FD) = rPS1(inst.FA) >= -0.0 ? rPS1(inst.FC) : rPS1(inst.FB); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + rPS(inst.FD).SetBoth(a.PS0AsDouble() >= -0.0 ? c.PS0AsDouble() : b.PS0AsDouble(), + a.PS1AsDouble() >= -0.0 ? c.PS1AsDouble() : b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -22,8 +26,9 @@ void Interpreter::ps_sel(UGeckoInstruction inst) void Interpreter::ps_neg(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) ^ (1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) ^ (1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() ^ (UINT64_C(1) << 63), b.PS1AsU64() ^ (UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -31,8 +36,7 @@ void Interpreter::ps_neg(UGeckoInstruction inst) void Interpreter::ps_mr(UGeckoInstruction inst) { - rPS0(inst.FD) = rPS0(inst.FB); - rPS1(inst.FD) = rPS1(inst.FB); + rPS(inst.FD) = rPS(inst.FB); if (inst.Rc) Helper_UpdateCR1(); @@ -40,8 +44,9 @@ void Interpreter::ps_mr(UGeckoInstruction inst) void Interpreter::ps_nabs(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) | (1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) | (1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() | (UINT64_C(1) << 63), b.PS1AsU64() | (UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -49,8 +54,9 @@ void Interpreter::ps_nabs(UGeckoInstruction inst) void Interpreter::ps_abs(UGeckoInstruction inst) { - riPS0(inst.FD) = riPS0(inst.FB) & ~(1ULL << 63); - riPS1(inst.FD) = riPS1(inst.FB) & ~(1ULL << 63); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(b.PS0AsU64() & ~(UINT64_C(1) << 63), b.PS1AsU64() & ~(UINT64_C(1) << 63)); if (inst.Rc) Helper_UpdateCR1(); @@ -59,10 +65,10 @@ void Interpreter::ps_abs(UGeckoInstruction inst) // These are just moves, double is OK. void Interpreter::ps_merge00(UGeckoInstruction inst) { - double p0 = rPS0(inst.FA); - double p1 = rPS0(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS0AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -70,10 +76,10 @@ void Interpreter::ps_merge00(UGeckoInstruction inst) void Interpreter::ps_merge01(UGeckoInstruction inst) { - double p0 = rPS0(inst.FA); - double p1 = rPS1(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS0AsDouble(), b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -81,10 +87,10 @@ void Interpreter::ps_merge01(UGeckoInstruction inst) void Interpreter::ps_merge10(UGeckoInstruction inst) { - double p0 = rPS1(inst.FA); - double p1 = rPS0(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS0AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -92,10 +98,10 @@ void Interpreter::ps_merge10(UGeckoInstruction inst) void Interpreter::ps_merge11(UGeckoInstruction inst) { - double p0 = rPS1(inst.FA); - double p1 = rPS1(inst.FB); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + rPS(inst.FD).SetBoth(a.PS1AsDouble(), b.PS1AsDouble()); if (inst.Rc) Helper_UpdateCR1(); @@ -104,9 +110,14 @@ void Interpreter::ps_merge11(UGeckoInstruction inst) // From here on, the real deal. void Interpreter::ps_div(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_div(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_div(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_div(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_div(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -115,8 +126,8 @@ void Interpreter::ps_div(UGeckoInstruction inst) void Interpreter::ps_res(UGeckoInstruction inst) { // this code is based on the real hardware tests - const double a = rPS0(inst.FB); - const double b = rPS1(inst.FB); + const double a = rPS(inst.FB).PS0AsDouble(); + const double b = rPS(inst.FB).PS1AsDouble(); if (a == 0.0 || b == 0.0) { @@ -130,9 +141,11 @@ void Interpreter::ps_res(UGeckoInstruction inst) if (Common::IsSNAN(a) || Common::IsSNAN(b)) SetFPException(FPSCR_VXSNAN); - rPS0(inst.FD) = Common::ApproximateReciprocal(a); - rPS1(inst.FD) = Common::ApproximateReciprocal(b); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const double ps0 = Common::ApproximateReciprocal(a); + const double ps1 = Common::ApproximateReciprocal(b); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -140,8 +153,8 @@ void Interpreter::ps_res(UGeckoInstruction inst) void Interpreter::ps_rsqrte(UGeckoInstruction inst) { - const double ps0 = rPS0(inst.FB); - const double ps1 = rPS1(inst.FB); + const double ps0 = rPS(inst.FB).PS0AsDouble(); + const double ps1 = rPS(inst.FB).PS1AsDouble(); if (ps0 == 0.0 || ps1 == 0.0) { @@ -161,10 +174,11 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst) if (Common::IsSNAN(ps0) || Common::IsSNAN(ps1)) SetFPException(FPSCR_VXSNAN); - rPS0(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0)); - rPS1(inst.FD) = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1)); + const double dst_ps0 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps0)); + const double dst_ps1 = ForceSingle(Common::ApproximateReciprocalSquareRoot(ps1)); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + rPS(inst.FD).SetBoth(dst_ps0, dst_ps1); + PowerPC::UpdateFPRF(dst_ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -172,9 +186,14 @@ void Interpreter::ps_rsqrte(UGeckoInstruction inst) void Interpreter::ps_sub(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_sub(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_sub(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_sub(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_sub(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -182,9 +201,14 @@ void Interpreter::ps_sub(UGeckoInstruction inst) void Interpreter::ps_add(UGeckoInstruction inst) { - rPS0(inst.FD) = ForceSingle(NI_add(rPS0(inst.FA), rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_add(rPS1(inst.FA), rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_add(a.PS1AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -192,11 +216,17 @@ void Interpreter::ps_add(UGeckoInstruction inst) void Interpreter::ps_mul(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_mul(rPS0(inst.FA), c0).value); - rPS1(inst.FD) = ForceSingle(NI_mul(rPS1(inst.FA), c1).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -204,11 +234,18 @@ void Interpreter::ps_mul(UGeckoInstruction inst) void Interpreter::ps_msub(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -216,11 +253,18 @@ void Interpreter::ps_msub(UGeckoInstruction inst) void Interpreter::ps_madd(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - rPS0(inst.FD) = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - rPS1(inst.FD) = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -228,13 +272,21 @@ void Interpreter::ps_madd(UGeckoInstruction inst) void Interpreter::ps_nmsub(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - const double result0 = ForceSingle(NI_msub(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double result1 = ForceSingle(NI_msub(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0; - rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double tmp0 = ForceSingle(NI_msub(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double tmp1 = ForceSingle(NI_msub(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -242,13 +294,21 @@ void Interpreter::ps_nmsub(UGeckoInstruction inst) void Interpreter::ps_nmadd(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double c1 = Force25Bit(rPS1(inst.FC)); - const double result0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double result1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = std::isnan(result0) ? result0 : -result0; - rPS1(inst.FD) = std::isnan(result1) ? result1 : -result1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double c1 = Force25Bit(c.PS1AsDouble()); + + const double tmp0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double tmp1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + const double ps0 = std::isnan(tmp0) ? tmp0 : -tmp0; + const double ps1 = std::isnan(tmp1) ? tmp1 : -tmp1; + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -256,11 +316,15 @@ void Interpreter::ps_nmadd(UGeckoInstruction inst) void Interpreter::ps_sum0(UGeckoInstruction inst) { - const double p0 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value); - const double p1 = ForceSingle(rPS1(inst.FC)); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double ps0 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value); + const double ps1 = ForceSingle(c.PS1AsDouble()); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -268,11 +332,15 @@ void Interpreter::ps_sum0(UGeckoInstruction inst) void Interpreter::ps_sum1(UGeckoInstruction inst) { - const double p0 = ForceSingle(rPS0(inst.FC)); - const double p1 = ForceSingle(NI_add(rPS0(inst.FA), rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS1(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double ps0 = ForceSingle(c.PS0AsDouble()); + const double ps1 = ForceSingle(NI_add(a.PS0AsDouble(), b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps1); if (inst.Rc) Helper_UpdateCR1(); @@ -280,12 +348,15 @@ void Interpreter::ps_sum1(UGeckoInstruction inst) void Interpreter::ps_muls0(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c0).value); - const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c0).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c0).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c0).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -293,12 +364,15 @@ void Interpreter::ps_muls0(UGeckoInstruction inst) void Interpreter::ps_muls1(UGeckoInstruction inst) { - const double c1 = Force25Bit(rPS1(inst.FC)); - const double p0 = ForceSingle(NI_mul(rPS0(inst.FA), c1).value); - const double p1 = ForceSingle(NI_mul(rPS1(inst.FA), c1).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& c = rPS(inst.FC); + + const double c1 = Force25Bit(c.PS1AsDouble()); + const double ps0 = ForceSingle(NI_mul(a.PS0AsDouble(), c1).value); + const double ps1 = ForceSingle(NI_mul(a.PS1AsDouble(), c1).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -306,12 +380,16 @@ void Interpreter::ps_muls1(UGeckoInstruction inst) void Interpreter::ps_madds0(UGeckoInstruction inst) { - const double c0 = Force25Bit(rPS0(inst.FC)); - const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c0, rPS0(inst.FB)).value); - const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c0, rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c0 = Force25Bit(c.PS0AsDouble()); + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c0, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c0, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -319,12 +397,16 @@ void Interpreter::ps_madds0(UGeckoInstruction inst) void Interpreter::ps_madds1(UGeckoInstruction inst) { - const double c1 = Force25Bit(rPS1(inst.FC)); - const double p0 = ForceSingle(NI_madd(rPS0(inst.FA), c1, rPS0(inst.FB)).value); - const double p1 = ForceSingle(NI_madd(rPS1(inst.FA), c1, rPS1(inst.FB)).value); - rPS0(inst.FD) = p0; - rPS1(inst.FD) = p1; - PowerPC::UpdateFPRF(rPS0(inst.FD)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + const auto& c = rPS(inst.FC); + + const double c1 = Force25Bit(c.PS1AsDouble()); + const double ps0 = ForceSingle(NI_madd(a.PS0AsDouble(), c1, b.PS0AsDouble()).value); + const double ps1 = ForceSingle(NI_madd(a.PS1AsDouble(), c1, b.PS1AsDouble()).value); + + rPS(inst.FD).SetBoth(ps0, ps1); + PowerPC::UpdateFPRF(ps0); if (inst.Rc) Helper_UpdateCR1(); @@ -332,20 +414,32 @@ void Interpreter::ps_madds1(UGeckoInstruction inst) void Interpreter::ps_cmpu0(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::ps_cmpo0(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS0(inst.FA), rPS0(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS0AsDouble(), b.PS0AsDouble()); } void Interpreter::ps_cmpu1(UGeckoInstruction inst) { - Helper_FloatCompareUnordered(inst, rPS1(inst.FA), rPS1(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareUnordered(inst, a.PS1AsDouble(), b.PS1AsDouble()); } void Interpreter::ps_cmpo1(UGeckoInstruction inst) { - Helper_FloatCompareOrdered(inst, rPS1(inst.FA), rPS1(inst.FB)); + const auto& a = rPS(inst.FA); + const auto& b = rPS(inst.FB); + + Helper_FloatCompareOrdered(inst, a.PS1AsDouble(), b.PS1AsDouble()); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 6de8588307..19cae33ce5 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -98,7 +98,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst) m |= (0xFU << (i * 4)); } - FPSCR = (FPSCR.Hex & ~m) | (static_cast(riPS0(inst.FB)) & m); + FPSCR = (FPSCR.Hex & ~m) | (static_cast(rPS(inst.FB).PS0AsU64()) & m); FPSCRtoFPUSettings(FPSCR); if (inst.Rc) @@ -554,7 +554,7 @@ void Interpreter::mffsx(UGeckoInstruction inst) // TODO(ector): grab all overflow flags etc and set them in FPSCR UpdateFPSCR(); - riPS0(inst.FD) = 0xFFF8000000000000 | FPSCR.Hex; + rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex); if (inst.Rc) Helper_UpdateCR1(); diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index f671afb74c..39a73c6963 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -34,7 +34,7 @@ const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const OpArg FPURegCache::GetDefaultLocation(preg_t preg) const { - return PPCSTATE(ps[preg][0]); + return PPCSTATE(ps[preg].ps0); } BitSet32 FPURegCache::GetRegUtilization() const diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 64532c968f..78fd2633d9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -452,7 +452,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) // Load the high 64bits from the file and insert them in to the high 64bits of the host // register ARM64Reg tmp_reg = GetReg(); - m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); @@ -506,7 +506,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) reg.Load(host_reg, REG_LOWER_PAIR); } reg.SetDirty(false); - m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); return host_reg; } default: @@ -554,7 +554,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit // store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); break; case REG_DUP_SINGLE: flush_reg = GetReg(); @@ -562,7 +562,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) // fall through case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); break; default: // All other types doesn't store anything in PSR1. @@ -687,7 +687,7 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) store_size = 64; if (dirty) - m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); if (!maintain_state) { @@ -702,9 +702,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state) // If the paired registers were at the start of ppcState we could do an STP here. // Too bad moving them would break savestate compatibility between x86_64 and AArch64 // m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, - // PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); + // PPCSTATE_OFF(ps[preg].ps0)); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0)); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1)); } if (!maintain_state) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 3d8c6a9837..9860e4843e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -24,7 +24,7 @@ static const Arm64Gen::ARM64Reg DISPATCHER_PC = // Some asserts to make sure we will be able to load everything static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); -static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, +static_assert((PPCSTATE_OFF(ps[0].ps0) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!"); static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 174406d1ce..52d19543a3 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -4,6 +4,7 @@ #include "Core/PowerPC/PowerPC.h" +#include #include #include #include @@ -11,6 +12,7 @@ #include #include "Common/Assert.h" +#include "Common/BitUtils.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/FPURoundMode.h" @@ -42,6 +44,27 @@ MemChecks memchecks; PPCDebugInterface debug_interface; static CoreTiming::EventType* s_invalidate_cache_thread_safe; + +double PairedSingle::PS0AsDouble() const +{ + return Common::BitCast(ps0); +} + +double PairedSingle::PS1AsDouble() const +{ + return Common::BitCast(ps1); +} + +void PairedSingle::SetPS0(double value) +{ + ps0 = Common::BitCast(value); +} + +void PairedSingle::SetPS1(double value) +{ + ps1 = Common::BitCast(value); +} + static void InvalidateCacheThreadSafe(u64 userdata, s64 cyclesLate) { ppcState.iCache.Invalidate(static_cast(userdata)); @@ -135,10 +158,11 @@ void DoState(PointerWrap& p) static void ResetRegisters() { - memset(ppcState.ps, 0, sizeof(ppcState.ps)); - memset(ppcState.sr, 0, sizeof(ppcState.sr)); - memset(ppcState.gpr, 0, sizeof(ppcState.gpr)); - memset(ppcState.spr, 0, sizeof(ppcState.spr)); + std::fill(std::begin(ppcState.ps), std::end(ppcState.ps), PairedSingle{}); + std::fill(std::begin(ppcState.sr), std::end(ppcState.sr), 0U); + std::fill(std::begin(ppcState.gpr), std::end(ppcState.gpr), 0U); + std::fill(std::begin(ppcState.spr), std::end(ppcState.spr), 0U); + /* 0x00080200 = lonestar 2.0 0x00088202 = lonestar 2.2 diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 28dd4dc26b..2c43bca14f 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "Common/CommonTypes.h" @@ -57,6 +58,43 @@ struct TLBEntry u8 recent = 0; }; +struct PairedSingle +{ + u64 PS0AsU64() const { return ps0; } + u64 PS1AsU64() const { return ps1; } + + u32 PS0AsU32() const { return static_cast(ps0); } + u32 PS1AsU32() const { return static_cast(ps1); } + + double PS0AsDouble() const; + double PS1AsDouble() const; + + void SetPS0(u64 value) { ps0 = value; } + void SetPS0(double value); + + void SetPS1(u64 value) { ps1 = value; } + void SetPS1(double value); + + void SetBoth(u64 lhs, u64 rhs) + { + SetPS0(lhs); + SetPS1(rhs); + } + void SetBoth(double lhs, double rhs) + { + SetPS0(lhs); + SetPS1(rhs); + } + + void Fill(u64 value) { SetBoth(value, value); } + void Fill(double value) { SetBoth(value, value); } + + u64 ps0 = 0; + u64 ps1 = 0; +}; +// Paired single must be standard layout in order for offsetof to work, which is used by the JITs +static_assert(std::is_standard_layout(), "PairedSingle must be standard layout"); + // This contains the entire state of the emulated PowerPC "Gekko" CPU. struct PowerPCState { @@ -114,7 +152,7 @@ struct PowerPCState // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - alignas(16) u64 ps[32][2]; + alignas(16) PairedSingle ps[32]; u32 sr[16]; // Segment registers. @@ -212,11 +250,7 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); #define TL PowerPC::ppcState.spr[SPR_TL] #define TU PowerPC::ppcState.spr[SPR_TU] -#define rPS0(i) (*(double*)(&PowerPC::ppcState.ps[i][0])) -#define rPS1(i) (*(double*)(&PowerPC::ppcState.ps[i][1])) - -#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0])) -#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1])) +#define rPS(i) (PowerPC::ppcState.ps[(i)]) enum CRBits { diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index f1e06bdfb4..93bc112f67 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -227,11 +227,11 @@ void RegisterWidget::PopulateTable() [i](u64 value) { GPR(i) = value; }); // Floating point registers (double) - AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return riPS0(i); }, - [i](u64 value) { riPS0(i) = value; }); + AddRegister(i, 2, RegisterType::fpr, "f" + std::to_string(i), [i] { return rPS(i).PS0AsU64(); }, + [i](u64 value) { rPS(i).SetPS0(value); }); - AddRegister(i, 4, RegisterType::fpr, "", [i] { return riPS1(i); }, - [i](u64 value) { riPS1(i) = value; }); + AddRegister(i, 4, RegisterType::fpr, "", [i] { return rPS(i).PS1AsU64(); }, + [i](u64 value) { rPS(i).SetPS1(value); }); } for (int i = 0; i < 8; i++)