PowerPC: Update FEX on FPSCR store instead of FPSCR load

This is needed not only for the next commit, but also for
correctly emulating float instructions that write to CR1.
This commit is contained in:
JosJuice 2021-08-17 19:57:06 +02:00
parent 89a464dafa
commit c3bcc67653
6 changed files with 200 additions and 76 deletions

View File

@ -24,6 +24,12 @@ enum class FPCC
FU = 1, // ? FU = 1, // ?
}; };
// Recomputes the two summary bits of the given FPSCR value in place:
//   VX  - set when any bit covered by FPSCR_VX_ANY is set.
//   FEX - set when (Hex >> 22) and the FPSCR_ANY_E bits of Hex have a bit in common.
// NOTE(review): the shift by 22 presumably lines each exception bit up with its
// enable bit; the bit layout is defined outside this view - confirm.
inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr)
{
  const bool any_invalid_op = (fpscr->Hex & FPSCR_VX_ANY) != 0;
  fpscr->VX = any_invalid_op;

  // Hex is deliberately re-read only after VX has been stored, so the FEX
  // computation sees the freshly written VX bit.
  const auto enable_bits = fpscr->Hex & FPSCR_ANY_E;
  const auto shifted_bits = fpscr->Hex >> 22;
  const bool any_enabled_exception = (shifted_bits & enable_bits) != 0;
  fpscr->FEX = any_enabled_exception;
}
inline void SetFPException(UReg_FPSCR* fpscr, u32 mask) inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
{ {
if ((fpscr->Hex & mask) != mask) if ((fpscr->Hex & mask) != mask)
@ -32,7 +38,7 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
} }
fpscr->Hex |= mask; fpscr->Hex |= mask;
fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0; UpdateFPExceptionSummary(fpscr);
} }
inline float ForceSingle(const UReg_FPSCR& fpscr, double value) inline float ForceSingle(const UReg_FPSCR& fpscr, double value)

View File

@ -25,22 +25,10 @@ mffsx: 80036650 (huh?)
*/ */
static void FPSCRUpdated(UReg_FPSCR fp) static void FPSCRUpdated(UReg_FPSCR* fpscr)
{ {
UpdateFPExceptionSummary(fpscr);
PowerPC::RoundingModeUpdated(); PowerPC::RoundingModeUpdated();
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
{
// PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
// fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
// Pokemon Colosseum does this. Gah.
}
}
static void UpdateFPSCR(UReg_FPSCR* fpscr)
{
fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
fpscr->FEX = ((fpscr->Hex >> 22) & (fpscr->Hex & FPSCR_ANY_E)) != 0;
} }
void Interpreter::mtfsb0x(UGeckoInstruction inst) void Interpreter::mtfsb0x(UGeckoInstruction inst)
@ -48,7 +36,7 @@ void Interpreter::mtfsb0x(UGeckoInstruction inst)
u32 b = 0x80000000 >> inst.CRBD; u32 b = 0x80000000 >> inst.CRBD;
FPSCR.Hex &= ~b; FPSCR.Hex &= ~b;
FPSCRUpdated(FPSCR); FPSCRUpdated(&FPSCR);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -65,7 +53,7 @@ void Interpreter::mtfsb1x(UGeckoInstruction inst)
else else
FPSCR |= b; FPSCR |= b;
FPSCRUpdated(FPSCR); FPSCRUpdated(&FPSCR);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -80,7 +68,7 @@ void Interpreter::mtfsfix(UGeckoInstruction inst)
FPSCR = (FPSCR.Hex & ~mask) | (imm >> (4 * field)); FPSCR = (FPSCR.Hex & ~mask) | (imm >> (4 * field));
FPSCRUpdated(FPSCR); FPSCRUpdated(&FPSCR);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -97,7 +85,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
} }
FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m); FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
FPSCRUpdated(FPSCR); FPSCRUpdated(&FPSCR);
if (inst.Rc) if (inst.Rc)
PowerPC::ppcState.UpdateCR1(); PowerPC::ppcState.UpdateCR1();
@ -563,22 +551,18 @@ void Interpreter::isync(UGeckoInstruction inst)
void Interpreter::mcrfs(UGeckoInstruction inst) void Interpreter::mcrfs(UGeckoInstruction inst)
{ {
UpdateFPSCR(&FPSCR);
const u32 shift = 4 * (7 - inst.CRFS); const u32 shift = 4 * (7 - inst.CRFS);
const u32 fpflags = (FPSCR.Hex >> shift) & 0xF; const u32 fpflags = (FPSCR.Hex >> shift) & 0xF;
// If any exception bits were read, clear them // If any exception bits were read, clear them
FPSCR.Hex &= ~((0xF << shift) & (FPSCR_FX | FPSCR_ANY_X)); FPSCR.Hex &= ~((0xF << shift) & (FPSCR_FX | FPSCR_ANY_X));
FPSCRUpdated(&FPSCR);
PowerPC::ppcState.cr.SetField(inst.CRFD, fpflags); PowerPC::ppcState.cr.SetField(inst.CRFD, fpflags);
} }
void Interpreter::mffsx(UGeckoInstruction inst) void Interpreter::mffsx(UGeckoInstruction inst)
{ {
// load from FPSCR
// TODO(ector): grab all overflow flags etc and set them in FPSCR
UpdateFPSCR(&FPSCR);
rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex); rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);
if (inst.Rc) if (inst.Rc)

View File

@ -116,11 +116,12 @@ public:
void ClearCRFieldBit(int field, int bit); void ClearCRFieldBit(int field, int bit);
void SetCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit);
void FixGTBeforeSettingCRFieldBit(Gen::X64Reg reg); void FixGTBeforeSettingCRFieldBit(Gen::X64Reg reg);
// Generates a branch that will check if a given bit of a CR register part // Generates a branch that will check if a given bit of a CR register part
// is set or not. // is set or not.
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
void UpdateFPExceptionSummary(Gen::X64Reg fpscr, Gen::X64Reg tmp1, Gen::X64Reg tmp2);
void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single); void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single);
void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true, void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
bool duplicate = false); bool duplicate = false);

View File

@ -4,7 +4,9 @@
#include "Common/BitSet.h" #include "Common/BitSet.h"
#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/HW/ProcessorInterface.h" #include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/Jit.h"
@ -185,6 +187,33 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
return FixupBranch(); return FixupBranch();
} }
// Emits x64 code that recomputes the VX and FEX summary bits of an FPSCR value
// held in a host register.
//
// fpscr: host register holding the FPSCR value; it is updated in place. This
//        function does not store it back to memory itself.
// tmp1, tmp2: host scratch registers; both are overwritten.
//
// The emitted TEST/AND/OR/SHL/SHR instructions also modify the host flags.
//
// Could be done with one temp register, but with two temp registers it's faster
void Jit64::UpdateFPExceptionSummary(X64Reg fpscr, X64Reg tmp1, X64Reg tmp2)
{
// Kill dependency on tmp1 (not required for correctness, since SHL will shift out upper bytes)
XOR(32, R(tmp1), R(tmp1));
// fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
TEST(32, R(fpscr), Imm32(FPSCR_VX_ANY));
// tmp1 = 1 if any FPSCR_VX_ANY bit was set, else 0; then move it to the VX bit position
SETcc(CC_NZ, R(tmp1));
SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_VX)));
// Clear both summary bits here; VX is OR'd back in right away, FEX at the end
AND(32, R(fpscr), Imm32(~(FPSCR_VX | FPSCR_FEX)));
OR(32, R(fpscr), R(tmp1));
// fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
// Both copies are taken after VX was updated above, so they see the new VX bit
MOV(32, R(tmp1), R(fpscr));
MOV(32, R(tmp2), R(fpscr));
SHR(32, R(tmp1), Imm8(22));
AND(32, R(tmp2), Imm32(FPSCR_ANY_E));
TEST(32, R(tmp1), R(tmp2));
// Unfortunately we eat a partial register stall below - we can't zero any of the registers before
// the TEST, and we can't use XOR right after the TEST since that would overwrite flags. However,
// there is no false dependency, since SETcc depends on TEST's flags and TEST depends on tmp1.
SETcc(CC_NZ, R(tmp1));
// Move the 0/1 result to the FEX bit position and merge it into fpscr
SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_FEX)));
OR(32, R(fpscr), R(tmp1));
}
static void DoICacheReset() static void DoICacheReset()
{ {
PowerPC::ppcState.iCache.Reset(); PowerPC::ppcState.iCache.Reset();
@ -637,6 +666,19 @@ void Jit64::mcrfs(UGeckoInstruction inst)
// Only clear exception bits (but not FEX/VX). // Only clear exception bits (but not FEX/VX).
mask &= FPSCR_FX | FPSCR_ANY_X; mask &= FPSCR_FX | FPSCR_ANY_X;
RCX64Reg scratch_guard;
X64Reg scratch;
if (mask != 0)
{
scratch_guard = gpr.Scratch();
RegCache::Realize(scratch_guard);
scratch = scratch_guard;
}
else
{
scratch = RSCRATCH;
}
if (cpu_info.bBMI1) if (cpu_info.bBMI1)
{ {
MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
@ -652,14 +694,17 @@ void Jit64::mcrfs(UGeckoInstruction inst)
SHR(32, R(RSCRATCH2), Imm8(shift)); SHR(32, R(RSCRATCH2), Imm8(shift));
AND(32, R(RSCRATCH2), Imm32(0xF)); AND(32, R(RSCRATCH2), Imm32(0xF));
} }
LEA(64, scratch, MConst(PowerPC::ConditionRegister::s_crTable));
MOV(64, R(scratch), MComplex(scratch, RSCRATCH2, SCALE_8, 0));
MOV(64, CROffset(inst.CRFD), R(scratch));
if (mask != 0) if (mask != 0)
{ {
AND(32, R(RSCRATCH), Imm32(~mask)); AND(32, R(RSCRATCH), Imm32(~mask));
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
} }
LEA(64, RSCRATCH, MConst(PowerPC::ConditionRegister::s_crTable));
MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
MOV(64, CROffset(inst.CRFD), R(RSCRATCH));
} }
void Jit64::mffsx(UGeckoInstruction inst) void Jit64::mffsx(UGeckoInstruction inst)
@ -670,18 +715,6 @@ void Jit64::mffsx(UGeckoInstruction inst)
MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
// FPSCR.FEX = 0 (and VX for below)
AND(32, R(RSCRATCH), Imm32(~0x60000000));
// FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
TEST(32, R(RSCRATCH), Imm32(FPSCR_VX_ANY));
SETcc(CC_NZ, R(RSCRATCH2));
SHL(32, R(RSCRATCH2), Imm8(31 - 2));
OR(32, R(RSCRATCH), R(RSCRATCH2));
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
int d = inst.FD; int d = inst.FD;
RCX64Reg Rd = fpr.Bind(d, RCMode::Write); RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
RegCache::Realize(Rd); RegCache::Realize(Rd);
@ -710,17 +743,32 @@ void Jit64::mtfsb0x(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
FALLBACK_IF(inst.Rc); FALLBACK_IF(inst.Rc);
u32 mask = ~(0x80000000 >> inst.CRBD); const u32 mask = 0x80000000 >> inst.CRBD;
if (inst.CRBD < 29) const u32 inverted_mask = ~mask;
if (mask == FPSCR_FEX || mask == FPSCR_VX)
return;
if (inst.CRBD < 29 && (mask & (FPSCR_ANY_X | FPSCR_ANY_E)) == 0)
{ {
AND(32, PPCSTATE(fpscr), Imm32(mask)); AND(32, PPCSTATE(fpscr), Imm32(inverted_mask));
} }
else else
{ {
MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
AND(32, R(RSCRATCH), Imm32(mask)); AND(32, R(RSCRATCH), Imm32(inverted_mask));
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
{
RCX64Reg scratch = gpr.Scratch();
RegCache::Realize(scratch);
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
}
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
UpdateMXCSR(); if (inst.CRBD >= 29)
UpdateMXCSR();
} }
} }
@ -730,9 +778,13 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
FALLBACK_IF(inst.Rc); FALLBACK_IF(inst.Rc);
u32 mask = 0x80000000 >> inst.CRBD; const u32 mask = 0x80000000 >> inst.CRBD;
if (mask == FPSCR_FEX || mask == FPSCR_VX)
return;
MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
if (mask & FPSCR_ANY_X) if ((mask & FPSCR_ANY_X) != 0)
{ {
BTS(32, R(RSCRATCH), Imm32(31 - inst.CRBD)); BTS(32, R(RSCRATCH), Imm32(31 - inst.CRBD));
FixupBranch dont_set_fx = J_CC(CC_C); FixupBranch dont_set_fx = J_CC(CC_C);
@ -743,6 +795,15 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
{ {
OR(32, R(RSCRATCH), Imm32(mask)); OR(32, R(RSCRATCH), Imm32(mask));
} }
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
{
RCX64Reg scratch = gpr.Scratch();
RegCache::Realize(scratch);
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
}
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
if (inst.CRBD >= 29) if (inst.CRBD >= 29)
UpdateMXCSR(); UpdateMXCSR();
@ -755,12 +816,22 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc); FALLBACK_IF(inst.Rc);
u8 imm = (inst.hex >> (31 - 19)) & 0xF; u8 imm = (inst.hex >> (31 - 19)) & 0xF;
u32 mask = 0xF0000000 >> (4 * inst.CRFD);
u32 or_mask = imm << (28 - 4 * inst.CRFD); u32 or_mask = imm << (28 - 4 * inst.CRFD);
u32 and_mask = ~(0xF0000000 >> (4 * inst.CRFD)); u32 and_mask = ~mask;
MOV(32, R(RSCRATCH), PPCSTATE(fpscr)); MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
AND(32, R(RSCRATCH), Imm32(and_mask)); AND(32, R(RSCRATCH), Imm32(and_mask));
OR(32, R(RSCRATCH), Imm32(or_mask)); OR(32, R(RSCRATCH), Imm32(or_mask));
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
{
RCX64Reg scratch = gpr.Scratch();
RegCache::Realize(scratch);
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
}
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
// Field 7 contains NI and RN. // Field 7 contains NI and RN.
@ -798,6 +869,15 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
AND(32, R(RSCRATCH2), Imm32(~mask)); AND(32, R(RSCRATCH2), Imm32(~mask));
OR(32, R(RSCRATCH), R(RSCRATCH2)); OR(32, R(RSCRATCH), R(RSCRATCH2));
} }
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
{
RCX64Reg scratch = gpr.Scratch();
RegCache::Realize(scratch);
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
}
MOV(32, PPCSTATE(fpscr), R(RSCRATCH)); MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
if (inst.FM & 1) if (inst.FM & 1)

View File

@ -273,6 +273,7 @@ protected:
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);
void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr);
void UpdateRoundingMode(); void UpdateRoundingMode();
void ComputeRC0(Arm64Gen::ARM64Reg reg); void ComputeRC0(Arm64Gen::ARM64Reg reg);

View File

@ -4,6 +4,7 @@
#include "Common/Arm64Emitter.h" #include "Common/Arm64Emitter.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
@ -49,6 +50,25 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
gpr.Unlock(WA); gpr.Unlock(WA);
} }
// Emits AArch64 code that recomputes the VX and FEX summary bits of an FPSCR
// value held in a host register.
//
// fpscr: host register holding the FPSCR value; it is updated in place. This
//        function does not store it back to memory itself.
//
// One temporary GPR is taken from the register cache for the duration of the
// sequence and released before returning. The emitted TST instructions modify
// the host condition flags.
void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr)
{
ARM64Reg WA = gpr.GetReg();
// fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
MOVI2R(WA, FPSCR_VX_ANY);
TST(WA, fpscr);
// WA = 1 if any FPSCR_VX_ANY bit was set, else 0; BFI writes that single bit into VX
CSET(WA, CCFlags::CC_NEQ);
BFI(fpscr, WA, IntLog2(FPSCR_VX), 1);
// fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
// fpscr is read here after VX was inserted above, so this sees the new VX bit
AND(WA, fpscr, LogicalImm(FPSCR_ANY_E, 32));
TST(WA, fpscr, ArithOption(fpscr, ShiftType::LSR, 22));
// BFI overwrites the FEX bit with the 0/1 result, so no separate clear is needed
CSET(WA, CCFlags::CC_NEQ);
BFI(fpscr, WA, IntLog2(FPSCR_FEX), 1);
gpr.Unlock(WA);
}
void JitArm64::UpdateRoundingMode() void JitArm64::UpdateRoundingMode()
{ {
const BitSet32 gprs_to_save = gpr.GetCallerSavedUsed(); const BitSet32 gprs_to_save = gpr.GetCallerSavedUsed();
@ -732,6 +752,8 @@ void JitArm64::mcrfs(UGeckoInstruction inst)
{ {
const u32 inverted_mask = ~mask; const u32 inverted_mask = ~mask;
AND(WA, WA, LogicalImm(inverted_mask, 32)); AND(WA, WA, LogicalImm(inverted_mask, 32));
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
} }
@ -753,24 +775,11 @@ void JitArm64::mffsx(UGeckoInstruction inst)
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
ARM64Reg VD = fpr.RW(inst.FD, RegType::LowerPair); ARM64Reg VD = fpr.RW(inst.FD, RegType::LowerPair);
ARM64Reg WB = gpr.GetReg();
// FPSCR.FEX = 0;
// FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
// (FEX is right next to VX, so we can set both using one BFI instruction)
MOVI2R(WB, FPSCR_VX_ANY);
TST(WA, WB);
CSET(WB, CCFlags::CC_NEQ);
BFI(WA, WB, 31 - 2, 2);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
// Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000;
ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64)); ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64));
m_float_emit.FMOV(EncodeRegToDouble(VD), XA); m_float_emit.FMOV(EncodeRegToDouble(VD), XA);
gpr.Unlock(WA); gpr.Unlock(WA);
gpr.Unlock(WB);
} }
void JitArm64::mtfsb0x(UGeckoInstruction inst) void JitArm64::mtfsb0x(UGeckoInstruction inst)
@ -779,12 +788,20 @@ void JitArm64::mtfsb0x(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
FALLBACK_IF(inst.Rc); FALLBACK_IF(inst.Rc);
u32 mask = ~(0x80000000 >> inst.CRBD); const u32 mask = 0x80000000 >> inst.CRBD;
const u32 inverted_mask = ~mask;
if (mask == FPSCR_FEX || mask == FPSCR_VX)
return;
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
AND(WA, WA, LogicalImm(mask, 32));
AND(WA, WA, LogicalImm(inverted_mask, 32));
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
gpr.Unlock(WA); gpr.Unlock(WA);
@ -799,12 +816,16 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
FALLBACK_IF(inst.Rc); FALLBACK_IF(inst.Rc);
u32 mask = 0x80000000 >> inst.CRBD; const u32 mask = 0x80000000 >> inst.CRBD;
if (mask == FPSCR_FEX || mask == FPSCR_VX)
return;
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
if (mask & FPSCR_ANY_X)
if ((mask & FPSCR_ANY_X) != 0)
{ {
ARM64Reg WB = gpr.GetReg(); ARM64Reg WB = gpr.GetReg();
TST(WA, LogicalImm(mask, 32)); TST(WA, LogicalImm(mask, 32));
@ -813,6 +834,9 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
gpr.Unlock(WB); gpr.Unlock(WB);
} }
ORR(WA, WA, LogicalImm(mask, 32)); ORR(WA, WA, LogicalImm(mask, 32));
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
gpr.Unlock(WA); gpr.Unlock(WA);
@ -829,13 +853,15 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
u8 imm = (inst.hex >> (31 - 19)) & 0xF; u8 imm = (inst.hex >> (31 - 19)) & 0xF;
u8 shift = 28 - 4 * inst.CRFD; u8 shift = 28 - 4 * inst.CRFD;
u32 mask = 0xF << shift;
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
if (imm == 0xF) if (imm == 0xF)
{ {
ORR(WA, WA, LogicalImm(0xF << shift, 32)); ORR(WA, WA, LogicalImm(mask, 32));
} }
else if (imm == 0x0) else if (imm == 0x0)
{ {
@ -849,7 +875,10 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
gpr.Unlock(WB); gpr.Unlock(WB);
} }
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
gpr.Unlock(WA); gpr.Unlock(WA);
// Field 7 contains NI and RN. // Field 7 contains NI and RN.
@ -873,24 +902,47 @@ void JitArm64::mtfsfx(UGeckoInstruction inst)
if (mask == 0xFFFFFFFF) if (mask == 0xFFFFFFFF)
{ {
ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
ARM64Reg WA = gpr.GetReg();
m_float_emit.STR(32, IndexType::Unsigned, VB, PPC_REG, PPCSTATE_OFF(fpscr)); m_float_emit.FMOV(WA, EncodeRegToSingle(VB));
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
gpr.Unlock(WA);
} }
else if (mask != 0) else if (mask != 0)
{ {
ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair); ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
ARM64Reg V0 = fpr.GetReg();
ARM64Reg V1 = fpr.GetReg();
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg();
m_float_emit.LDR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
MOVI2R(WA, mask); m_float_emit.FMOV(WB, EncodeRegToSingle(VB));
m_float_emit.FMOV(EncodeRegToSingle(V1), WA);
m_float_emit.BIT(EncodeRegToDouble(V0), EncodeRegToDouble(VB), EncodeRegToDouble(V1)); if (LogicalImm imm = LogicalImm(mask, 32))
m_float_emit.STR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr)); {
AND(WA, WA, LogicalImm(~mask, 32));
AND(WB, WB, imm);
}
else
{
ARM64Reg WC = gpr.GetReg();
MOVI2R(WC, mask);
BIC(WA, WA, WC);
AND(WB, WB, WC);
gpr.Unlock(WC);
}
ORR(WA, WA, WB);
gpr.Unlock(WB);
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
UpdateFPExceptionSummary(WA);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
fpr.Unlock(V0, V1);
gpr.Unlock(WA); gpr.Unlock(WA);
} }