mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 05:47:56 -07:00
PowerPC: Update FEX on FPSCR store instead of FPSCR load
This is needed not only for the next commit, but also for correctly emulating float instructions that write to CR1.
This commit is contained in:
parent
89a464dafa
commit
c3bcc67653
@ -24,6 +24,12 @@ enum class FPCC
|
||||
FU = 1, // ?
|
||||
};
|
||||
|
||||
// Recomputes the two summary fields of the given FPSCR value from its other bits:
//   VX  is set iff any bit covered by FPSCR_VX_ANY is set.
//   FEX is set iff (Hex >> 22) and the bits covered by FPSCR_ANY_E have a set bit in common.
// The value is modified in place through the pointer; nothing is returned.
inline void UpdateFPExceptionSummary(UReg_FPSCR* fpscr)
{
  const bool any_invalid_op = (fpscr->Hex & FPSCR_VX_ANY) != 0;
  fpscr->VX = any_invalid_op;

  // Hex is read only after VX has been stored, so the FEX computation below works on the
  // value that already carries the new VX.
  // NOTE(review): presumably VX is one of the bits that (Hex >> 22) lines up with
  // FPSCR_ANY_E - confirm against the UReg_FPSCR layout.
  const auto shifted_status = fpscr->Hex >> 22;
  const auto enabled = fpscr->Hex & FPSCR_ANY_E;
  fpscr->FEX = (shifted_status & enabled) != 0;
}
|
||||
|
||||
inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
|
||||
{
|
||||
if ((fpscr->Hex & mask) != mask)
|
||||
@ -32,7 +38,7 @@ inline void SetFPException(UReg_FPSCR* fpscr, u32 mask)
|
||||
}
|
||||
|
||||
fpscr->Hex |= mask;
|
||||
fpscr->VX = (fpscr->Hex & FPSCR_VX_ANY) != 0;
|
||||
UpdateFPExceptionSummary(fpscr);
|
||||
}
|
||||
|
||||
inline float ForceSingle(const UReg_FPSCR& fpscr, double value)
|
||||
|
@ -25,22 +25,10 @@ mffsx: 80036650 (huh?)
|
||||
|
||||
*/
|
||||
|
||||
static void FPSCRUpdated(UReg_FPSCR fp)
|
||||
static void FPSCRUpdated(UReg_FPSCR* fpscr)
|
||||
{
|
||||
UpdateFPExceptionSummary(fpscr);
|
||||
PowerPC::RoundingModeUpdated();
|
||||
|
||||
if (fp.VE || fp.OE || fp.UE || fp.ZE || fp.XE)
|
||||
{
|
||||
// PanicAlert("FPSCR - exceptions enabled. Please report. VE=%i OE=%i UE=%i ZE=%i XE=%i",
|
||||
// fp.VE, fp.OE, fp.UE, fp.ZE, fp.XE);
|
||||
// Pokemon Colosseum does this. Gah.
|
||||
}
|
||||
}
|
||||
|
||||
// Refreshes the derived VX and FEX fields of *fpscr from the rest of the register value.
// VX is written first; FEX is then computed from Hex as it stands after that write.
static void UpdateFPSCR(UReg_FPSCR* fpscr)
{
  UReg_FPSCR& reg = *fpscr;

  reg.VX = (reg.Hex & FPSCR_VX_ANY) != 0;

  // FEX: does any bit of (Hex >> 22) coincide with a set bit covered by FPSCR_ANY_E?
  const auto enabled_bits = reg.Hex & FPSCR_ANY_E;
  reg.FEX = ((reg.Hex >> 22) & enabled_bits) != 0;
}
|
||||
|
||||
void Interpreter::mtfsb0x(UGeckoInstruction inst)
|
||||
@ -48,7 +36,7 @@ void Interpreter::mtfsb0x(UGeckoInstruction inst)
|
||||
u32 b = 0x80000000 >> inst.CRBD;
|
||||
|
||||
FPSCR.Hex &= ~b;
|
||||
FPSCRUpdated(FPSCR);
|
||||
FPSCRUpdated(&FPSCR);
|
||||
|
||||
if (inst.Rc)
|
||||
PowerPC::ppcState.UpdateCR1();
|
||||
@ -65,7 +53,7 @@ void Interpreter::mtfsb1x(UGeckoInstruction inst)
|
||||
else
|
||||
FPSCR |= b;
|
||||
|
||||
FPSCRUpdated(FPSCR);
|
||||
FPSCRUpdated(&FPSCR);
|
||||
|
||||
if (inst.Rc)
|
||||
PowerPC::ppcState.UpdateCR1();
|
||||
@ -80,7 +68,7 @@ void Interpreter::mtfsfix(UGeckoInstruction inst)
|
||||
|
||||
FPSCR = (FPSCR.Hex & ~mask) | (imm >> (4 * field));
|
||||
|
||||
FPSCRUpdated(FPSCR);
|
||||
FPSCRUpdated(&FPSCR);
|
||||
|
||||
if (inst.Rc)
|
||||
PowerPC::ppcState.UpdateCR1();
|
||||
@ -97,7 +85,7 @@ void Interpreter::mtfsfx(UGeckoInstruction inst)
|
||||
}
|
||||
|
||||
FPSCR = (FPSCR.Hex & ~m) | (static_cast<u32>(rPS(inst.FB).PS0AsU64()) & m);
|
||||
FPSCRUpdated(FPSCR);
|
||||
FPSCRUpdated(&FPSCR);
|
||||
|
||||
if (inst.Rc)
|
||||
PowerPC::ppcState.UpdateCR1();
|
||||
@ -563,22 +551,18 @@ void Interpreter::isync(UGeckoInstruction inst)
|
||||
|
||||
void Interpreter::mcrfs(UGeckoInstruction inst)
|
||||
{
|
||||
UpdateFPSCR(&FPSCR);
|
||||
const u32 shift = 4 * (7 - inst.CRFS);
|
||||
const u32 fpflags = (FPSCR.Hex >> shift) & 0xF;
|
||||
|
||||
// If any exception bits were read, clear them
|
||||
FPSCR.Hex &= ~((0xF << shift) & (FPSCR_FX | FPSCR_ANY_X));
|
||||
FPSCRUpdated(&FPSCR);
|
||||
|
||||
PowerPC::ppcState.cr.SetField(inst.CRFD, fpflags);
|
||||
}
|
||||
|
||||
void Interpreter::mffsx(UGeckoInstruction inst)
|
||||
{
|
||||
// load from FPSCR
|
||||
// TODO(ector): grab all overflow flags etc and set them in FPSCR
|
||||
|
||||
UpdateFPSCR(&FPSCR);
|
||||
rPS(inst.FD).SetPS0(UINT64_C(0xFFF8000000000000) | FPSCR.Hex);
|
||||
|
||||
if (inst.Rc)
|
||||
|
@ -116,11 +116,12 @@ public:
|
||||
void ClearCRFieldBit(int field, int bit);
|
||||
void SetCRFieldBit(int field, int bit);
|
||||
void FixGTBeforeSettingCRFieldBit(Gen::X64Reg reg);
|
||||
|
||||
// Generates a branch that will check if a given bit of a CR register part
|
||||
// is set or not.
|
||||
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
||||
|
||||
void UpdateFPExceptionSummary(Gen::X64Reg fpscr, Gen::X64Reg tmp1, Gen::X64Reg tmp2);
|
||||
|
||||
void SetFPRFIfNeeded(const Gen::OpArg& xmm, bool single);
|
||||
void FinalizeSingleResult(Gen::X64Reg output, const Gen::OpArg& input, bool packed = true,
|
||||
bool duplicate = false);
|
||||
|
@ -4,7 +4,9 @@
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/ProcessorInterface.h"
|
||||
#include "Core/PowerPC/Jit64/Jit.h"
|
||||
@ -185,6 +187,33 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
|
||||
return FixupBranch();
|
||||
}
|
||||
|
||||
// Emits code that recomputes the VX and FEX summary bits of an FPSCR value held in a host
// register. The register is updated in place; nothing is loaded from or stored to
// PPCSTATE(fpscr) here, so the caller is responsible for the load and the store.
//   fpscr: register holding the FPSCR value; its VX and FEX bits are replaced.
//   tmp1, tmp2: scratch registers; whatever they held before is overwritten.
// Could be done with one temp register, but with two temp registers it's faster
|
||||
void Jit64::UpdateFPExceptionSummary(X64Reg fpscr, X64Reg tmp1, X64Reg tmp2)
|
||||
{
|
||||
// Kill dependency on tmp1 (not required for correctness, since SHL will shift out upper bytes)
|
||||
XOR(32, R(tmp1), R(tmp1));
|
||||
|
||||
// fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
|
||||
TEST(32, R(fpscr), Imm32(FPSCR_VX_ANY));
|
||||
SETcc(CC_NZ, R(tmp1));
|
||||
// Move the 0/1 result up to the bit position of VX.
SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_VX)));
|
||||
// Clear the old VX and the old FEX together, then merge in the freshly computed VX.
AND(32, R(fpscr), Imm32(~(FPSCR_VX | FPSCR_FEX)));
|
||||
OR(32, R(fpscr), R(tmp1));
|
||||
|
||||
// fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
|
||||
// Both copies are taken after the new VX was merged in, so the FEX test sees it.
MOV(32, R(tmp1), R(fpscr));
|
||||
MOV(32, R(tmp2), R(fpscr));
|
||||
SHR(32, R(tmp1), Imm8(22));
|
||||
AND(32, R(tmp2), Imm32(FPSCR_ANY_E));
|
||||
TEST(32, R(tmp1), R(tmp2));
|
||||
// Unfortunately we eat a partial register stall below - we can't zero any of the registers before
|
||||
// the TEST, and we can't use XOR right after the TEST since that would overwrite flags. However,
|
||||
// there is no false dependency, since SETcc depends on TEST's flags and TEST depends on tmp1.
|
||||
SETcc(CC_NZ, R(tmp1));
|
||||
// FEX was cleared by the AND above, so OR-ing the shifted result is enough to set it.
SHL(32, R(tmp1), Imm8(IntLog2(FPSCR_FEX)));
|
||||
OR(32, R(fpscr), R(tmp1));
|
||||
}
|
||||
|
||||
static void DoICacheReset()
|
||||
{
|
||||
PowerPC::ppcState.iCache.Reset();
|
||||
@ -637,6 +666,19 @@ void Jit64::mcrfs(UGeckoInstruction inst)
|
||||
// Only clear exception bits (but not FEX/VX).
|
||||
mask &= FPSCR_FX | FPSCR_ANY_X;
|
||||
|
||||
RCX64Reg scratch_guard;
|
||||
X64Reg scratch;
|
||||
if (mask != 0)
|
||||
{
|
||||
scratch_guard = gpr.Scratch();
|
||||
RegCache::Realize(scratch_guard);
|
||||
scratch = scratch_guard;
|
||||
}
|
||||
else
|
||||
{
|
||||
scratch = RSCRATCH;
|
||||
}
|
||||
|
||||
if (cpu_info.bBMI1)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
|
||||
@ -652,14 +694,17 @@ void Jit64::mcrfs(UGeckoInstruction inst)
|
||||
SHR(32, R(RSCRATCH2), Imm8(shift));
|
||||
AND(32, R(RSCRATCH2), Imm32(0xF));
|
||||
}
|
||||
|
||||
LEA(64, scratch, MConst(PowerPC::ConditionRegister::s_crTable));
|
||||
MOV(64, R(scratch), MComplex(scratch, RSCRATCH2, SCALE_8, 0));
|
||||
MOV(64, CROffset(inst.CRFD), R(scratch));
|
||||
|
||||
if (mask != 0)
|
||||
{
|
||||
AND(32, R(RSCRATCH), Imm32(~mask));
|
||||
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
}
|
||||
LEA(64, RSCRATCH, MConst(PowerPC::ConditionRegister::s_crTable));
|
||||
MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
|
||||
MOV(64, CROffset(inst.CRFD), R(RSCRATCH));
|
||||
}
|
||||
|
||||
void Jit64::mffsx(UGeckoInstruction inst)
|
||||
@ -670,18 +715,6 @@ void Jit64::mffsx(UGeckoInstruction inst)
|
||||
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
|
||||
|
||||
// FPSCR.FEX = 0 (and VX for below)
|
||||
AND(32, R(RSCRATCH), Imm32(~0x60000000));
|
||||
|
||||
// FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
|
||||
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
|
||||
TEST(32, R(RSCRATCH), Imm32(FPSCR_VX_ANY));
|
||||
SETcc(CC_NZ, R(RSCRATCH2));
|
||||
SHL(32, R(RSCRATCH2), Imm8(31 - 2));
|
||||
OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
|
||||
int d = inst.FD;
|
||||
RCX64Reg Rd = fpr.Bind(d, RCMode::Write);
|
||||
RegCache::Realize(Rd);
|
||||
@ -710,17 +743,32 @@ void Jit64::mtfsb0x(UGeckoInstruction inst)
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
u32 mask = ~(0x80000000 >> inst.CRBD);
|
||||
if (inst.CRBD < 29)
|
||||
const u32 mask = 0x80000000 >> inst.CRBD;
|
||||
const u32 inverted_mask = ~mask;
|
||||
|
||||
if (mask == FPSCR_FEX || mask == FPSCR_VX)
|
||||
return;
|
||||
|
||||
if (inst.CRBD < 29 && (mask & (FPSCR_ANY_X | FPSCR_ANY_E)) == 0)
|
||||
{
|
||||
AND(32, PPCSTATE(fpscr), Imm32(mask));
|
||||
AND(32, PPCSTATE(fpscr), Imm32(inverted_mask));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
|
||||
AND(32, R(RSCRATCH), Imm32(mask));
|
||||
AND(32, R(RSCRATCH), Imm32(inverted_mask));
|
||||
|
||||
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
{
|
||||
RCX64Reg scratch = gpr.Scratch();
|
||||
RegCache::Realize(scratch);
|
||||
|
||||
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
|
||||
}
|
||||
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
UpdateMXCSR();
|
||||
if (inst.CRBD >= 29)
|
||||
UpdateMXCSR();
|
||||
}
|
||||
}
|
||||
|
||||
@ -730,9 +778,13 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
u32 mask = 0x80000000 >> inst.CRBD;
|
||||
const u32 mask = 0x80000000 >> inst.CRBD;
|
||||
|
||||
if (mask == FPSCR_FEX || mask == FPSCR_VX)
|
||||
return;
|
||||
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
|
||||
if (mask & FPSCR_ANY_X)
|
||||
if ((mask & FPSCR_ANY_X) != 0)
|
||||
{
|
||||
BTS(32, R(RSCRATCH), Imm32(31 - inst.CRBD));
|
||||
FixupBranch dont_set_fx = J_CC(CC_C);
|
||||
@ -743,6 +795,15 @@ void Jit64::mtfsb1x(UGeckoInstruction inst)
|
||||
{
|
||||
OR(32, R(RSCRATCH), Imm32(mask));
|
||||
}
|
||||
|
||||
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
{
|
||||
RCX64Reg scratch = gpr.Scratch();
|
||||
RegCache::Realize(scratch);
|
||||
|
||||
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
|
||||
}
|
||||
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
if (inst.CRBD >= 29)
|
||||
UpdateMXCSR();
|
||||
@ -755,12 +816,22 @@ void Jit64::mtfsfix(UGeckoInstruction inst)
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
u8 imm = (inst.hex >> (31 - 19)) & 0xF;
|
||||
u32 mask = 0xF0000000 >> (4 * inst.CRFD);
|
||||
u32 or_mask = imm << (28 - 4 * inst.CRFD);
|
||||
u32 and_mask = ~(0xF0000000 >> (4 * inst.CRFD));
|
||||
u32 and_mask = ~mask;
|
||||
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
|
||||
AND(32, R(RSCRATCH), Imm32(and_mask));
|
||||
OR(32, R(RSCRATCH), Imm32(or_mask));
|
||||
|
||||
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
{
|
||||
RCX64Reg scratch = gpr.Scratch();
|
||||
RegCache::Realize(scratch);
|
||||
|
||||
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
|
||||
}
|
||||
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
|
||||
// Field 7 contains NI and RN.
|
||||
@ -798,6 +869,15 @@ void Jit64::mtfsfx(UGeckoInstruction inst)
|
||||
AND(32, R(RSCRATCH2), Imm32(~mask));
|
||||
OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
}
|
||||
|
||||
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
{
|
||||
RCX64Reg scratch = gpr.Scratch();
|
||||
RegCache::Realize(scratch);
|
||||
|
||||
UpdateFPExceptionSummary(RSCRATCH, RSCRATCH2, scratch);
|
||||
}
|
||||
|
||||
MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
|
||||
if (inst.FM & 1)
|
||||
|
@ -273,6 +273,7 @@ protected:
|
||||
|
||||
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
|
||||
void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);
|
||||
void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr);
|
||||
void UpdateRoundingMode();
|
||||
|
||||
void ComputeRC0(Arm64Gen::ARM64Reg reg);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MathUtil.h"
|
||||
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
@ -49,6 +50,25 @@ void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
// Emits code that recomputes the VX and FEX summary bits of an FPSCR value held in a host
// register, updating that register in place. One temporary register is obtained with
// gpr.GetReg() for the duration of the sequence and released with gpr.Unlock() at the end.
//   fpscr: register holding the FPSCR value; its VX and FEX bits are overwritten.
void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr)
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
// fpscr.VX = (fpscr & FPSCR_VX_ANY) != 0
|
||||
MOVI2R(WA, FPSCR_VX_ANY);
|
||||
TST(WA, fpscr);
|
||||
CSET(WA, CCFlags::CC_NEQ);
|
||||
// Insert the single result bit at the position of VX, leaving all other bits of fpscr alone.
BFI(fpscr, WA, IntLog2(FPSCR_VX), 1);
|
||||
|
||||
// fpscr.FEX = ((fpscr >> 22) & (fpscr & FPSCR_ANY_E)) != 0
|
||||
// fpscr already contains the new VX here, so both operands of the test below see it.
AND(WA, fpscr, LogicalImm(FPSCR_ANY_E, 32));
|
||||
// Test the masked enable bits in WA against fpscr shifted right by 22.
TST(WA, fpscr, ArithOption(fpscr, ShiftType::LSR, 22));
|
||||
CSET(WA, CCFlags::CC_NEQ);
|
||||
// Overwrite the FEX bit with the result; no separate clearing step is needed.
BFI(fpscr, WA, IntLog2(FPSCR_FEX), 1);
|
||||
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
void JitArm64::UpdateRoundingMode()
|
||||
{
|
||||
const BitSet32 gprs_to_save = gpr.GetCallerSavedUsed();
|
||||
@ -732,6 +752,8 @@ void JitArm64::mcrfs(UGeckoInstruction inst)
|
||||
{
|
||||
const u32 inverted_mask = ~mask;
|
||||
AND(WA, WA, LogicalImm(inverted_mask, 32));
|
||||
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
}
|
||||
|
||||
@ -753,24 +775,11 @@ void JitArm64::mffsx(UGeckoInstruction inst)
|
||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
ARM64Reg VD = fpr.RW(inst.FD, RegType::LowerPair);
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
|
||||
// FPSCR.FEX = 0;
|
||||
// FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
|
||||
// (FEX is right next to VX, so we can set both using one BFI instruction)
|
||||
MOVI2R(WB, FPSCR_VX_ANY);
|
||||
TST(WA, WB);
|
||||
CSET(WB, CCFlags::CC_NEQ);
|
||||
BFI(WA, WB, 31 - 2, 2);
|
||||
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
// Vd = FPSCR.Hex | 0xFFF8'0000'0000'0000;
|
||||
ORR(XA, XA, LogicalImm(0xFFF8'0000'0000'0000, 64));
|
||||
m_float_emit.FMOV(EncodeRegToDouble(VD), XA);
|
||||
|
||||
gpr.Unlock(WA);
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
|
||||
void JitArm64::mtfsb0x(UGeckoInstruction inst)
|
||||
@ -779,12 +788,20 @@ void JitArm64::mtfsb0x(UGeckoInstruction inst)
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
u32 mask = ~(0x80000000 >> inst.CRBD);
|
||||
const u32 mask = 0x80000000 >> inst.CRBD;
|
||||
const u32 inverted_mask = ~mask;
|
||||
|
||||
if (mask == FPSCR_FEX || mask == FPSCR_VX)
|
||||
return;
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
AND(WA, WA, LogicalImm(mask, 32));
|
||||
|
||||
AND(WA, WA, LogicalImm(inverted_mask, 32));
|
||||
|
||||
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
@ -799,12 +816,16 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
u32 mask = 0x80000000 >> inst.CRBD;
|
||||
const u32 mask = 0x80000000 >> inst.CRBD;
|
||||
|
||||
if (mask == FPSCR_FEX || mask == FPSCR_VX)
|
||||
return;
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
if (mask & FPSCR_ANY_X)
|
||||
|
||||
if ((mask & FPSCR_ANY_X) != 0)
|
||||
{
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
TST(WA, LogicalImm(mask, 32));
|
||||
@ -813,6 +834,9 @@ void JitArm64::mtfsb1x(UGeckoInstruction inst)
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
ORR(WA, WA, LogicalImm(mask, 32));
|
||||
|
||||
if ((mask & (FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
@ -829,13 +853,15 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
|
||||
|
||||
u8 imm = (inst.hex >> (31 - 19)) & 0xF;
|
||||
u8 shift = 28 - 4 * inst.CRFD;
|
||||
u32 mask = 0xF << shift;
|
||||
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
if (imm == 0xF)
|
||||
{
|
||||
ORR(WA, WA, LogicalImm(0xF << shift, 32));
|
||||
ORR(WA, WA, LogicalImm(mask, 32));
|
||||
}
|
||||
else if (imm == 0x0)
|
||||
{
|
||||
@ -849,7 +875,10 @@ void JitArm64::mtfsfix(UGeckoInstruction inst)
|
||||
gpr.Unlock(WB);
|
||||
}
|
||||
|
||||
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
|
||||
// Field 7 contains NI and RN.
|
||||
@ -873,24 +902,47 @@ void JitArm64::mtfsfx(UGeckoInstruction inst)
|
||||
if (mask == 0xFFFFFFFF)
|
||||
{
|
||||
ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
||||
m_float_emit.STR(32, IndexType::Unsigned, VB, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
m_float_emit.FMOV(WA, EncodeRegToSingle(VB));
|
||||
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
else if (mask != 0)
|
||||
{
|
||||
ARM64Reg VB = fpr.R(inst.FB, RegType::LowerPair);
|
||||
|
||||
ARM64Reg V0 = fpr.GetReg();
|
||||
ARM64Reg V1 = fpr.GetReg();
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
ARM64Reg WB = gpr.GetReg();
|
||||
|
||||
m_float_emit.LDR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
MOVI2R(WA, mask);
|
||||
m_float_emit.FMOV(EncodeRegToSingle(V1), WA);
|
||||
m_float_emit.BIT(EncodeRegToDouble(V0), EncodeRegToDouble(VB), EncodeRegToDouble(V1));
|
||||
m_float_emit.STR(32, IndexType::Unsigned, V0, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
m_float_emit.FMOV(WB, EncodeRegToSingle(VB));
|
||||
|
||||
if (LogicalImm imm = LogicalImm(mask, 32))
|
||||
{
|
||||
AND(WA, WA, LogicalImm(~mask, 32));
|
||||
AND(WB, WB, imm);
|
||||
}
|
||||
else
|
||||
{
|
||||
ARM64Reg WC = gpr.GetReg();
|
||||
|
||||
MOVI2R(WC, mask);
|
||||
BIC(WA, WA, WC);
|
||||
AND(WB, WB, WC);
|
||||
|
||||
gpr.Unlock(WC);
|
||||
}
|
||||
ORR(WA, WA, WB);
|
||||
|
||||
gpr.Unlock(WB);
|
||||
|
||||
if ((mask & (FPSCR_FEX | FPSCR_VX | FPSCR_ANY_X | FPSCR_ANY_E)) != 0)
|
||||
UpdateFPExceptionSummary(WA);
|
||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(fpscr));
|
||||
|
||||
fpr.Unlock(V0, V1);
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user