mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-14 21:37:52 -07:00
[ARM] Clean up FPR cache. Rapid fire floating point instruction implementations. Adds 13 new instructions.
This commit is contained in:
parent
cd7b97f767
commit
ef05a14757
@ -186,6 +186,8 @@ public:
|
||||
|
||||
// Floating point
|
||||
void fabsx(UGeckoInstruction _inst);
|
||||
void fnabsx(UGeckoInstruction _inst);
|
||||
void fnegx(UGeckoInstruction _inst);
|
||||
void faddsx(UGeckoInstruction _inst);
|
||||
void faddx(UGeckoInstruction _inst);
|
||||
void fsubsx(UGeckoInstruction _inst);
|
||||
@ -202,9 +204,20 @@ public:
|
||||
// Paired Singles
|
||||
void ps_add(UGeckoInstruction _inst);
|
||||
void ps_sum0(UGeckoInstruction _inst);
|
||||
void ps_sum1(UGeckoInstruction _inst);
|
||||
void ps_madd(UGeckoInstruction _inst);
|
||||
void ps_sub(UGeckoInstruction _inst);
|
||||
void ps_mul(UGeckoInstruction _inst);
|
||||
void ps_muls0(UGeckoInstruction _inst);
|
||||
void ps_muls1(UGeckoInstruction _inst);
|
||||
void ps_merge00(UGeckoInstruction _inst);
|
||||
void ps_merge01(UGeckoInstruction _inst);
|
||||
void ps_merge10(UGeckoInstruction _inst);
|
||||
void ps_merge11(UGeckoInstruction _inst);
|
||||
void ps_mr(UGeckoInstruction _inst);
|
||||
void ps_neg(UGeckoInstruction _inst);
|
||||
void ps_abs(UGeckoInstruction _inst);
|
||||
void ps_nabs(UGeckoInstruction _inst);
|
||||
};
|
||||
|
||||
#endif // _JIT64_H
|
||||
|
@ -43,14 +43,46 @@ void JitArm::fabsx(UGeckoInstruction inst)
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
|
||||
ARMReg vD = fpr.R0(inst.FD);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD = fpr.R0(inst.FD, false);
|
||||
|
||||
VABS(vD, vB);
|
||||
|
||||
if (inst.Rc) Helper_UpdateCR1(vD);
|
||||
}
|
||||
|
||||
// fnabsx: FD = -|FB| (PS0 slot only).
//
// Emitted as VABS followed by VNEG. Both are pure sign-bit operations, so the
// result is also correct for infinities; the previous |x| - |x| - |x| sequence
// evaluated inf - inf and produced NaN for infinite inputs. No scratch
// register is needed any more.
void JitArm::fnabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	// Source first, then the destination (requested with preLoad = false).
	ARMReg vB = fpr.R0(inst.FB);
	ARMReg vD = fpr.R0(inst.FD, false);

	VABS(vD, vB);
	VNEG(vD, vD);

	if (inst.Rc) Helper_UpdateCR1(vD);
}
|
||||
|
||||
// fnegx: FD = -FB (PS0 slot only), emitted as a single VNEG.
void JitArm::fnegx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	// Operand is fetched before the destination, which is requested with
	// preLoad = false.
	ARMReg src = fpr.R0(inst.FB);
	ARMReg dst = fpr.R0(inst.FD, false);

	VNEG(dst, src);

	if (inst.Rc)
	{
		Helper_UpdateCR1(dst);
	}
}
|
||||
|
||||
void JitArm::faddsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
@ -58,8 +90,8 @@ void JitArm::faddsx(UGeckoInstruction inst)
|
||||
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD0 = fpr.R0(inst.FD);
|
||||
ARMReg vD1 = fpr.R1(inst.FD);
|
||||
ARMReg vD0 = fpr.R0(inst.FD, false);
|
||||
ARMReg vD1 = fpr.R1(inst.FD, false);
|
||||
|
||||
VADD(vD0, vA, vB);
|
||||
VMOV(vD1, vD0);
|
||||
@ -71,9 +103,9 @@ void JitArm::faddx(UGeckoInstruction inst)
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
|
||||
ARMReg vD = fpr.R0(inst.FD);
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD = fpr.R0(inst.FD, false);
|
||||
|
||||
VADD(vD, vA, vB);
|
||||
if (inst.Rc) Helper_UpdateCR1(vD);
|
||||
@ -86,8 +118,8 @@ void JitArm::fsubsx(UGeckoInstruction inst)
|
||||
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD0 = fpr.R0(inst.FD);
|
||||
ARMReg vD1 = fpr.R1(inst.FD);
|
||||
ARMReg vD0 = fpr.R0(inst.FD, false);
|
||||
ARMReg vD1 = fpr.R1(inst.FD, false);
|
||||
|
||||
VSUB(vD0, vA, vB);
|
||||
VMOV(vD1, vD0);
|
||||
@ -99,9 +131,9 @@ void JitArm::fsubx(UGeckoInstruction inst)
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
|
||||
ARMReg vD = fpr.R0(inst.FD);
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD = fpr.R0(inst.FD, false);
|
||||
|
||||
VSUB(vD, vA, vB);
|
||||
if (inst.Rc) Helper_UpdateCR1(vD);
|
||||
@ -114,8 +146,8 @@ void JitArm::fmulsx(UGeckoInstruction inst)
|
||||
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vC = fpr.R0(inst.FC);
|
||||
ARMReg vD0 = fpr.R0(inst.FD);
|
||||
ARMReg vD1 = fpr.R1(inst.FD);
|
||||
ARMReg vD0 = fpr.R0(inst.FD, false);
|
||||
ARMReg vD1 = fpr.R1(inst.FD, false);
|
||||
|
||||
VMUL(vD0, vA, vC);
|
||||
VMOV(vD1, vD0);
|
||||
@ -127,9 +159,9 @@ void JitArm::fmulx(UGeckoInstruction inst)
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
|
||||
ARMReg vD0 = fpr.R0(inst.FD);
|
||||
ARMReg vA = fpr.R0(inst.FA);
|
||||
ARMReg vC = fpr.R0(inst.FC);
|
||||
ARMReg vD0 = fpr.R0(inst.FD, false);
|
||||
|
||||
VMUL(vD0, vA, vC);
|
||||
if (inst.Rc) Helper_UpdateCR1(vD0);
|
||||
@ -139,8 +171,8 @@ void JitArm::fmrx(UGeckoInstruction inst)
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff)
|
||||
|
||||
ARMReg vD = fpr.R0(inst.FD);
|
||||
ARMReg vB = fpr.R0(inst.FB);
|
||||
ARMReg vD = fpr.R0(inst.FD, false);
|
||||
|
||||
VMOV(vD, vB);
|
||||
|
||||
|
@ -40,12 +40,11 @@ void JitArm::ps_add(UGeckoInstruction inst)
|
||||
ARMReg vA1 = fpr.R1(a);
|
||||
ARMReg vB0 = fpr.R0(b);
|
||||
ARMReg vB1 = fpr.R1(b);
|
||||
ARMReg vD0 = fpr.R0(d);
|
||||
ARMReg vD1 = fpr.R1(d);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
VADD(vD0, vA0, vB0);
|
||||
VADD(vD1, vA1, vB1);
|
||||
fpr.Flush();
|
||||
}
|
||||
|
||||
// Wrong, THP videos like SMS and Ikaruga show artifacts
|
||||
@ -67,8 +66,8 @@ void JitArm::ps_madd(UGeckoInstruction inst)
|
||||
ARMReg vB1 = fpr.R1(b);
|
||||
ARMReg vC0 = fpr.R0(c);
|
||||
ARMReg vC1 = fpr.R1(c);
|
||||
ARMReg vD0 = fpr.R0(d);
|
||||
ARMReg vD1 = fpr.R1(d);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
ARMReg V0 = fpr.GetReg();
|
||||
ARMReg V1 = fpr.GetReg();
|
||||
@ -99,14 +98,35 @@ void JitArm::ps_sum0(UGeckoInstruction inst)
|
||||
ARMReg vA0 = fpr.R0(a);
|
||||
ARMReg vB1 = fpr.R1(b);
|
||||
ARMReg vC1 = fpr.R1(c);
|
||||
ARMReg vD0 = fpr.R0(d);
|
||||
ARMReg vD1 = fpr.R1(d);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
VADD(vD0, vA0, vB1);
|
||||
VMOV(vD1, vC1);
|
||||
fpr.Flush();
|
||||
|
||||
}
|
||||
|
||||
// ps_sum1: d.ps0 = c.ps0, d.ps1 = a.ps0 + b.ps1.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_sum1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;

	if (inst.Rc) {
		Default(inst); return;
	}
	ARMReg vA0 = fpr.R0(a);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// The sum must be emitted before the move: when d == a, vD0 and vA0 are
	// the same host register, and writing c.ps0 into it first would destroy
	// a.ps0 before the addition reads it. Writing vD1 first is safe because
	// the following VMOV only reads vC0 (a PS0 slot).
	VADD(vD1, vA0, vB1);
	VMOV(vD0, vC0);
}
|
||||
|
||||
|
||||
void JitArm::ps_sub(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
@ -120,12 +140,11 @@ void JitArm::ps_sub(UGeckoInstruction inst)
|
||||
ARMReg vA1 = fpr.R1(a);
|
||||
ARMReg vB0 = fpr.R0(b);
|
||||
ARMReg vB1 = fpr.R1(b);
|
||||
ARMReg vD0 = fpr.R0(d);
|
||||
ARMReg vD1 = fpr.R1(d);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
VSUB(vD0, vA0, vB0);
|
||||
VSUB(vD1, vA1, vB1);
|
||||
fpr.Flush();
|
||||
}
|
||||
|
||||
void JitArm::ps_mul(UGeckoInstruction inst)
|
||||
@ -141,11 +160,210 @@ void JitArm::ps_mul(UGeckoInstruction inst)
|
||||
ARMReg vA1 = fpr.R1(a);
|
||||
ARMReg vC0 = fpr.R0(c);
|
||||
ARMReg vC1 = fpr.R1(c);
|
||||
ARMReg vD0 = fpr.R0(d);
|
||||
ARMReg vD1 = fpr.R1(d);
|
||||
ARMReg vD0 = fpr.R0(d, false);
|
||||
ARMReg vD1 = fpr.R1(d, false);
|
||||
|
||||
VMUL(vD0, vA0, vC0);
|
||||
VMUL(vD1, vA1, vC1);
|
||||
fpr.Flush();
|
||||
}
|
||||
|
||||
// ps_muls0: both halves of FA are multiplied by the PS0 half of FC.
//   d.ps0 = a.ps0 * c.ps0
//   d.ps1 = a.ps1 * c.ps0
// Record forms (Rc set) are handed to Default().
void JitArm::ps_muls0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(inst.FA);
	ARMReg srcA1 = fpr.R1(inst.FA);
	ARMReg srcC0 = fpr.R0(inst.FC);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);
	ARMReg tmp0 = fpr.GetReg();
	ARMReg tmp1 = fpr.GetReg();

	// Products go through scratch registers so that neither destination is
	// written until both multiplies have read their inputs.
	VMUL(tmp0, srcA0, srcC0);
	VMUL(tmp1, srcA1, srcC0);
	VMOV(dst0, tmp0);
	VMOV(dst1, tmp1);

	fpr.Unlock(tmp0);
	fpr.Unlock(tmp1);
}
|
||||
|
||||
// ps_muls1: both halves of FA are multiplied by the PS1 half of FC.
//   d.ps0 = a.ps0 * c.ps1
//   d.ps1 = a.ps1 * c.ps1
// Record forms (Rc set) are handed to Default().
void JitArm::ps_muls1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(inst.FA);
	ARMReg srcA1 = fpr.R1(inst.FA);
	ARMReg srcC1 = fpr.R1(inst.FC);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);
	ARMReg tmp0 = fpr.GetReg();
	ARMReg tmp1 = fpr.GetReg();

	// Products go through scratch registers so that neither destination is
	// written until both multiplies have read their inputs.
	VMUL(tmp0, srcA0, srcC1);
	VMUL(tmp1, srcA1, srcC1);
	VMOV(dst0, tmp0);
	VMOV(dst1, tmp1);

	fpr.Unlock(tmp0);
	fpr.Unlock(tmp1);
}
|
||||
|
||||
// ps_merge00: d.ps0 = a.ps0, d.ps1 = b.ps0.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_merge00(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)
	u32 a = inst.FA, b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// d.ps1 is written first: when d == b, vD0 and vB0 are the same host
	// register, so b.ps0 has to be copied out before d.ps0 is overwritten.
	// No source lives in a PS1 slot, so writing vD1 first can never clobber
	// an input.
	VMOV(vD1, vB0);
	VMOV(vD0, vA0);
}
|
||||
|
||||
// ps_merge01: d.ps0 = a.ps0, d.ps1 = b.ps1.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_merge01(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(inst.FA);
	ARMReg srcB1 = fpr.R1(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	// Each half is copied into the same lane it came from.
	VMOV(dst0, srcA0);
	VMOV(dst1, srcB1);
}
|
||||
|
||||
// ps_merge10: d.ps0 = a.ps1, d.ps1 = b.ps0.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_merge10(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)
	u32 a = inst.FA, b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg V0 = fpr.GetReg();

	// The lanes cross, so no ordering of two plain moves is safe for every
	// operand combination: writing vD0 first clobbers b.ps0 when d == b, and
	// writing vD1 first clobbers a.ps1 when d == a (both at once for the
	// "swap halves" form d == a == b). Stash b.ps0 in a scratch register
	// before touching the destination.
	VMOV(V0, vB0);
	VMOV(vD0, vA1);
	VMOV(vD1, V0);

	fpr.Unlock(V0);
}
|
||||
|
||||
// ps_merge11: d.ps0 = a.ps1, d.ps1 = b.ps1.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_merge11(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA1 = fpr.R1(inst.FA);
	ARMReg srcB1 = fpr.R1(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	// a.ps1 is read before d.ps1 is written, so d == a is handled by this
	// ordering.
	VMOV(dst0, srcA1);
	VMOV(dst1, srcB1);
}
|
||||
|
||||
// ps_mr: copies both halves of FB into FD.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(inst.FB);
	ARMReg src1 = fpr.R1(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VMOV(dst0, src0);
	VMOV(dst1, src1);
}
|
||||
|
||||
// ps_neg: d.ps0 = -b.ps0, d.ps1 = -b.ps1.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_neg(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(inst.FB);
	ARMReg src1 = fpr.R1(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VNEG(dst0, src0);
	VNEG(dst1, src1);
}
|
||||
|
||||
// ps_abs: d.ps0 = |b.ps0|, d.ps1 = |b.ps1|.
// Record forms (Rc set) are handed to Default().
void JitArm::ps_abs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(inst.FB);
	ARMReg src1 = fpr.R1(inst.FB);
	ARMReg dst0 = fpr.R0(inst.FD, false);
	ARMReg dst1 = fpr.R1(inst.FD, false);

	VABS(dst0, src0);
	VABS(dst1, src1);
}
|
||||
|
||||
// ps_nabs: d.ps0 = -|b.ps0|, d.ps1 = -|b.ps1|.
// Record forms (Rc set) are handed to Default().
//
// Each half is emitted as VABS followed by VNEG. Both are pure sign-bit
// operations, so infinities are handled correctly; the previous
// |x| - |x| - |x| sequence evaluated inf - inf and produced NaN for infinite
// inputs. No scratch register is needed any more.
void JitArm::ps_nabs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)
	u32 b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	VABS(vD0, vB0);
	VNEG(vD0, vD0);
	VABS(vD1, vB1);
	VNEG(vD1, vD1);
}
|
||||
|
@ -127,16 +127,16 @@ static GekkoOPTemplate table4[] =
|
||||
{ //SUBOP10
|
||||
{0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
|
||||
{32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
|
||||
{40, &JitArm::Default}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||
{136, &JitArm::Default}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{264, &JitArm::Default}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
|
||||
{136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
|
||||
{64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
|
||||
{72, &JitArm::Default}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||
{72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
|
||||
{96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
|
||||
{528, &JitArm::Default}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
||||
{560, &JitArm::Default}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
||||
{592, &JitArm::Default}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
|
||||
{624, &JitArm::Default}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
|
||||
{528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
|
||||
{560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
|
||||
{592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
|
||||
{624, &JitArm::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
|
||||
|
||||
{1014, &JitArm::Default}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
|
||||
};
|
||||
@ -144,9 +144,9 @@ static GekkoOPTemplate table4[] =
|
||||
static GekkoOPTemplate table4_2[] =
|
||||
{
|
||||
{10, &JitArm::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
|
||||
{11, &JitArm::Default}, //"ps_sum1", OPTYPE_PS, 0}},
|
||||
{12, &JitArm::Default}, //"ps_muls0", OPTYPE_PS, 0}},
|
||||
{13, &JitArm::Default}, //"ps_muls1", OPTYPE_PS, 0}},
|
||||
{11, &JitArm::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
|
||||
{12, &JitArm::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
|
||||
{13, &JitArm::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
|
||||
{14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}},
|
||||
{15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}},
|
||||
{18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}},
|
||||
@ -352,8 +352,8 @@ static GekkoOPTemplate table63[] =
|
||||
{14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{15, &JitArm::Default}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{136, &JitArm::Default}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{40, &JitArm::Default}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{136, &JitArm::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{40, &JitArm::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
{12, &JitArm::Default}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||
|
||||
{64, &JitArm::Default}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
|
||||
|
@ -35,7 +35,6 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
|
||||
ArmCRegs[a].Reg = PPCRegs[a];
|
||||
ArmCRegs[a].LastLoad = 0;
|
||||
ArmCRegs[a].PS1 = false;
|
||||
ArmCRegs[a].Away = true;
|
||||
}
|
||||
for(u8 a = 0; a < NUMARMREG; ++a)
|
||||
{
|
||||
@ -43,14 +42,11 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
|
||||
ArmRegs[a].free = true;
|
||||
}
|
||||
}
|
||||
|
||||
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
|
||||
{
|
||||
for(u8 a = 0; a < NUMPPCREG; ++a)
|
||||
{
|
||||
ArmCRegs[a].PPCReg = 33;
|
||||
ArmCRegs[a].LastLoad = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
|
||||
{
|
||||
// This will return us the allocation order of the registers we can use on
|
||||
@ -101,59 +97,78 @@ void ArmFPRCache::Unlock(ARMReg V0)
|
||||
}
|
||||
}
|
||||
}
|
||||
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
|
||||
u32 ArmFPRCache::GetLeastUsedRegister(bool increment)
|
||||
{
|
||||
u32 HighestUsed = 0;
|
||||
u8 Num = 0;
|
||||
u8 lastRegIndex = 0;
|
||||
for(u8 a = 0; a < NUMPPCREG; ++a){
|
||||
++ArmCRegs[a].LastLoad;
|
||||
if (increment)
|
||||
++ArmCRegs[a].LastLoad;
|
||||
if (ArmCRegs[a].LastLoad > HighestUsed)
|
||||
{
|
||||
HighestUsed = ArmCRegs[a].LastLoad;
|
||||
Num = a;
|
||||
lastRegIndex = a;
|
||||
}
|
||||
}
|
||||
// Check if already Loaded
|
||||
for(u8 a = 0; a < NUMPPCREG; ++a)
|
||||
if (ArmCRegs[a].PPCReg == preg && ArmCRegs[a].PS1 == PS1)
|
||||
{
|
||||
ArmCRegs[a].LastLoad = 0;
|
||||
// Check if the value is actually in the reg
|
||||
if (ArmCRegs[a].Away && preLoad)
|
||||
{
|
||||
// Load it now since we want it
|
||||
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
|
||||
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
|
||||
ArmCRegs[a].Away = false;
|
||||
}
|
||||
return ArmCRegs[a].Reg;
|
||||
}
|
||||
// Check if we have a free register
|
||||
return lastRegIndex;
|
||||
}
|
||||
bool ArmFPRCache::FindFreeRegister(u32 &regindex)
|
||||
{
|
||||
for (u8 a = 0; a < NUMPPCREG; ++a)
|
||||
if (ArmCRegs[a].PPCReg == 33)
|
||||
{
|
||||
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
|
||||
if (preLoad)
|
||||
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
|
||||
ArmCRegs[a].PPCReg = preg;
|
||||
ArmCRegs[a].LastLoad = 0;
|
||||
ArmCRegs[a].PS1 = PS1;
|
||||
ArmCRegs[a].Away = !preLoad;
|
||||
return ArmCRegs[a].Reg;
|
||||
regindex = a;
|
||||
return true;
|
||||
}
|
||||
// Alright, we couldn't get a free space, dump that least used register
|
||||
s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[Num].PPCReg * 16) + (ArmCRegs[Num].PS1 ? 8 : 0);
|
||||
emit->VSTR(ArmCRegs[Num].Reg, R9, offsetOld);
|
||||
|
||||
s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
|
||||
if (preLoad)
|
||||
emit->VLDR(ArmCRegs[Num].Reg, R9, offsetNew);
|
||||
ArmCRegs[Num].PPCReg = preg;
|
||||
ArmCRegs[Num].LastLoad = 0;
|
||||
ArmCRegs[Num].PS1 = PS1;
|
||||
ArmCRegs[Num].Away = !preLoad;
|
||||
return ArmCRegs[Num].Reg;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the host register that caches one half of guest FPR `preg`.
//
//   preg    - guest FPR number; used to index _regs and to compute the
//             offset into the ps array of the guest state.
//   PS1     - false selects the first 8 bytes of the pair, true the second.
//   preLoad - only consulted when the half is already bound but its type is
//             REG_AWAY; a fresh binding always loads the value from memory.
//
// Lookup order: already bound -> free cache slot -> evict the slot reported
// by GetLeastUsedRegister().
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
{
	// Bumps every slot's LastLoad counter and yields the eviction candidate.
	u32 lastRegIndex = GetLeastUsedRegister(true);

	if (_regs[preg][PS1].GetType() != REG_NOTLOADED)
	{
		u8 a = _regs[preg][PS1].GetRegIndex();
		ArmCRegs[a].LastLoad = 0;
		if (_regs[preg][PS1].GetType() == REG_AWAY && preLoad)
		{
			s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
			emit->VLDR(ArmCRegs[a].Reg, R9, offset);
			_regs[preg][PS1].LoadToReg(a);
		}
		return ArmCRegs[a].Reg;
	}

	u32 regindex;
	if (FindFreeRegister(regindex))
	{
		s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
		emit->VLDR(ArmCRegs[regindex].Reg, R9, offset);

		ArmCRegs[regindex].PPCReg = preg;
		ArmCRegs[regindex].LastLoad = 0;
		// Record which half this slot holds. The eviction path below reads
		// this field to compute the write-back offset, so leaving a stale
		// value here would later store the register to the wrong half.
		ArmCRegs[regindex].PS1 = PS1;

		_regs[preg][PS1].LoadToReg(regindex);
		return ArmCRegs[regindex].Reg;
	}

	// Alright, we couldn't get a free space, dump that least used register
	const bool evictedPS1 = ArmCRegs[lastRegIndex].PS1;
	s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[lastRegIndex].PPCReg * 16) + (evictedPS1 ? 8 : 0);
	s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);

	emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld);
	emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew);

	// Unbind the half that was actually evicted; it must be selected with the
	// evicted slot's own PS1 flag, not the PS1 of the incoming request.
	_regs[ArmCRegs[lastRegIndex].PPCReg][evictedPS1].Flush();

	ArmCRegs[lastRegIndex].PPCReg = preg;
	ArmCRegs[lastRegIndex].LastLoad = 0;
	ArmCRegs[lastRegIndex].PS1 = PS1;

	_regs[preg][PS1].LoadToReg(lastRegIndex);

	return ArmCRegs[lastRegIndex].Reg;
}
|
||||
|
||||
ARMReg ArmFPRCache::R0(u32 preg, bool preLoad)
|
||||
@ -168,14 +183,28 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
|
||||
|
||||
void ArmFPRCache::Flush()
|
||||
{
|
||||
for(u8 a = 0; a < NUMPPCREG; ++a)
|
||||
if (ArmCRegs[a].PPCReg != 33)
|
||||
for (u8 a = 0; a < 32; ++a)
|
||||
{
|
||||
if (_regs[a][0].GetType() == REG_REG)
|
||||
{
|
||||
s16 offset = PPCSTATE_OFF(ps) + (ArmCRegs[a].PPCReg * 16) + (ArmCRegs[a].PS1 ? 8 : 0);
|
||||
emit->VSTR(ArmCRegs[a].Reg, R9, offset);
|
||||
ArmCRegs[a].PPCReg = 33;
|
||||
ArmCRegs[a].LastLoad = 0;
|
||||
ArmCRegs[a].Away = true;
|
||||
s16 offset = PPCSTATE_OFF(ps) + (a * 16);
|
||||
u32 regindex = _regs[a][0].GetRegIndex();
|
||||
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
|
||||
|
||||
ArmCRegs[regindex].PPCReg = 33;
|
||||
ArmCRegs[regindex].LastLoad = 0;
|
||||
_regs[a][0].Flush();
|
||||
}
|
||||
if (_regs[a][1].GetType() == REG_REG)
|
||||
{
|
||||
s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8;
|
||||
u32 regindex = _regs[a][1].GetRegIndex();
|
||||
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
|
||||
|
||||
ArmCRegs[regindex].PPCReg = 33;
|
||||
ArmCRegs[regindex].LastLoad = 0;
|
||||
_regs[a][1].Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ using namespace ArmGen;
|
||||
class ArmFPRCache
|
||||
{
|
||||
private:
|
||||
OpArg _regs[32][2]; // One for each FPR reg
|
||||
JRCPPC ArmCRegs[ARMFPUREGS];
|
||||
JRCReg ArmRegs[ARMFPUREGS];
|
||||
|
||||
@ -40,6 +41,8 @@ private:
|
||||
|
||||
ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad);
|
||||
|
||||
u32 GetLeastUsedRegister(bool increment);
|
||||
bool FindFreeRegister(u32 &regindex);
|
||||
protected:
|
||||
ARMXEmitter *emit;
|
||||
|
||||
|
@ -126,10 +126,8 @@ bool ArmRegCache::FindFreeRegister(u32 &regindex)
|
||||
ARMReg ArmRegCache::R(u32 preg)
|
||||
{
|
||||
if (regs[preg].GetType() == REG_IMM)
|
||||
{
|
||||
return BindToRegister(preg);
|
||||
//asm ("bkpt #1;");
|
||||
}
|
||||
|
||||
u32 lastRegIndex = GetLeastUsedRegister(true);
|
||||
|
||||
// Check if already Loaded
|
||||
|
@ -38,55 +38,57 @@ using namespace ArmGen;
|
||||
enum RegType
|
||||
{
|
||||
REG_NOTLOADED = 0,
|
||||
REG_REG,
|
||||
REG_IMM,
|
||||
REG_REG, // Reg type is register
|
||||
REG_IMM, // Reg is really a IMM
|
||||
REG_AWAY, // Bound to a register, but not preloaded
|
||||
};
|
||||
|
||||
class OpArg
|
||||
{
|
||||
private:
|
||||
class Reg{
|
||||
public:
|
||||
RegType m_type;
|
||||
u8 m_reg; // index to register
|
||||
u32 m_value;
|
||||
Reg()
|
||||
{
|
||||
m_type = REG_NOTLOADED;
|
||||
m_reg = 33;
|
||||
m_value = 0;
|
||||
}
|
||||
} Reg;
|
||||
RegType m_type; // store type
|
||||
u8 m_reg; // index to register
|
||||
u32 m_value; // IMM value
|
||||
|
||||
public:
|
||||
OpArg(){}
|
||||
OpArg()
|
||||
{
|
||||
m_type = REG_NOTLOADED;
|
||||
m_reg = 33;
|
||||
m_value = 0;
|
||||
}
|
||||
|
||||
RegType GetType()
|
||||
{
|
||||
return Reg.m_type;
|
||||
return m_type;
|
||||
}
|
||||
|
||||
u8 GetRegIndex()
|
||||
{
|
||||
return Reg.m_reg;
|
||||
return m_reg;
|
||||
}
|
||||
u32 GetImm()
|
||||
{
|
||||
return Reg.m_value;
|
||||
return m_value;
|
||||
}
|
||||
void LoadToAway(u8 reg)
|
||||
{
|
||||
m_type = REG_AWAY;
|
||||
m_reg = reg;
|
||||
}
|
||||
void LoadToReg(u8 reg)
|
||||
{
|
||||
Reg.m_type = REG_REG;
|
||||
Reg.m_reg = reg;
|
||||
m_type = REG_REG;
|
||||
m_reg = reg;
|
||||
}
|
||||
void LoadToImm(u32 imm)
|
||||
{
|
||||
Reg.m_type = REG_IMM;
|
||||
Reg.m_value = imm;
|
||||
m_type = REG_IMM;
|
||||
m_value = imm;
|
||||
}
|
||||
void Flush()
|
||||
{
|
||||
Reg.m_type = REG_NOTLOADED;
|
||||
m_type = REG_NOTLOADED;
|
||||
}
|
||||
};
|
||||
|
||||
@ -96,7 +98,6 @@ struct JRCPPC
|
||||
bool PS1;
|
||||
ARMReg Reg; // Tied to which ARM Register
|
||||
u32 LastLoad;
|
||||
bool Away; // Only used in FPR cache
|
||||
};
|
||||
struct JRCReg
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user