[ARM] Clean up FPR cache. Rapid fire floating point instruction implementations. Adds 13 new instructions.

This commit is contained in:
Ryan Houdek 2013-09-07 00:19:32 +00:00
parent cd7b97f767
commit ef05a14757
8 changed files with 412 additions and 118 deletions

View File

@ -186,6 +186,8 @@ public:
// Floating point // Floating point
void fabsx(UGeckoInstruction _inst); void fabsx(UGeckoInstruction _inst);
void fnabsx(UGeckoInstruction _inst);
void fnegx(UGeckoInstruction _inst);
void faddsx(UGeckoInstruction _inst); void faddsx(UGeckoInstruction _inst);
void faddx(UGeckoInstruction _inst); void faddx(UGeckoInstruction _inst);
void fsubsx(UGeckoInstruction _inst); void fsubsx(UGeckoInstruction _inst);
@ -202,9 +204,20 @@ public:
// Paired Singles // Paired Singles
void ps_add(UGeckoInstruction _inst); void ps_add(UGeckoInstruction _inst);
void ps_sum0(UGeckoInstruction _inst); void ps_sum0(UGeckoInstruction _inst);
void ps_sum1(UGeckoInstruction _inst);
void ps_madd(UGeckoInstruction _inst); void ps_madd(UGeckoInstruction _inst);
void ps_sub(UGeckoInstruction _inst); void ps_sub(UGeckoInstruction _inst);
void ps_mul(UGeckoInstruction _inst); void ps_mul(UGeckoInstruction _inst);
void ps_muls0(UGeckoInstruction _inst);
void ps_muls1(UGeckoInstruction _inst);
void ps_merge00(UGeckoInstruction _inst);
void ps_merge01(UGeckoInstruction _inst);
void ps_merge10(UGeckoInstruction _inst);
void ps_merge11(UGeckoInstruction _inst);
void ps_mr(UGeckoInstruction _inst);
void ps_neg(UGeckoInstruction _inst);
void ps_abs(UGeckoInstruction _inst);
void ps_nabs(UGeckoInstruction _inst);
}; };
#endif // _JIT64_H #endif // _JIT64_H

View File

@ -43,14 +43,46 @@ void JitArm::fabsx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff) JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VABS(vD, vB); VABS(vD, vB);
if (inst.Rc) Helper_UpdateCR1(vD); if (inst.Rc) Helper_UpdateCR1(vD);
} }
// fnabsx: writes -|frB| into frD (ps0 half only) and, when the Rc bit is
// set, passes the result to Helper_UpdateCR1.
void JitArm::fnabsx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	// Source is fetched first; the destination is requested with
	// preLoad = false.
	ARMReg vB = fpr.R0(inst.FB);
	ARMReg vD = fpr.R0(inst.FD, false);

	// Clear the sign bit, then flip it back on. Both steps only touch the
	// sign bit, so infinities are preserved. The earlier |x| - |x| - |x|
	// sequence evaluated inf - inf (NaN) for infinite inputs and needed a
	// scratch register on top of that.
	VABS(vD, vB);
	VNEG(vD, vD);

	if (inst.Rc) Helper_UpdateCR1(vD);
}
// fnegx: emits a VNEG from the ps0 half of frB into the ps0 half of frD.
void JitArm::fnegx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff)

	// Source first, then the destination without preloading it.
	ARMReg source = fpr.R0(inst.FB);
	ARMReg dest = fpr.R0(inst.FD, false);

	VNEG(dest, source);

	// Record form: hand the result to the CR1 helper.
	if (inst.Rc)
	{
		Helper_UpdateCR1(dest);
	}
}
void JitArm::faddsx(UGeckoInstruction inst) void JitArm::faddsx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -58,8 +90,8 @@ void JitArm::faddsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD0 = fpr.R0(inst.FD); ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD); ARMReg vD1 = fpr.R1(inst.FD, false);
VADD(vD0, vA, vB); VADD(vD0, vA, vB);
VMOV(vD1, vD0); VMOV(vD1, vD0);
@ -71,9 +103,9 @@ void JitArm::faddx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff) JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VADD(vD, vA, vB); VADD(vD, vA, vB);
if (inst.Rc) Helper_UpdateCR1(vD); if (inst.Rc) Helper_UpdateCR1(vD);
@ -86,8 +118,8 @@ void JitArm::fsubsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD0 = fpr.R0(inst.FD); ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD); ARMReg vD1 = fpr.R1(inst.FD, false);
VSUB(vD0, vA, vB); VSUB(vD0, vA, vB);
VMOV(vD1, vD0); VMOV(vD1, vD0);
@ -99,9 +131,9 @@ void JitArm::fsubx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff) JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VSUB(vD, vA, vB); VSUB(vD, vA, vB);
if (inst.Rc) Helper_UpdateCR1(vD); if (inst.Rc) Helper_UpdateCR1(vD);
@ -114,8 +146,8 @@ void JitArm::fmulsx(UGeckoInstruction inst)
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vC = fpr.R0(inst.FC); ARMReg vC = fpr.R0(inst.FC);
ARMReg vD0 = fpr.R0(inst.FD); ARMReg vD0 = fpr.R0(inst.FD, false);
ARMReg vD1 = fpr.R1(inst.FD); ARMReg vD1 = fpr.R1(inst.FD, false);
VMUL(vD0, vA, vC); VMUL(vD0, vA, vC);
VMOV(vD1, vD0); VMOV(vD1, vD0);
@ -127,9 +159,9 @@ void JitArm::fmulx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff) JITDISABLE(bJITFloatingPointOff)
ARMReg vD0 = fpr.R0(inst.FD);
ARMReg vA = fpr.R0(inst.FA); ARMReg vA = fpr.R0(inst.FA);
ARMReg vC = fpr.R0(inst.FC); ARMReg vC = fpr.R0(inst.FC);
ARMReg vD0 = fpr.R0(inst.FD, false);
VMUL(vD0, vA, vC); VMUL(vD0, vA, vC);
if (inst.Rc) Helper_UpdateCR1(vD0); if (inst.Rc) Helper_UpdateCR1(vD0);
@ -139,8 +171,8 @@ void JitArm::fmrx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff) JITDISABLE(bJITFloatingPointOff)
ARMReg vD = fpr.R0(inst.FD);
ARMReg vB = fpr.R0(inst.FB); ARMReg vB = fpr.R0(inst.FB);
ARMReg vD = fpr.R0(inst.FD, false);
VMOV(vD, vB); VMOV(vD, vB);

View File

@ -40,12 +40,11 @@ void JitArm::ps_add(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a); ARMReg vA1 = fpr.R1(a);
ARMReg vB0 = fpr.R0(b); ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b); ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d); ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d); ARMReg vD1 = fpr.R1(d, false);
VADD(vD0, vA0, vB0); VADD(vD0, vA0, vB0);
VADD(vD1, vA1, vB1); VADD(vD1, vA1, vB1);
fpr.Flush();
} }
// Wrong, THP videos like SMS and Ikaruga show artifacts // Wrong, THP videos like SMS and Ikaruga show artifacts
@ -67,8 +66,8 @@ void JitArm::ps_madd(UGeckoInstruction inst)
ARMReg vB1 = fpr.R1(b); ARMReg vB1 = fpr.R1(b);
ARMReg vC0 = fpr.R0(c); ARMReg vC0 = fpr.R0(c);
ARMReg vC1 = fpr.R1(c); ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d); ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d); ARMReg vD1 = fpr.R1(d, false);
ARMReg V0 = fpr.GetReg(); ARMReg V0 = fpr.GetReg();
ARMReg V1 = fpr.GetReg(); ARMReg V1 = fpr.GetReg();
@ -99,14 +98,35 @@ void JitArm::ps_sum0(UGeckoInstruction inst)
ARMReg vA0 = fpr.R0(a); ARMReg vA0 = fpr.R0(a);
ARMReg vB1 = fpr.R1(b); ARMReg vB1 = fpr.R1(b);
ARMReg vC1 = fpr.R1(c); ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d); ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d); ARMReg vD1 = fpr.R1(d, false);
VADD(vD0, vA0, vB1); VADD(vD0, vA0, vB1);
VMOV(vD1, vC1); VMOV(vD1, vC1);
fpr.Flush();
} }
// ps_sum1: frD.ps0 = frC.ps0, frD.ps1 = frA.ps0 + frB.ps1.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_sum1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
	if (inst.Rc) {
		Default(inst); return;
	}

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vC0 = fpr.R0(c);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// The add must be emitted before the move: when d == a the cache hands
	// back the same ARM register for vD0 and vA0, so moving frC.ps0 into
	// vD0 first would destroy the addend. Writing vD1 first is always safe
	// because neither instruction reads a ps1 half of d after that write
	// (vB1 is consumed by the add itself).
	VADD(vD1, vA0, vB1);
	VMOV(vD0, vC0);
}
void JitArm::ps_sub(UGeckoInstruction inst) void JitArm::ps_sub(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -120,12 +140,11 @@ void JitArm::ps_sub(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a); ARMReg vA1 = fpr.R1(a);
ARMReg vB0 = fpr.R0(b); ARMReg vB0 = fpr.R0(b);
ARMReg vB1 = fpr.R1(b); ARMReg vB1 = fpr.R1(b);
ARMReg vD0 = fpr.R0(d); ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d); ARMReg vD1 = fpr.R1(d, false);
VSUB(vD0, vA0, vB0); VSUB(vD0, vA0, vB0);
VSUB(vD1, vA1, vB1); VSUB(vD1, vA1, vB1);
fpr.Flush();
} }
void JitArm::ps_mul(UGeckoInstruction inst) void JitArm::ps_mul(UGeckoInstruction inst)
@ -141,11 +160,210 @@ void JitArm::ps_mul(UGeckoInstruction inst)
ARMReg vA1 = fpr.R1(a); ARMReg vA1 = fpr.R1(a);
ARMReg vC0 = fpr.R0(c); ARMReg vC0 = fpr.R0(c);
ARMReg vC1 = fpr.R1(c); ARMReg vC1 = fpr.R1(c);
ARMReg vD0 = fpr.R0(d); ARMReg vD0 = fpr.R0(d, false);
ARMReg vD1 = fpr.R1(d); ARMReg vD1 = fpr.R1(d, false);
VMUL(vD0, vA0, vC0); VMUL(vD0, vA0, vC0);
VMUL(vD1, vA1, vC1); VMUL(vD1, vA1, vC1);
fpr.Flush();
} }
// ps_muls0: multiplies both halves of frA by the ps0 half of frC and stores
// the pair in frD. Record-form (Rc) encodings are handed to Default().
void JitArm::ps_muls0(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 a = inst.FA;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(a);
	ARMReg srcA1 = fpr.R1(a);
	ARMReg scale = fpr.R0(c);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	// Both products go through scratch registers before anything is
	// written to the destination pair.
	ARMReg tmp0 = fpr.GetReg();
	ARMReg tmp1 = fpr.GetReg();

	VMUL(tmp0, srcA0, scale);
	VMUL(tmp1, srcA1, scale);
	VMOV(dst0, tmp0);
	VMOV(dst1, tmp1);

	fpr.Unlock(tmp0);
	fpr.Unlock(tmp1);
}
// ps_muls1: multiplies both halves of frA by the ps1 half of frC and stores
// the pair in frD. Record-form (Rc) encodings are handed to Default().
void JitArm::ps_muls1(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 a = inst.FA;
	const u32 c = inst.FC;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg srcA0 = fpr.R0(a);
	ARMReg srcA1 = fpr.R1(a);
	ARMReg scale = fpr.R1(c);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	// Both products go through scratch registers before anything is
	// written to the destination pair.
	ARMReg tmp0 = fpr.GetReg();
	ARMReg tmp1 = fpr.GetReg();

	VMUL(tmp0, srcA0, scale);
	VMUL(tmp1, srcA1, scale);
	VMOV(dst0, tmp0);
	VMOV(dst1, tmp1);

	fpr.Unlock(tmp0);
	fpr.Unlock(tmp1);
}
// ps_merge00: frD.ps0 = frA.ps0, frD.ps1 = frB.ps0.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_merge00(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA0 = fpr.R0(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// Fill the ps1 half first. Both sources are ps0 halves, so writing vD1
	// can never disturb them, whereas writing vD0 first would overwrite
	// vB0 whenever d == b (the cache returns the same ARM register).
	VMOV(vD1, vB0);
	VMOV(vD0, vA0);
}
// ps_merge01: copies the ps0 half of frA into frD.ps0 and the ps1 half of
// frB into frD.ps1. Record-form (Rc) encodings are handed to Default().
void JitArm::ps_merge01(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg lowSrc = fpr.R0(a);
	ARMReg highSrc = fpr.R1(b);
	ARMReg lowDst = fpr.R0(d, false);
	ARMReg highDst = fpr.R1(d, false);

	VMOV(lowDst, lowSrc);
	VMOV(highDst, highSrc);
}
// ps_merge10: frD.ps0 = frA.ps1, frD.ps1 = frB.ps0.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_merge10(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 a = inst.FA, b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vA1 = fpr.R1(a);
	ARMReg vB0 = fpr.R0(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);
	ARMReg V0 = fpr.GetReg();

	// This instruction crosses halves, so no ordering of two direct moves
	// is safe for every operand combination: with d == b the write to vD0
	// destroys vB0, and with d == a the write to vD1 destroys vA1 (d == a
	// == b is a full swap). Park frA.ps1 in a scratch register so both
	// sources are read before either destination half is final.
	VMOV(V0, vA1);
	VMOV(vD1, vB0);
	VMOV(vD0, V0);

	fpr.Unlock(V0);
}
// ps_merge11: copies the ps1 half of frA into frD.ps0 and the ps1 half of
// frB into frD.ps1. Record-form (Rc) encodings are handed to Default().
void JitArm::ps_merge11(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 a = inst.FA;
	const u32 b = inst.FB;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg lowSrc = fpr.R1(a);
	ARMReg highSrc = fpr.R1(b);
	ARMReg lowDst = fpr.R0(d, false);
	ARMReg highDst = fpr.R1(d, false);

	// The ps0 half is written before the ps1 half; keep this order.
	VMOV(lowDst, lowSrc);
	VMOV(highDst, highSrc);
}
// ps_mr: copies both halves of frB into frD.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 b = inst.FB;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VMOV(dst0, src0);
	VMOV(dst1, src1);
}
// ps_neg: emits a VNEG for each half of frB, writing the pair to frD.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_neg(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 b = inst.FB;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VNEG(dst0, src0);
	VNEG(dst1, src1);
}
// ps_abs: emits a VABS for each half of frB, writing the pair to frD.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_abs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	const u32 b = inst.FB;
	const u32 d = inst.FD;

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	ARMReg src0 = fpr.R0(b);
	ARMReg src1 = fpr.R1(b);
	ARMReg dst0 = fpr.R0(d, false);
	ARMReg dst1 = fpr.R1(d, false);

	VABS(dst0, src0);
	VABS(dst1, src1);
}
// ps_nabs: frD.ps0 = -|frB.ps0|, frD.ps1 = -|frB.ps1|.
// Record-form (Rc) encodings are handed to Default().
void JitArm::ps_nabs(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITPairedOff)

	u32 b = inst.FB, d = inst.FD;
	if (inst.Rc){
		Default(inst); return;
	}

	ARMReg vB0 = fpr.R0(b);
	ARMReg vB1 = fpr.R1(b);
	ARMReg vD0 = fpr.R0(d, false);
	ARMReg vD1 = fpr.R1(d, false);

	// Clear the sign bit and then flip it back on, per lane. Both steps
	// only touch the sign bit, so infinities are preserved. The earlier
	// |x| - |x| - |x| sequence evaluated inf - inf (NaN) for infinite
	// inputs and needed a scratch register on top of that.
	VABS(vD0, vB0);
	VNEG(vD0, vD0);

	VABS(vD1, vB1);
	VNEG(vD1, vD1);
}

View File

@ -127,16 +127,16 @@ static GekkoOPTemplate table4[] =
{ //SUBOP10 { //SUBOP10
{0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, {0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}},
{32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}},
{40, &JitArm::Default}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}},
{136, &JitArm::Default}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, &JitArm::Default}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, &JitArm::Default}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, &JitArm::Default}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, &JitArm::Default}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
{592, &JitArm::Default}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, {592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
{624, &JitArm::Default}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, {624, &JitArm::ps_merge11}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}},
{1014, &JitArm::Default}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, {1014, &JitArm::Default}, //"dcbz_l", OPTYPE_SYSTEM, 0}},
}; };
@ -144,9 +144,9 @@ static GekkoOPTemplate table4[] =
static GekkoOPTemplate table4_2[] = static GekkoOPTemplate table4_2[] =
{ {
{10, &JitArm::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}}, {10, &JitArm::ps_sum0}, //"ps_sum0", OPTYPE_PS, 0}},
{11, &JitArm::Default}, //"ps_sum1", OPTYPE_PS, 0}}, {11, &JitArm::ps_sum1}, //"ps_sum1", OPTYPE_PS, 0}},
{12, &JitArm::Default}, //"ps_muls0", OPTYPE_PS, 0}}, {12, &JitArm::ps_muls0}, //"ps_muls0", OPTYPE_PS, 0}},
{13, &JitArm::Default}, //"ps_muls1", OPTYPE_PS, 0}}, {13, &JitArm::ps_muls1}, //"ps_muls1", OPTYPE_PS, 0}},
{14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}}, {14, &JitArm::Default}, //"ps_madds0", OPTYPE_PS, 0}},
{15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}}, {15, &JitArm::Default}, //"ps_madds1", OPTYPE_PS, 0}},
{18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}}, {18, &JitArm::Default}, //"ps_div", OPTYPE_PS, 0, 16}},
@ -352,8 +352,8 @@ static GekkoOPTemplate table63[] =
{14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, &JitArm::Default}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm::Default}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, &JitArm::Default}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, {136, &JitArm::fnabsx}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, &JitArm::Default}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, {40, &JitArm::fnegx}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, &JitArm::Default}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, {12, &JitArm::Default}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
{64, &JitArm::Default}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, {64, &JitArm::Default}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},

View File

@ -35,7 +35,6 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
ArmCRegs[a].Reg = PPCRegs[a]; ArmCRegs[a].Reg = PPCRegs[a];
ArmCRegs[a].LastLoad = 0; ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].PS1 = false; ArmCRegs[a].PS1 = false;
ArmCRegs[a].Away = true;
} }
for(u8 a = 0; a < NUMARMREG; ++a) for(u8 a = 0; a < NUMARMREG; ++a)
{ {
@ -43,14 +42,11 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
ArmRegs[a].free = true; ArmRegs[a].free = true;
} }
} }
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats) void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{ {
for(u8 a = 0; a < NUMPPCREG; ++a)
{
ArmCRegs[a].PPCReg = 33;
ArmCRegs[a].LastLoad = 0;
}
} }
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count) ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
{ {
// This will return us the allocation order of the registers we can use on // This will return us the allocation order of the registers we can use on
@ -101,59 +97,78 @@ void ArmFPRCache::Unlock(ARMReg V0)
} }
} }
} }
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) u32 ArmFPRCache::GetLeastUsedRegister(bool increment)
{ {
u32 HighestUsed = 0; u32 HighestUsed = 0;
u8 Num = 0; u8 lastRegIndex = 0;
for(u8 a = 0; a < NUMPPCREG; ++a){ for(u8 a = 0; a < NUMPPCREG; ++a){
++ArmCRegs[a].LastLoad; if (increment)
++ArmCRegs[a].LastLoad;
if (ArmCRegs[a].LastLoad > HighestUsed) if (ArmCRegs[a].LastLoad > HighestUsed)
{ {
HighestUsed = ArmCRegs[a].LastLoad; HighestUsed = ArmCRegs[a].LastLoad;
Num = a; lastRegIndex = a;
} }
} }
// Check if already Loaded return lastRegIndex;
for(u8 a = 0; a < NUMPPCREG; ++a) }
if (ArmCRegs[a].PPCReg == preg && ArmCRegs[a].PS1 == PS1) bool ArmFPRCache::FindFreeRegister(u32 &regindex)
{ {
ArmCRegs[a].LastLoad = 0;
// Check if the value is actually in the reg
if (ArmCRegs[a].Away && preLoad)
{
// Load it now since we want it
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
ArmCRegs[a].Away = false;
}
return ArmCRegs[a].Reg;
}
// Check if we have a free register
for (u8 a = 0; a < NUMPPCREG; ++a) for (u8 a = 0; a < NUMPPCREG; ++a)
if (ArmCRegs[a].PPCReg == 33) if (ArmCRegs[a].PPCReg == 33)
{ {
s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); regindex = a;
if (preLoad) return true;
emit->VLDR(ArmCRegs[a].Reg, R9, offset);
ArmCRegs[a].PPCReg = preg;
ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].PS1 = PS1;
ArmCRegs[a].Away = !preLoad;
return ArmCRegs[a].Reg;
} }
// Alright, we couldn't get a free space, dump that least used register return false;
s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[Num].PPCReg * 16) + (ArmCRegs[Num].PS1 ? 8 : 0); }
emit->VSTR(ArmCRegs[Num].Reg, R9, offsetOld);
s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
if (preLoad)
emit->VLDR(ArmCRegs[Num].Reg, R9, offsetNew);
ArmCRegs[Num].PPCReg = preg;
ArmCRegs[Num].LastLoad = 0;
ArmCRegs[Num].PS1 = PS1;
ArmCRegs[Num].Away = !preLoad;
return ArmCRegs[Num].Reg;
// Returns the ARM register that caches one half of PPC FPR `preg`
// (`PS1` selects the ps1 half), binding a cache slot to it if necessary.
//
// preg    - PPC floating point register number, used to index _regs.
// PS1     - false for the ps0 half, true for the ps1 half.
// preLoad - only consulted for a slot that is bound but in REG_AWAY state;
//           newly allocated slots are always loaded from the PPC state.
ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad)
{
	// Ages every slot and yields the one with the highest LastLoad, which is
	// the eviction candidate if no free slot turns up below.
	u32 lastRegIndex = GetLeastUsedRegister(true);

	// Already bound to a slot: refresh its age and reuse it.
	if (_regs[preg][PS1].GetType() != REG_NOTLOADED)
	{
		u8 a = _regs[preg][PS1].GetRegIndex();
		ArmCRegs[a].LastLoad = 0;
		if (_regs[preg][PS1].GetType() == REG_AWAY && preLoad)
		{
			s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
			emit->VLDR(ArmCRegs[a].Reg, R9, offset);
			_regs[preg][PS1].LoadToReg(a);
		}
		return ArmCRegs[a].Reg;
	}

	// Not bound yet: take a free slot when one exists.
	u32 regindex;
	if (FindFreeRegister(regindex))
	{
		s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);
		emit->VLDR(ArmCRegs[regindex].Reg, R9, offset);
		ArmCRegs[regindex].PPCReg = preg;
		ArmCRegs[regindex].LastLoad = 0;
		// Record which half lives here. The eviction path below derives the
		// spill offset from this flag, so leaving a stale value from the
		// slot's previous occupant would store to the wrong half.
		ArmCRegs[regindex].PS1 = PS1;
		_regs[preg][PS1].LoadToReg(regindex);
		return ArmCRegs[regindex].Reg;
	}

	// Alright, we couldn't get a free space, dump that least used register
	const u32 evictedPreg = ArmCRegs[lastRegIndex].PPCReg;
	const bool evictedPS1 = ArmCRegs[lastRegIndex].PS1;
	s16 offsetOld = PPCSTATE_OFF(ps) + (evictedPreg * 16) + (evictedPS1 ? 8 : 0);
	s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0);

	emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld);
	emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew);

	// Unbind the half that was actually evicted. Indexing with the incoming
	// PS1 would leave the evicted half marked as loaded (and could unbind an
	// unrelated half) whenever the two flags differ.
	_regs[evictedPreg][evictedPS1].Flush();

	ArmCRegs[lastRegIndex].PPCReg = preg;
	ArmCRegs[lastRegIndex].LastLoad = 0;
	ArmCRegs[lastRegIndex].PS1 = PS1;

	_regs[preg][PS1].LoadToReg(lastRegIndex);

	return ArmCRegs[lastRegIndex].Reg;
}
ARMReg ArmFPRCache::R0(u32 preg, bool preLoad) ARMReg ArmFPRCache::R0(u32 preg, bool preLoad)
@ -168,14 +183,28 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
void ArmFPRCache::Flush() void ArmFPRCache::Flush()
{ {
for(u8 a = 0; a < NUMPPCREG; ++a) for (u8 a = 0; a < 32; ++a)
if (ArmCRegs[a].PPCReg != 33) {
if (_regs[a][0].GetType() == REG_REG)
{ {
s16 offset = PPCSTATE_OFF(ps) + (ArmCRegs[a].PPCReg * 16) + (ArmCRegs[a].PS1 ? 8 : 0); s16 offset = PPCSTATE_OFF(ps) + (a * 16);
emit->VSTR(ArmCRegs[a].Reg, R9, offset); u32 regindex = _regs[a][0].GetRegIndex();
ArmCRegs[a].PPCReg = 33; emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[a].LastLoad = 0;
ArmCRegs[a].Away = true; ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
} }
if (_regs[a][1].GetType() == REG_REG)
{
s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8;
u32 regindex = _regs[a][1].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
}
} }

View File

@ -29,6 +29,7 @@ using namespace ArmGen;
class ArmFPRCache class ArmFPRCache
{ {
private: private:
OpArg _regs[32][2]; // One for each FPR reg
JRCPPC ArmCRegs[ARMFPUREGS]; JRCPPC ArmCRegs[ARMFPUREGS];
JRCReg ArmRegs[ARMFPUREGS]; JRCReg ArmRegs[ARMFPUREGS];
@ -40,6 +41,8 @@ private:
ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad); ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad);
u32 GetLeastUsedRegister(bool increment);
bool FindFreeRegister(u32 &regindex);
protected: protected:
ARMXEmitter *emit; ARMXEmitter *emit;

View File

@ -126,10 +126,8 @@ bool ArmRegCache::FindFreeRegister(u32 &regindex)
ARMReg ArmRegCache::R(u32 preg) ARMReg ArmRegCache::R(u32 preg)
{ {
if (regs[preg].GetType() == REG_IMM) if (regs[preg].GetType() == REG_IMM)
{
return BindToRegister(preg); return BindToRegister(preg);
//asm ("bkpt #1;");
}
u32 lastRegIndex = GetLeastUsedRegister(true); u32 lastRegIndex = GetLeastUsedRegister(true);
// Check if already Loaded // Check if already Loaded

View File

@ -38,55 +38,57 @@ using namespace ArmGen;
enum RegType enum RegType
{ {
REG_NOTLOADED = 0, REG_NOTLOADED = 0,
REG_REG, REG_REG, // Reg type is register
REG_IMM, REG_IMM, // Reg is really a IMM
REG_AWAY, // Bound to a register, but not preloaded
}; };
class OpArg class OpArg
{ {
private: private:
class Reg{ RegType m_type; // store type
public: u8 m_reg; // index to register
RegType m_type; u32 m_value; // IMM value
u8 m_reg; // index to register
u32 m_value;
Reg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
} Reg;
public: public:
OpArg(){} OpArg()
{
m_type = REG_NOTLOADED;
m_reg = 33;
m_value = 0;
}
RegType GetType() RegType GetType()
{ {
return Reg.m_type; return m_type;
} }
u8 GetRegIndex() u8 GetRegIndex()
{ {
return Reg.m_reg; return m_reg;
} }
u32 GetImm() u32 GetImm()
{ {
return Reg.m_value; return m_value;
}
void LoadToAway(u8 reg)
{
m_type = REG_AWAY;
m_reg = reg;
} }
void LoadToReg(u8 reg) void LoadToReg(u8 reg)
{ {
Reg.m_type = REG_REG; m_type = REG_REG;
Reg.m_reg = reg; m_reg = reg;
} }
void LoadToImm(u32 imm) void LoadToImm(u32 imm)
{ {
Reg.m_type = REG_IMM; m_type = REG_IMM;
Reg.m_value = imm; m_value = imm;
} }
void Flush() void Flush()
{ {
Reg.m_type = REG_NOTLOADED; m_type = REG_NOTLOADED;
} }
}; };
@ -96,7 +98,6 @@ struct JRCPPC
bool PS1; bool PS1;
ARMReg Reg; // Tied to which ARM Register ARMReg Reg; // Tied to which ARM Register
u32 LastLoad; u32 LastLoad;
bool Away; // Only used in FPR cache
}; };
struct JRCReg struct JRCReg
{ {