Implement XER optimization on ARMv7 JIT core

Not completely optimized; there's room for improvement here.
This commit is contained in:
Ryan Houdek 2014-09-15 02:21:03 -05:00 committed by Fiora
parent 5fce109ce1
commit 76697922b4
2 changed files with 68 additions and 51 deletions

View File

@ -46,51 +46,46 @@ void JitArm::ComputeRC(s32 value, int cr)
void JitArm::ComputeCarry()
{
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
SetCC(CC_CS);
ORR(tmp, tmp, mask);
ORR(tmp, tmp, 1);
SetCC(CC_CC);
BIC(tmp, tmp, mask);
BIC(tmp, tmp, 1);
SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
}
// Emits code that forces bit 0 of the guest xer_ca byte to a value known at
// JIT-compile time.
//
// Carry: true emits an ORR that sets bit 0, false emits a BIC that clears it.
// The byte is loaded, modified and stored back; spr[SPR_XER] is not accessed.
void JitArm::ComputeCarry(bool Carry)
{
	ARMReg tmp = gpr.GetReg();
	LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));

	// Exactly one instruction is emitted per branch, so the if/else stays
	// well-formed without braces being required for correctness.
	if (Carry)
	{
		ORR(tmp, tmp, 1);
	}
	else
	{
		BIC(tmp, tmp, 1);
	}

	STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
	gpr.Unlock(tmp);
}
// Emits code that reads bit 0 of the guest xer_ca byte into `reg` and then
// clears that bit in the stored byte.
//
// reg: destination register; after the emitted code runs it holds
//      (xer_ca & 1), i.e. 0 or 1.
// A scratch register is borrowed from gpr for the read-modify-write and is
// released before returning.
void JitArm::GetCarryAndClear(ARMReg reg)
{
	ARMReg tmp = gpr.GetReg();
	LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
	AND(reg, tmp, 1); // hand the old carry bit to the caller
	BIC(tmp, tmp, 1); // and clear it in the copy that is written back
	STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
	gpr.Unlock(tmp);
}
// Emits code that merges a previously saved carry with the current host carry
// condition and ORs the result into the guest xer_ca byte.
//
// reg: register holding an earlier carry value; bit 0 is additionally set
//      when the CC_CS condition holds at this point, so `reg` is modified.
//      NOTE(review): presumably `reg` comes from GetCarryAndClear and is
//      therefore 0 or 1 - verify against callers, since the whole low byte of
//      `reg` is ORed into xer_ca.
// The stored byte is only ever ORed into, never cleared, by this helper.
void JitArm::FinalizeCarry(ARMReg reg)
{
	ARMReg tmp = gpr.GetReg();

	// Fold the current carry condition into the saved value.
	SetCC(CC_CS);
	ORR(reg, reg, 1);
	SetCC();

	// Read-modify-write of the carry byte only.
	LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
	ORR(tmp, tmp, reg);
	STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
	gpr.Unlock(tmp);
}
@ -107,25 +102,23 @@ void JitArm::subfic(UGeckoInstruction inst)
if (imm == 0)
{
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
BIC(tmp, tmp, mask);
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1);
// Flags act exactly like subtracting from 0
RSBS(gpr.R(d), gpr.R(d), 0);
SetCC(CC_CS);
ORR(tmp, tmp, mask);
ORR(tmp, tmp, 1);
SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
}
else if (imm == -1)
{
// CA is always set in this case
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
ORR(tmp, tmp, mask);
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
ORR(tmp, tmp, 1);
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
MVN(gpr.R(d), gpr.R(d));
@ -134,35 +127,33 @@ void JitArm::subfic(UGeckoInstruction inst)
{
ARMReg tmp = gpr.GetReg();
ARMReg rA = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOVI2R(rA, imm + 1);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
BIC(tmp, tmp, mask);
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1);
// Flags act exactly like subtracting from 0
MVN(gpr.R(d), gpr.R(d));
ADDS(gpr.R(d), gpr.R(d), rA);
// Output carry is inverted
SetCC(CC_CS);
ORR(tmp, tmp, mask);
ORR(tmp, tmp, 1);
SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp, rA);
}
}
else
{
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOVI2R(gpr.R(d), imm);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
BIC(tmp, tmp, mask);
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1);
// Flags act exactly like subtracting from 0
SUBS(gpr.R(d), gpr.R(d), gpr.R(a));
// Output carry is inverted
SetCC(CC_CS);
ORR(tmp, tmp, mask);
ORR(tmp, tmp, 1);
SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
}
// This instruction has no RC flag
@ -871,7 +862,6 @@ void JitArm::srawix(UGeckoInstruction inst)
ARMReg RA = gpr.R(a);
ARMReg RS = gpr.R(s);
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOV(tmp, RS);
ASR(RA, RS, amount);
@ -880,12 +870,12 @@ void JitArm::srawix(UGeckoInstruction inst)
LSL(tmp, tmp, 32 - amount);
TST(tmp, RA);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
BIC(tmp, tmp, mask);
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1);
SetCC(CC_NEQ);
ORR(tmp, tmp, mask);
ORR(tmp, tmp, 1);
SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
}
else
@ -895,10 +885,9 @@ void JitArm::srawix(UGeckoInstruction inst)
MOV(RA, RS);
ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
BIC(tmp, tmp, mask);
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1);
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp);
}

View File

@ -76,11 +76,10 @@ void JitArm::mtspr(UGeckoInstruction inst)
case SPR_SRR0:
case SPR_SRR1:
// These are safe to do the easy way, see the bottom of this function.
break;
break;
case SPR_LR:
case SPR_CTR:
case SPR_XER:
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
@ -90,8 +89,23 @@ void JitArm::mtspr(UGeckoInstruction inst)
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
// These are safe to do the easy way, see the bottom of this function.
break;
break;
// Writing XER: the 32-bit value in RD is split into three separately stored
// pieces (xer_stringctrl, xer_ca, xer_so_ov) instead of one word.
case SPR_XER:
{
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
ARMReg mask = gpr.GetReg();
// 0xFF7F keeps bits 0-6 and 8-15 of RD and drops bit 7; the result is
// stored as a halfword.
// NOTE(review): presumably these are the string byte-count / control
// fields of XER - confirm against the PPC state definition.
MOVI2R(mask, 0xFF7F);
AND(tmp, RD, mask);
STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl));
// Carry: shift the CA bit down to bit 0 and isolate it before the byte store.
LSR(tmp, RD, XER_CA_SHIFT);
AND(tmp, tmp, 1);
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
// Everything from XER_OV_SHIFT upward is shifted down and its low byte stored.
// NOTE(review): no mask is applied here, so this assumes only the SO/OV bits
// sit at or above XER_OV_SHIFT - confirm the shift value.
LSR(tmp, RD, XER_OV_SHIFT);
STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(tmp, mask);
}
break;
default:
FALLBACK_IF(true);
}
@ -116,6 +130,20 @@ void JitArm::mfspr(UGeckoInstruction inst)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
// Reading XER: rebuild the 32-bit value in RD from the three separately
// stored pieces (the inverse of the split performed when XER is written).
case SPR_XER:
{
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
// Start with the halfword of string/control bits in the low 16 bits of RD.
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
// OR in the carry byte, moved up to the CA bit position.
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LSL(tmp, tmp, XER_CA_SHIFT);
ORR(RD, RD, tmp);
// OR in the SO/OV byte, moved up to start at the OV bit position.
LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
LSL(tmp, tmp, XER_OV_SHIFT);
ORR(RD, RD, tmp);
gpr.Unlock(tmp);
}
break;
case SPR_WPAR:
case SPR_DEC:
case SPR_TL: