mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-15 05:47:56 -07:00
JIT: move mfcr code to JitAsmCommon
It's like 80+ instructions, so inlining it on every use of mfcr is probably not the best for the icache.
This commit is contained in:
parent
821db9798c
commit
b058bbd223
@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
|
||||
GenFrsqrte();
|
||||
fres = AlignCode4();
|
||||
GenFres();
|
||||
mfcr = AlignCode4();
|
||||
GenMfcr();
|
||||
|
||||
GenQuantizedLoads();
|
||||
GenQuantizedStores();
|
||||
|
@ -406,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
// USES_CR
|
||||
int d = inst.RD;
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
CALL((void *)asm_routines.mfcr);
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false, true);
|
||||
XOR(32, gpr.R(d), gpr.R(d));
|
||||
|
||||
X64Reg cr_val = RSCRATCH2;
|
||||
// we only need to zero the high bits of RSCRATCH once
|
||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
|
||||
if (i != 0)
|
||||
SHL(32, gpr.R(d), Imm8(4));
|
||||
|
||||
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
|
||||
|
||||
// EQ: Bits 31-0 == 0; set flag bit 1
|
||||
TEST(32, R(cr_val), R(cr_val));
|
||||
SETcc(CC_Z, R(RSCRATCH));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
|
||||
|
||||
// GT: Value > 0; set flag bit 2
|
||||
TEST(64, R(cr_val), R(cr_val));
|
||||
SETcc(CC_G, R(RSCRATCH));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
|
||||
|
||||
// SO: Bit 61 set; set flag bit 0
|
||||
// LT: Bit 62 set; set flag bit 3
|
||||
SHR(64, R(cr_val), Imm8(61));
|
||||
MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
|
||||
MOV(32, gpr.R(d), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres()
|
||||
RET();
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenMfcr()
|
||||
{
|
||||
// Input: none
|
||||
// Output: RSCRATCH
|
||||
// This function clobbers all three RSCRATCH.
|
||||
X64Reg dst = RSCRATCH;
|
||||
X64Reg tmp = RSCRATCH2;
|
||||
X64Reg cr_val = RSCRATCH_EXTRA;
|
||||
XOR(32, R(dst), R(dst));
|
||||
// we only need to zero the high bits of tmp once
|
||||
XOR(32, R(tmp), R(tmp));
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 };
|
||||
if (i != 0)
|
||||
SHL(32, R(dst), Imm8(4));
|
||||
|
||||
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
|
||||
|
||||
// EQ: Bits 31-0 == 0; set flag bit 1
|
||||
TEST(32, R(cr_val), R(cr_val));
|
||||
// FIXME: is there a better way to do this without the partial register merging?
|
||||
SETcc(CC_Z, R(tmp));
|
||||
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
|
||||
|
||||
// GT: Value > 0; set flag bit 2
|
||||
TEST(64, R(cr_val), R(cr_val));
|
||||
SETcc(CC_G, R(tmp));
|
||||
LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0));
|
||||
|
||||
// SO: Bit 61 set; set flag bit 0
|
||||
// LT: Bit 62 set; set flag bit 3
|
||||
SHR(64, R(cr_val), Imm8(61));
|
||||
OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
|
||||
}
|
||||
RET();
|
||||
}
|
||||
|
||||
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
||||
|
||||
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||
|
@ -25,6 +25,7 @@ public:
|
||||
|
||||
const u8 *frsqrte;
|
||||
const u8 *fres;
|
||||
const u8 *mfcr;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
// In: ECX: Address to read from.
|
||||
@ -58,4 +59,5 @@ public:
|
||||
void GenFifoWrite(int size);
|
||||
void GenFrsqrte();
|
||||
void GenFres();
|
||||
void GenMfcr();
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user