diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 3005970453..af2fe33bb8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -182,11 +182,33 @@ void Jit64::mfspr(UGeckoInstruction inst) if (offset > 0) ADD(64, R(RAX), Imm32(offset)); MOV(64, M(&TL), R(RAX)); - gpr.Lock(d); - gpr.BindToRegister(d, false); - if (iIndex == SPR_TU) + // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them + // if we can. + u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F); + // Be careful; the actual opcode is for mftb (371), not mfspr (339) + if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL)) + { + int n = js.next_inst.RD; + js.downcountAmount++; + js.skipnext = true; + gpr.Lock(d, n); + if (iIndex == SPR_TL) + MOV(32, gpr.R(d), R(EAX)); + if (nextIndex == SPR_TL) + MOV(32, gpr.R(n), R(EAX)); SHR(64, R(RAX), Imm8(32)); - MOV(32, gpr.R(d), R(EAX)); + if (iIndex == SPR_TU) + MOV(32, gpr.R(d), R(EAX)); + if (nextIndex == SPR_TU) + MOV(32, gpr.R(n), R(EAX)); + } + else + { + gpr.Lock(d); + if (iIndex == SPR_TU) + SHR(64, R(RAX), Imm8(32)); + MOV(32, gpr.R(d), R(EAX)); + } break; } case SPR_WPAR: