From ebe444874951fd23508b3541429bd09ee991b9ab Mon Sep 17 00:00:00 2001
From: comex
Date: Wed, 25 Sep 2013 00:29:00 -0400
Subject: [PATCH] Save only the registers that need to be saved rather than
 going through ProtectFunction.

---
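NOTE (editorial, not part of the patch): PushRegistersAndAlignStack() and
RegistersInUse() below share one mask convention: bit r (0-15) marks GPR r by
x64 encoding, bit 16+x marks XMMx. GPRs are PUSHed (8 bytes each); XMMs are
stored with MOVAPD into a 16-byte-aligned scratch area above the Win64 shadow
space, so the frame math must keep RSP 16-byte aligned. Also note that the
Imm8 forms of ADD/SUB sign-extend, so the 32-bit immediate form is required
once the adjustment reaches 0x80. The standalone program below models that
layout math and checks the alignment invariant; it is a sketch only, and
FrameSize() is an invented name, not a function in this patch.

// Standalone model of the stack layout used by PushRegistersAndAlignStack
// below -- not part of the patch, just an illustration. Bits 0-15 of the
// mask name GPRs (saved with PUSH), bits 16-31 name XMM registers (saved
// with MOVAPD above the Win64 shadow space).
#include <cassert>
#include <cstdint>
#include <cstdio>

static int FrameSize(uint32_t mask, bool win64)
{
    int count = 0;                      // GPR pushes, 8 bytes each
    for (int r = 0; r < 16; r++)
        if (mask & (1u << r))
            count++;
    int size = (count & 1) ? 0 : 8;     // pad so RSP ends up 16-byte aligned
    for (int x = 0; x < 16; x++)
        if (mask & (1u << (16 + x)))
            size += 16;                 // one 16-byte slot per XMM register
    if (win64)
        size += 0x20;                   // shadow space for the callee
    return size;
}

int main()
{
    // On entry to the emitted code, RSP % 16 == 8 (return address pushed).
    // 8 (ret) + 8*count (pushes) + size must come out to a multiple of 16,
    // or the MOVAPDs into the scratch area would fault.
    for (uint32_t mask = 0; mask < (1u << 20); mask += 4097)  // sample masks
    {
        int count = 0;
        for (int r = 0; r < 16; r++)
            if (mask & (1u << r))
                count++;
        for (int win64 = 0; win64 <= 1; win64++)
        {
            int displacement = 8 + 8 * count + FrameSize(mask, win64 != 0);
            assert(displacement % 16 == 0);
        }
    }
    printf("alignment holds for sampled masks\n");
    return 0;
}

The (count & 1) ? 0 : 8 pad is what makes this work: a CALL leaves RSP at
8 mod 16 and each PUSH moves it by 8, so an even number of pushes needs 8
extra bytes before the XMM slots.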
 Source/Core/Common/Src/x64Emitter.cpp             | 68 +++++++++++++++++++
 Source/Core/Common/Src/x64Emitter.h               |  3 +
 Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp        | 18 +++++
 Source/Core/Core/Src/PowerPC/Jit64/Jit.h          |  2 +
 .../Core/Src/PowerPC/Jit64/JitRegCache.cpp        |  5 --
 .../Core/Core/Src/PowerPC/Jit64/JitRegCache.h     |  6 +-
 .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp      |  8 +--
 .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp       | 12 ++--
 .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp      | 25 +++++--
 .../Src/PowerPC/JitCommon/JitAsmCommon.cpp        | 18 ++---
 .../Src/PowerPC/JitCommon/JitBackpatch.cpp        | 35 ++++++----
 .../Core/Src/PowerPC/JitCommon/JitBackpatch.h     |  4 +-
 .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp       | 34 +++++++---
 .../Core/Src/PowerPC/JitCommon/Jit_Util.h         | 13 ++--
 14 files changed, 193 insertions(+), 58 deletions(-)

diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp
index a5d7cb2f0f..01fb0c5268 100644
--- a/Source/Core/Common/Src/x64Emitter.cpp
+++ b/Source/Core/Common/Src/x64Emitter.cpp
@@ -1634,6 +1634,74 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
 	CALLptr(M(impptr));
 }
 
+void XEmitter::PushRegistersAndAlignStack(u32 mask)
+{
+	int shadow = 0;
+#ifdef _WIN32
+	shadow = 0x20;
+#endif
+	int count = 0;
+	for (int r = 0; r < 16; r++)
+	{
+		if (mask & (1 << r))
+		{
+			PUSH((X64Reg) r);
+			count++;
+		}
+	}
+	int size = (count & 1) ? 0 : 8;
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+			size += 16;
+	}
+	size += shadow;
+	if (size)
+		SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
+	int offset = shadow;
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+		{
+			MOVAPD(MDisp(RSP, offset), (X64Reg) x);
+			offset += 16;
+		}
+	}
+}
+
+void XEmitter::PopRegistersAndAlignStack(u32 mask)
+{
+	int size = 0;
+#ifdef _WIN32
+	size += 0x20;
+#endif
+	for (int x = 0; x < 16; x++)
+	{
+		if (mask & (1 << (16 + x)))
+		{
+			MOVAPD((X64Reg) x, MDisp(RSP, size));
+			size += 16;
+		}
+	}
+	int count = 0;
+	for (int r = 0; r < 16; r++)
+	{
+		if (mask & (1 << r))
+			count++;
+	}
+	size += (count & 1) ? 0 : 8;
+
+	if (size)
+		ADD(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
+	for (int r = 15; r >= 0; r--)
+	{
+		if (mask & (1 << r))
+		{
+			POP((X64Reg) r);
+		}
+	}
+}
+
 #endif
 
 }
diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h
index 14f0ddb811..87724a8092 100644
--- a/Source/Core/Common/Src/x64Emitter.h
+++ b/Source/Core/Common/Src/x64Emitter.h
@@ -691,6 +691,9 @@ public:
 
 	#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
 
+	void PushRegistersAndAlignStack(u32 mask);
+	void PopRegistersAndAlignStack(u32 mask);
+
 #endif
 
 };  // class XEmitter
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
index 488b2fdec6..f2bff458ae 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@@ -737,3 +737,21 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 
 	return normalEntry;
 }
+
+u32 Jit64::RegistersInUse()
+{
+#ifdef _M_X64
+	u32 result = 0;
+	for (int i = 0; i < NUMXREGS; i++)
+	{
+		if (!gpr.IsFreeX(i))
+			result |= (1 << i);
+		if (!fpr.IsFreeX(i))
+			result |= (1 << (16 + i));
+	}
+	return result;
+#else
+	// not needed
+	return 0;
+#endif
+}
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
index 23d95b2852..bb1c3a4a19 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h
@@ -72,6 +72,8 @@ public:
 	void Jit(u32 em_address);
 	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);
 
+	u32 RegistersInUse();
+
 	JitBlockCache *GetBlockCache() { return &blocks; }
 
 	void Trace();
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
index 39c05f88e9..58ea703840 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp
@@ -76,11 +76,6 @@ void RegCache::LockX(int x1, int x2, int x3, int x4)
 	if (x4 != 0xFF) xlocks[x4] = true;
 }
 
-bool RegCache::IsFreeX(int xreg) const
-{
-	return xregs[xreg].free && !xlocks[xreg];
-}
-
 void RegCache::UnlockAll()
 {
 	for (int i = 0; i < 32; i++)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
index 73df8c8672..550036dba7 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
+++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h
@@ -106,7 +106,11 @@ public:
 	void UnlockAll();
 	void UnlockAllX();
 
-	bool IsFreeX(int xreg) const;
+	bool IsFreeX(int xreg) const
+	{
+		return xregs[xreg].free && !xlocks[xreg];
+	}
+
 
 	X64Reg GetFreeXReg();
 
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
index 87a58415a1..a94b6977b1 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -121,7 +121,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
 		// do our job at first
 		s32 offset = (s32)(s16)inst.SIMM_16;
 		gpr.Lock(d);
-		SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
+		SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
 		gpr.KillImmediate(d, false, true);
 		MOV(32, gpr.R(d), R(EAX));
 		gpr.UnlockAll();
@@ -193,7 +193,7 @@
 		}
 	}
 
-	SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
+	SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend);
 
 	// We must flush immediate values from the following registers because
 	// they may change at runtime if no MMU exception has been raised
@@ -373,7 +373,7 @@ void Jit64::stX(UGeckoInstruction inst)
 			gpr.Lock(s, a);
 			MOV(32, R(EDX), gpr.R(a));
 			MOV(32, R(ECX), gpr.R(s));
-			SafeWriteRegToReg(ECX, EDX, accessSize, offset);
+			SafeWriteRegToReg(ECX, EDX, accessSize, offset, RegistersInUse());
 
 			if (update && offset)
 			{
@@ -429,7 +429,7 @@ void Jit64::stXx(UGeckoInstruction inst)
 	}
 
 	MOV(32, R(ECX), gpr.R(s));
-	SafeWriteRegToReg(ECX, EDX, accessSize, 0);
+	SafeWriteRegToReg(ECX, EDX, accessSize, 0, RegistersInUse());
 
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index f324c32cf6..edeedcc6e4 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst)
 	}
 
 	s32 offset = (s32)(s16)inst.SIMM_16;
-	SafeLoadToEAX(gpr.R(a), 32, offset, false);
+	SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false);
 
 	MEMCHECK_START
 
@@ -207,10 +207,10 @@ void Jit64::stfd(UGeckoInstruction inst)
 		MOVAPD(XMM0, fpr.R(s));
 		PSRLQ(XMM0, 32);
 		MOVD_xmm(R(EAX), XMM0);
-		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
 
 		LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
-		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
+		SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
 
 		SetJumpTarget(exit);
 
@@ -282,7 +282,7 @@
 		MEMCHECK_END
 	}
 	CVTSD2SS(XMM0, fpr.R(s));
-	SafeWriteFloatToReg(XMM0, ABI_PARAM2);
+	SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
@@ -302,7 +302,7 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
 	CVTSD2SS(XMM0, fpr.R(inst.RS));
 	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
 
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
@@ -337,7 +337,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
 		MEMCHECK_END
 	} else {
-		SafeLoadToEAX(R(EAX), 32, 0, false);
+		SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false);
 
 		MEMCHECK_START
 
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
index 8b0a42086a..6932258739 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp
@@ -77,6 +77,23 @@ struct RegInfo {
 	RegInfo(RegInfo&); // DO NOT IMPLEMENT
 };
 
+static u32 regsInUse(RegInfo& R) {
+#ifdef _M_X64
+	u32 result = 0;
+	for (unsigned i = 0; i < MAX_NUMBER_OF_REGS; i++)
+	{
+		if (R.regs[i] != 0)
+			result |= (1 << i);
+		if (R.fregs[i] != 0)
+			result |= (1 << (16 + i));
+	}
+	return result;
+#else
+	// not needed
+	return 0;
+#endif
+}
+
 static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
 	unsigned& info = R.IInfo[Op - R.FirstI];
 	if (info == 0) R.IInfo[I - R.FirstI] |= 1 << (OpNum + 1);
@@ -634,7 +651,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
 	if (RI.MakeProfile) {
 		RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
 	}
-	RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
+	RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI));
 	if (RI.IInfo[I - RI.FirstI] & 4)
 		regClearInst(RI, getOp1(I));
 }
@@ -1337,7 +1354,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
 				Jit->MOV(32, R(EAX), loc1);
 			}
 			Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
-			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
+			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
 			if (RI.IInfo[I - RI.FirstI] & 4)
 				fregClearInst(RI, getOp1(I));
 			if (RI.IInfo[I - RI.FirstI] & 8)
@@ -1400,12 +1417,12 @@
 			Jit->PSRLQ(XMM0, 32);
 			Jit->MOVD_xmm(R(EAX), XMM0);
 			Jit->MOV(32, R(ECX), address);
-			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0);
+			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI));
 
 			Jit->MOVAPD(XMM0, value);
 			Jit->MOVD_xmm(R(EAX), XMM0);
 			Jit->MOV(32, R(ECX), address);
-			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4);
+			RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4, regsInUse(RI));
 
 			Jit->SetJumpTarget(exit);
 
 			if (RI.IInfo[I - RI.FirstI] & 4)
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
index 767035efb1..da210e2606 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp
@@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
 	PACKSSDW(XMM0, R(XMM0));
 	PACKUSWB(XMM0, R(XMM0));
 	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 
 	RET();
 
@@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
 	PACKSSWB(XMM0, R(XMM0));
 	MOVD_xmm(R(EAX), XMM0);
 
-	SafeWriteRegToReg(AX, ECX, 16, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 
 	RET();
 
@@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
 	MOV(16, R(AX), M((char*)psTemp + 4));
 	BSWAP(32, EAX);
 
-	SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 
 	RET();
 
@@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
 	MOVD_xmm(R(EAX), XMM0);
 	BSWAP(32, EAX);
 	ROL(32, R(EAX), Imm8(16));
-	SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 
 	RET();
 
@@ -295,7 +295,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
 	// Easy!
 	const u8* storeSingleFloat = AlignCode4();
-	SafeWriteFloatToReg(XMM0, ECX, SAFE_WRITE_NO_FASTMEM);
+	SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM);
 	RET();
 
 	/*
 	if (cpu_info.bSSSE3) {
@@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M((void *)&m_255));
 	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 	RET();
 
 	const u8* storeSingleS8 = AlignCode4();
@@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
 	MAXSS(XMM0, M((void *)&m_m128));
 	MINSS(XMM0, M((void *)&m_127));
 	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(AL, ECX, 8, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 	RET();
 
 	const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
@@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M((void *)&m_65535));
 	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 	RET();
 
 	const u8* storeSingleS16 = AlignCode4();
@@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() {
 	MAXSS(XMM0, M((void *)&m_m32768));
 	MINSS(XMM0, M((void *)&m_32767));
 	CVTTSS2SI(EAX, R(XMM0));
-	SafeWriteRegToReg(EAX, ECX, 16, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
+	SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM);
 	RET();
 
 	singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp
index 385a48362e..da7f7c1c27 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp
@@ -56,7 +56,7 @@ void TrampolineCache::Shutdown()
 }
 
 // Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
+const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
 {
 	if (GetSpaceLeft() < 1024)
 		PanicAlert("Trampoline cache full");
@@ -76,17 +76,18 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
 	if (info.displacement)
 	{
 		ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
 	}
+	PushRegistersAndAlignStack(registersInUse);
 	switch (info.operandSize)
 	{
 	case 4:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U32, 1));
+		CALL((void *)&Memory::Read_U32);
 		break;
 	case 2:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U16, 1));
+		CALL((void *)&Memory::Read_U16);
 		SHL(32, R(EAX), Imm8(16));
 		break;
 	case 1:
-		CALL(thunks.ProtectFunction((void *)&Memory::Read_U8, 1));
+		CALL((void *)&Memory::Read_U8);
 		break;
 	}
 
@@ -95,13 +96,14 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
 		MOV(32, R(dataReg), R(EAX));
 	}
 
+	PopRegistersAndAlignStack(registersInUse);
 	RET();
 #endif
 
 	return trampoline;
 }
 
 // Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
+const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse)
 {
 	if (GetSpaceLeft() < 1024)
 		PanicAlert("Trampoline cache full");
@@ -135,25 +137,24 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
 		ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
 	}
 
-	SUB(64, R(RSP), Imm8(8));
-
+	PushRegistersAndAlignStack(registersInUse);
 	switch (info.operandSize)
 	{
 	case 8:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U64, 2));
+		CALL((void *)&Memory::Write_U64);
 		break;
 	case 4:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
+		CALL((void *)&Memory::Write_U32);
 		break;
 	case 2:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U16, 2));
+		CALL((void *)&Memory::Write_U16);
 		break;
 	case 1:
-		CALL(thunks.ProtectFunction((void *)&Memory::Write_U8, 2));
+		CALL((void *)&Memory::Write_U8);
 		break;
 	}
 
-	ADD(64, R(RSP), Imm8(8));
+	PopRegistersAndAlignStack(registersInUse);
 	RET();
 #endif
 
@@ -182,6 +183,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 		PanicAlert("BackPatch : Base reg not RBX."
 		           "\n\nAttempted to access %08x.", emAddress);
 
+	auto it = registersInUseAtLoc.find(codePtr);
+	if (it == registersInUseAtLoc.end())
+		PanicAlert("BackPatch: no register use entry for address %p", codePtr);
+	u32 registersInUse = it->second;
+
 	if (!info.isMemoryWrite)
 	{
 		XEmitter emitter(codePtr);
@@ -191,7 +197,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 			bswapNopCount = 3;
 		else
 			bswapNopCount = 2;
-		const u8 *trampoline = trampolines.GetReadTrampoline(info);
+
+		const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
 		emitter.CALL((void *)trampoline);
 		emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
 		return codePtr;
@@ -223,7 +230,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 
 		u8 *start = codePtr - bswapSize;
 		XEmitter emitter(start);
-		const u8 *trampoline = trampolines.GetWriteTrampoline(info);
+		const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
 		emitter.CALL((void *)trampoline);
 		emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr());
 		return start;
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h
index 02cdf856f2..59310b6ecf 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h
@@ -232,8 +232,8 @@ public:
 	void Init();
 	void Shutdown();
-	const u8 *GetReadTrampoline(const InstructionInfo &info);
-	const u8 *GetWriteTrampoline(const InstructionInfo &info);
+	const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
+	const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse);
 private:
 	ThunkManager thunks;
 };
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
index da5b735dcb..52b3985a74 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -58,21 +58,25 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
 #endif
 }
 
-void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
 {
+	u8 *result;
#ifdef _M_X64
 	if (opAddress.IsSimpleReg())
 	{
+		result = GetWritableCodePtr();
 		MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
 	}
 	else
 	{
 		MOV(32, R(EAX), opAddress);
+		result = GetWritableCodePtr();
 		MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
 	}
 #else
 	if (opAddress.IsImm())
 	{
+		result = GetWritableCodePtr();
 		MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
 	}
 	else
@@ -80,6 +84,7 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
 		if (!opAddress.IsSimpleReg(EAX))
 			MOV(32, R(EAX), opAddress);
 		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
+		result = GetWritableCodePtr();
 		MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset));
 	}
 #endif
@@ -105,9 +110,10 @@ void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
 		// TODO: bake 8-bit into the original load.
 		MOVSX(32, accessSize, EAX, R(EAX));
 	}
+	return result;
 }
 
-void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
+void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend)
 {
 #if defined(_M_X64)
 #ifdef ENABLE_MEM_CHECK
@@ -116,7 +122,11 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
 	if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem)
 #endif
 	{
-		UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+		u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
+
+		// XXX: are these dead anyway?
+		registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
+		registersInUseAtLoc[mov] = registersInUse;
 	}
 	else
 #endif
@@ -208,22 +218,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
 	}
 }
 
-void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
+u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
 {
+	u8 *result;
 	if (accessSize == 8 && reg_value >= 4) {
 		PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
 	}
 	if (swap) BSWAP(accessSize, reg_value);
 #ifdef _M_X64
+	result = GetWritableCodePtr();
 	MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
 #else
 	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
+	result = GetWritableCodePtr();
 	MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
 #endif
+	return result;
 }
 
 // Destroys both arg registers
-void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, int flags)
+void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
 {
 #if defined(_M_X64)
 	if (!Core::g_CoreStartupParameter.bMMU &&
@@ -234,12 +248,16 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
 #endif
 	    )
 	{
-		UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
+		u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_WRITE_NO_SWAP));
 		if (accessSize == 8)
 		{
 			NOP(1);
 			NOP(1);
 		}
+
+		// XXX: are these dead anyway?
+		registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
+		registersInUseAtLoc[mov] = registersInUse;
 		return;
 	}
 #endif
@@ -278,7 +296,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
 	SetJumpTarget(exit);
 }
 
-void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int flags)
+void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
 {
 	if (false && cpu_info.bSSSE3) {
 		// This path should be faster but for some reason it causes errors so I've disabled it.
@@ -311,7 +329,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, int fl
 	} else {
 		MOVSS(M(&float_buffer), xmm_value);
 		MOV(32, R(EAX), M(&float_buffer));
-		SafeWriteRegToReg(EAX, reg_addr, 32, 0, flags);
+		SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
 	}
 }
 
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
index ca3f05c395..0fd5db0380 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h
@@ -7,25 +7,27 @@
 
 #include "x64Emitter.h"
 #include "Thunk.h"
+#include <unordered_map>
 
 // Like XCodeBlock but has some utilities for memory access.
 class EmuCodeBlock : public Gen::XCodeBlock
 {
 public:
 	void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
 	void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
-	void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
-	void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
-	void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
+	// these return the address of the MOV, for backpatching
+	u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
+	u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
+	void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend);
 	enum SafeWriteFlags
 	{
 		SAFE_WRITE_NO_SWAP = 1,
 		SAFE_WRITE_NO_PROLOG = 2,
 		SAFE_WRITE_NO_FASTMEM = 4
 	};
-	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, int flags = 0);
+	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
 
 	// Trashes both inputs and EAX.
-	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, int flags = 0);
+	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
 
 	void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
 	void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
@@ -37,6 +39,7 @@ public:
 	void ForceSinglePrecisionP(Gen::X64Reg xmm);
 protected:
 	ThunkManager thunks;
+	std::unordered_map<u8 *, u32> registersInUseAtLoc;
 };
 
 #endif // _JITUTIL_H
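NOTE (editorial, not part of the patch): the pieces fit together like this:
each fastmem MOV records the live-register mask in registersInUseAtLoc at
emit time; when that MOV faults, BackPatch() looks the mask up by code
address and hands it to GetReadTrampoline()/GetWriteTrampoline(), which
bracket the slow Memory:: call with PushRegistersAndAlignStack() and
PopRegistersAndAlignStack(). The runnable toy model below illustrates just
the bookkeeping; EmitFastmemLoad and codeBuffer are invented stand-ins,
while registersInUseAtLoc, BackPatch, and the mask layout come from the
patch.

// Toy model of the backpatch bookkeeping introduced above -- not Dolphin
// code. At emit time, each fastmem MOV records which registers are live;
// when that MOV faults, BackPatch() looks the mask up by code address so
// the trampoline only saves live registers.
#include <cstdint>
#include <cstdio>
#include <unordered_map>

// Maps the address of an emitted MOV to the live-register mask at that
// point (bits 0-15: GPRs, bits 16-31: XMMs), as in EmuCodeBlock.
static std::unordered_map<const uint8_t*, uint32_t> registersInUseAtLoc;

static uint8_t codeBuffer[64];           // stand-in for the JIT code space

static const uint8_t* EmitFastmemLoad(size_t at, uint32_t liveMask)
{
    const uint8_t* loc = &codeBuffer[at];
    // The real code first masks out ABI_PARAM1/ABI_PARAM2/RAX, which the
    // trampoline clobbers anyway, before recording the entry.
    registersInUseAtLoc[loc] = liveMask;
    return loc;
}

static void BackPatch(const uint8_t* faultingInstr)
{
    auto it = registersInUseAtLoc.find(faultingInstr);
    if (it == registersInUseAtLoc.end())
    {
        printf("no register use entry for %p\n", (const void*)faultingInstr);
        return;
    }
    // A real implementation would now emit PushRegistersAndAlignStack(
    // it->second), the Memory::Read_/Write_ call, and the matching pop.
    printf("patching %p, saving mask %08x\n",
           (const void*)faultingInstr, it->second);
}

int main()
{
    // Pretend a load was emitted at offset 16 with RBX (bit 3) and XMM0
    // (bit 16) live, then fault on it.
    const uint8_t* mov = EmitFastmemLoad(16, (1u << 3) | (1u << (16 + 0)));
    BackPatch(mov);                       // finds RBX + XMM0 live
    BackPatch(&codeBuffer[0]);            // no entry recorded
    return 0;
}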