diff --git a/Source/Core/Common/MemoryUtil.cpp b/Source/Core/Common/MemoryUtil.cpp index f7e1d7d902..a741deef4f 100644 --- a/Source/Core/Common/MemoryUtil.cpp +++ b/Source/Core/Common/MemoryUtil.cpp @@ -158,6 +158,25 @@ void FreeAlignedMemory(void* ptr) } } +void ReadProtectMemory(void* ptr, size_t size) +{ + bool error_occurred = false; + +#ifdef _WIN32 + DWORD oldValue; + if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue)) + error_occurred = true; +#else + int retval = mprotect(ptr, size, PROT_NONE); + + if (retval != 0) + error_occurred = true; +#endif + + if (error_occurred) + PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg()); +} + void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) { bool error_occurred = false; diff --git a/Source/Core/Common/MemoryUtil.h b/Source/Core/Common/MemoryUtil.h index 6f437fcda7..5f584f868d 100644 --- a/Source/Core/Common/MemoryUtil.h +++ b/Source/Core/Common/MemoryUtil.h @@ -12,8 +12,12 @@ void* AllocateMemoryPages(size_t size); void FreeMemoryPages(void* ptr, size_t size); void* AllocateAlignedMemory(size_t size,size_t alignment); void FreeAlignedMemory(void* ptr); +void ReadProtectMemory(void* ptr, size_t size); void WriteProtectMemory(void* ptr, size_t size, bool executable = false); void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false); std::string MemUsage(); +void GuardMemoryMake(void* ptr, size_t size); +void GuardMemoryUnmake(void* ptr, size_t size); + inline int GetPageSize() { return 4096; } diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index fa16cf2b36..75cd418379 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1766,6 +1766,8 @@ void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI void XEmitter::LOCK() { Write8(0xF0); } void XEmitter::REP() { Write8(0xF3); } void XEmitter::REPNE() { Write8(0xF2); } +void XEmitter::FSOverride() { Write8(0x64); } +void XEmitter::GSOverride() { Write8(0x65); } void XEmitter::FWAIT() { diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 8f41065668..8b655c2c42 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -467,6 +467,8 @@ public: void LOCK(); void REP(); void REPNE(); + void FSOverride(); + void GSOverride(); // x87 enum x87StatusWordBits { diff --git a/Source/Core/Core/ArmMemTools.cpp b/Source/Core/Core/ArmMemTools.cpp index ff7d77e4f4..8b166580a8 100644 --- a/Source/Core/Core/ArmMemTools.cpp +++ b/Source/Core/Core/ArmMemTools.cpp @@ -32,9 +32,9 @@ typedef struct ucontext { } ucontext_t; #endif -void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) +static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) { - if (signal != SIGSEGV) + if (sig != SIGSEGV) { // We are not interested in other signals - handle it as usual. return; @@ -47,33 +47,18 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) return; } - // Get all the information we can out of the context. mcontext_t *ctx = &context->uc_mcontext; - void *fault_memory_ptr = (void*)ctx->arm_r10; - u8 *fault_instruction_ptr = (u8 *)ctx->arm_pc; + // comex says hello, and is most curious whether this is arm_r10 for a + // reason as opposed to si_addr like the x64MemTools.cpp version. Is there + // even a need for this file to be architecture specific? 
+ uintptr_t fault_memory_ptr = (uintptr_t)ctx->arm_r10; - if (!JitInterface::IsInCodeSpace(fault_instruction_ptr)) + if (!JitInterface::HandleFault(fault_memory_ptr, ctx)) { - // Let's not prevent debugging. - return; - } - - u64 bad_address = (u64)fault_memory_ptr; - u64 memspace_bottom = (u64)Memory::base; - if (bad_address < memspace_bottom) - { - PanicAlertT("Exception handler - access below memory space. %08llx%08llx", - bad_address >> 32, bad_address); - } - - u32 em_address = (u32)(bad_address - memspace_bottom); - - const u8 *new_rip = jit->BackPatch(fault_instruction_ptr, em_address, ctx); - if (new_rip) - { - ctx->arm_pc = (u32) new_rip; + // retry and crash + signal(SIGSEGV, SIG_DFL); } } @@ -86,4 +71,7 @@ void InstallExceptionHandler() sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, nullptr); } + +void UninstallExceptionHandler() {} + } // namespace diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 78172d5cb0..39800a448e 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -195,9 +195,10 @@ if(_M_X86) PowerPC/Jit64/Jit_Paired.cpp PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp - PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/JitAsmCommon.cpp - PowerPC/JitCommon/Jit_Util.cpp) + PowerPC/JitCommon/JitBackpatch.cpp + PowerPC/JitCommon/Jit_Util.cpp + PowerPC/JitCommon/TrampolineCache.cpp) elseif(_M_ARM_32) set(SRCS ${SRCS} ArmMemTools.cpp diff --git a/Source/Core/Core/Core.cpp b/Source/Core/Core/Core.cpp index 3a764a4e63..5b6294d2c7 100644 --- a/Source/Core/Core/Core.cpp +++ b/Source/Core/Core/Core.cpp @@ -277,6 +277,10 @@ static void CpuThread() if (!_CoreParameter.bCPUThread) g_video_backend->Video_Cleanup(); + #if _M_X86_64 || _M_ARM_32 + EMM::UninstallExceptionHandler(); + #endif + return; } diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index 7ead1e4172..b46357fd3d 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -229,6 +229,7 @@ + @@ -406,6 +407,7 @@ + @@ -464,4 +466,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 39e6aec8f4..faeb9bcd24 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -640,6 +640,9 @@ PowerPC\JitCommon + + PowerPC\JitCommon + PowerPC\JitIL @@ -1182,6 +1185,9 @@ PowerPC\JitCommon + + PowerPC\JitCommon + PowerPC\JitIL @@ -1204,4 +1210,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/MemTools.h b/Source/Core/Core/MemTools.h index 276af3d887..fcc671b799 100644 --- a/Source/Core/Core/MemTools.h +++ b/Source/Core/Core/MemTools.h @@ -11,4 +11,5 @@ namespace EMM { typedef u32 EAddr; void InstallExceptionHandler(); + void UninstallExceptionHandler(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 720375613c..92595f6acd 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -95,6 +95,83 @@ using namespace PowerPC; and such, but it's currently limited to integer ops only. This can definitely be made better. */ +// The BLR optimization is nice, but it means that JITted code can overflow the +// native stack by repeatedly running BL. (The chance of this happening in any +// retail game is close to 0, but correctness is correctness...) 
Also, the +// overflow might not happen directly in the JITted code but in a C++ function +// called from it, so we can't just adjust RSP in the case of a fault. +// Instead, we have to have extra stack space preallocated under the fault +// point which allows the code to continue, after wiping the JIT cache so we +// can reset things at a safe point. Once this condition trips, the +// optimization is permanently disabled, under the assumption this will never +// happen in practice. + +// On Unix, we just mark an appropriate region of the stack as PROT_NONE and +// handle it the same way as fastmem faults. It's safe to take a fault with a +// bad RSP, because on Linux we can use sigaltstack and on OS X we're already +// on a separate thread. + +// On Windows, the OS gets upset if RSP doesn't work, and I don't know any +// equivalent of sigaltstack. Windows supports guard pages which, when +// accessed, immediately turn into regular pages but cause a trap... but +// putting them in the path of RSP just leads to something (in the kernel?) +// thinking a regular stack extension is required. So this protection is not +// supported on Windows yet... We still use a separate stack for the sake of +// simplicity. + +enum +{ + STACK_SIZE = 2 * 1024 * 1024, + SAFE_STACK_SIZE = 512 * 1024, + GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above) + GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE, +}; + +void Jit64::AllocStack() +{ +#if defined(_WIN32) + m_stack = (u8*)AllocateMemoryPages(STACK_SIZE); + ReadProtectMemory(m_stack, GUARD_SIZE); + ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); +#endif +} + +void Jit64::FreeStack() +{ +#if defined(_WIN32) + if (m_stack) + { + FreeMemoryPages(m_stack, STACK_SIZE); + m_stack = NULL; + } +#endif +} + +bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx) +{ + uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack; + // In the trap region? + if (stack && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) + { + WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); + m_enable_blr_optimization = false; + UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); + // We're going to need to clear the whole cache to get rid of the bad + // CALLs, but we can't yet. Fake the downcount so we're forced to the + // dispatcher (no block linking), and clear the cache so we're sent to + // Jit. Yeah, it's kind of gross. + GetBlockCache()->InvalidateICache(0, 0xffffffff); + CoreTiming::ForceExceptionCheck(0); + m_clear_cache_asap = true; + + return true; + } + + return Jitx86Base::HandleFault(access_address, ctx); +} + + + void Jit64::Init() { jo.optimizeStack = true; @@ -130,8 +207,18 @@ void Jit64::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); + + // BLR optimization has the same consequences as block linking, as well as + // depending on the fault handler to be safe in the event of excessive BL. + m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem; + m_clear_cache_asap = false; + + m_stack = nullptr; + if (m_enable_blr_optimization) + AllocStack(); + blocks.Init(); - asm_routines.Init(); + asm_routines.Init(m_stack ? (m_stack + STACK_SIZE) : nullptr); // important: do this *after* generating the global asm routines, because we can't use farcode in them. // it'll crash because the farcode functions get cleared on JIT clears. 
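The guard-page mechanism that AllocStack() and HandleFault() implement above can be hard to picture from the emitter code alone. Below is a minimal, self-contained POSIX sketch of the same idea — not Dolphin code, and all names (g_guard, handler, the 16-page layout) are illustrative: protect a page with PROT_NONE, take the SIGSEGV on an alternate stack, check that the faulting address lies inside the guard, then lift the protection and recover. This mirrors the unprotect-and-continue path Jit64::HandleFault takes before deferring to Jitx86Base::HandleFault. Error checking is omitted for brevity.

#include <csetjmp>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf g_recover;
static char* g_guard;
static size_t g_page;

static void handler(int, siginfo_t* info, void*)
{
    char* addr = static_cast<char*>(info->si_addr);
    if (addr >= g_guard && addr < g_guard + g_page)
    {
        // In the guard region: lift the protection (the real patch also
        // disables the BLR optimization and schedules a cache clear here)
        // and bail out to a known-safe point.
        mprotect(g_guard, g_page, PROT_READ | PROT_WRITE);
        siglongjmp(g_recover, 1);
    }
    // Not our fault: restore the default action so the crash stays debuggable,
    // the same "retry and crash" trick the patch uses.
    signal(SIGSEGV, SIG_DFL);
}

int main()
{
    g_page = (size_t)sysconf(_SC_PAGESIZE);

    // A fake "stack": 16 pages with a PROT_NONE guard page in the middle.
    char* stack = static_cast<char*>(mmap(nullptr, 16 * g_page, PROT_READ | PROT_WRITE,
                                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
    g_guard = stack + 8 * g_page;
    mprotect(g_guard, g_page, PROT_NONE);

    // SA_ONSTACK + sigaltstack: the handler must run even if the real RSP is
    // unusable, which is the whole point of the Linux path described above.
    stack_t ss = {};
    ss.ss_sp = malloc(SIGSTKSZ);
    ss.ss_size = SIGSTKSZ;
    sigaltstack(&ss, nullptr);

    struct sigaction sa = {};
    sa.sa_sigaction = handler;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, nullptr);

    if (sigsetjmp(g_recover, 1) == 0)
        g_guard[0] = 1;  // trips the guard page
    else
        puts("guard page hit and recovered");
    return 0;
}
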
@@ -155,6 +242,7 @@ void Jit64::ClearCache() void Jit64::Shutdown() { + FreeStack(); FreeCodeSpace(); blocks.Shutdown(); @@ -174,7 +262,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst) MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunctionC((void*)instr, inst.hex); + ABI_PopRegistersAndAdjustStack(0, 0); } void Jit64::unknown_instruction(UGeckoInstruction inst) @@ -191,7 +281,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst) { gpr.Flush(); fpr.Flush(); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); + ABI_PopRegistersAndAdjustStack(0, 0); } void Jit64::DoNothing(UGeckoInstruction _inst) @@ -223,29 +315,52 @@ static void ImHere() been_here[PC] = 1; } -void Jit64::Cleanup() +bool Jit64::Cleanup() { + bool did_something = false; + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) { + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); + ABI_PopRegistersAndAdjustStack(0, 0); + did_something = true; } // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0.Hex || MMCR1.Hex) + { ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst); + did_something = true; + } + + return did_something; } -void Jit64::WriteExit(u32 destination) +void Jit64::WriteExit(u32 destination, bool bl, u32 after) { + if (!m_enable_blr_optimization) + bl = false; + Cleanup(); + if (bl) + { + MOV(32, R(RSCRATCH2), Imm32(after)); + PUSH(RSCRATCH2); + } + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); + JustWriteExit(destination, bl, after); +} + +void Jit64::JustWriteExit(u32 destination, bool bl, u32 after) +{ //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; JitBlock::LinkData linkData; linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); linkData.linkStatus = false; // Link opportunity! @@ -253,24 +368,76 @@ void Jit64::WriteExit(u32 destination) if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) { // It exists! Joy of joy! 
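// Note on the CALL below: when bl is set, the exit is emitted as a CALL
// rather than a JMP, so the native return address pushed by the CALL sits
// directly under the PPC address pushed above (PUSH(RSCRATCH2)). That pair
// is what WriteBLRExit later checks: CMP against [RSP+8] verifies the
// predicted PPC return address, and the RET consumes the native one, letting
// the CPU's return-stack predictor track emulated BL/BLR. A mismatch falls
// through to dispatcherMispredictedBLR instead.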
- JMP(blocks.GetBlock(block)->checkedEntry, true); + JitBlock* jb = blocks.GetBlock(block); + const u8* addr = jb->checkedEntry; + linkData.exitPtrs = GetWritableCodePtr(); + if (bl) + CALL(addr); + else + JMP(addr, true); linkData.linkStatus = true; } else { MOV(32, PPCSTATE(pc), Imm32(destination)); - JMP(asm_routines.dispatcher, true); + linkData.exitPtrs = GetWritableCodePtr(); + if (bl) + CALL(asm_routines.dispatcher); + else + JMP(asm_routines.dispatcher, true); } b->linkData.push_back(linkData); + + if (bl) + { + POP(RSCRATCH); + JustWriteExit(after, false, 0); + } } -void Jit64::WriteExitDestInRSCRATCH() +void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) { + if (!m_enable_blr_optimization) + bl = false; MOV(32, PPCSTATE(pc), R(RSCRATCH)); Cleanup(); + + if (bl) + { + MOV(32, R(RSCRATCH2), Imm32(after)); + PUSH(RSCRATCH2); + } + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, true); + if (bl) + { + CALL(asm_routines.dispatcher); + POP(RSCRATCH); + JustWriteExit(after, false, 0); + } + else + { + JMP(asm_routines.dispatcher, true); + } +} + +void Jit64::WriteBLRExit() +{ + if (!m_enable_blr_optimization) + { + WriteExitDestInRSCRATCH(); + return; + } + MOV(32, PPCSTATE(pc), R(RSCRATCH)); + bool disturbed = Cleanup(); + if (disturbed) + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + CMP(64, R(RSCRATCH), MDisp(RSP, 8)); + MOV(32, R(RSCRATCH), Imm32(js.downcountAmount)); + J_CC(CC_NE, asm_routines.dispatcherMispredictedBLR); + SUB(32, PPCSTATE(downcount), R(RSCRATCH)); + RET(); } void Jit64::WriteRfiExitDestInRSCRATCH() @@ -278,7 +445,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH() MOV(32, PPCSTATE(pc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); Cleanup(); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); + ABI_PopRegistersAndAdjustStack(0, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -288,7 +457,9 @@ void Jit64::WriteExceptionExit() Cleanup(); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); + ABI_PopRegistersAndAdjustStack(0, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -298,7 +469,9 @@ void Jit64::WriteExternalExceptionExit() Cleanup(); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); + ABI_PopRegistersAndAdjustStack(0, 0); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -340,8 +513,11 @@ void Jit64::Trace() void STACKALIGN Jit64::Jit(u32 em_address) { - if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() || - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache) + if (GetSpaceLeft() < 0x10000 || + farcode.GetSpaceLeft() < 0x10000 || + blocks.IsFull() || + SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache || + m_clear_cache_asap) { ClearCache(); } @@ -395,7 +571,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc b->normalEntry = normalEntry; if (ImHereDebug) + { + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful + ABI_PopRegistersAndAdjustStack(0, 0); + } // 
Conditionally add profiling code. if (Profiler::g_ProfileBlocks) @@ -548,7 +728,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc fpr.Flush(); MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); + ABI_PopRegistersAndAdjustStack(0, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index d444f0f834..0391d258cc 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -18,6 +18,10 @@ // ---------- #pragma once +#ifdef _WIN32 +#include +#endif + #include "Common/x64ABI.h" #include "Common/x64Analyzer.h" #include "Common/x64Emitter.h" @@ -40,6 +44,9 @@ class Jit64 : public Jitx86Base { private: + void AllocStack(); + void FreeStack(); + GPRRegCache gpr; FPURegCache fpr; @@ -48,6 +55,10 @@ private: PPCAnalyst::CodeBuffer code_buffer; Jit64AsmRoutineManager asm_routines; + bool m_enable_blr_optimization; + bool m_clear_cache_asap; + u8* m_stack; + public: Jit64() : code_buffer(32000) {} ~Jit64() {} @@ -55,6 +66,8 @@ public: void Init() override; void Shutdown() override; + bool HandleFault(uintptr_t access_address, SContext* ctx) override; + // Jit! void Jit(u32 em_address) override; @@ -89,13 +102,15 @@ public: // Utilities for use by opcodes - void WriteExit(u32 destination); - void WriteExitDestInRSCRATCH(); + void WriteExit(u32 destination, bool bl = false, u32 after = 0); + void JustWriteExit(u32 destination, bool bl, u32 after); + void WriteExitDestInRSCRATCH(bool bl = false, u32 after = 0); + void WriteBLRExit(); void WriteExceptionExit(); void WriteExternalExceptionExit(); void WriteRfiExitDestInRSCRATCH(); void WriteCallInterpreter(UGeckoInstruction _inst); - void Cleanup(); + bool Cleanup(); void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(s64 val); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index a362768708..dcfffaa3e9 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -9,6 +9,9 @@ using namespace Gen; +// Not PowerPC state. Can't put in 'this' because it's out of range... +static void* s_saved_rsp; + // PLAN: no more block numbers - crazy opcodes just contain offset within // dynarec buffer // At this offset - 4, there is an int specifying the block number. @@ -16,7 +19,23 @@ using namespace Gen; void Jit64AsmRoutineManager::Generate() { enterCode = AlignCode16(); - ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + // We need to own the beginning of RSP, so we do an extra stack adjustment + // for the shadow region before calls in this function. This call will + // waste a bit of space for a second shadow, but whatever. + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16); + if (m_stack_top) + { + // Pivot the stack to our custom one. + MOV(64, R(RSCRATCH), R(RSP)); + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); + MOV(64, MDisp(RSP, 0x18), R(RSCRATCH)); + } + else + { + MOV(64, M(&s_saved_rsp), R(RSP)); + } + // something that can't pass the BLR test + MOV(64, MDisp(RSP, 8), Imm32((u32)-1)); // Two statically allocated registers. 
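// RMEM is pinned to Memory::base so fastmem accesses can address guest memory
// as [RMEM + reg]; RPPCSTATE is biased 0x80 into ppcState so that frequently
// used fields can be reached with short signed 8-bit displacements.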
MOV(64, R(RMEM), Imm64((u64)Memory::base)); @@ -24,24 +43,42 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); + ABI_PopRegistersAndAdjustStack(0, 0); FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time + dispatcherMispredictedBLR = GetCodePtr(); + + #if 0 // debug mispredicts + MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc + ABI_PushRegistersAndAdjustStack(1 << RSCRATCH, 0); + CALL(reinterpret_cast(&ReportMispredict)); + ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0); + #endif + + if (m_stack_top) + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); + else + MOV(64, R(RSP), M(&s_saved_rsp)); + + SUB(32, PPCSTATE(downcount), R(RSCRATCH)); dispatcher = GetCodePtr(); // The result of slice decrementation should be in flags if somebody jumped here // IMPORTANT - We jump on negative, not carry!!! FixupBranch bail = J_CC(CC_BE, true); + FixupBranch dbg_exit; + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) { TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); FixupBranch notStepping = J_CC(CC_Z); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); + ABI_PopRegistersAndAdjustStack(0, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); - FixupBranch noBreakpoint = J_CC(CC_Z); - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); - RET(); - SetJumpTarget(noBreakpoint); + dbg_exit = J_CC(CC_NZ); SetJumpTarget(notStepping); } @@ -106,8 +143,9 @@ void Jit64AsmRoutineManager::Generate() SetJumpTarget(notfound); //Ok, no block, let's jit - MOV(32, R(ABI_PARAM1), PPCSTATE(pc)); - CALL((void *)&Jit); + ABI_PushRegistersAndAdjustStack(0, 0); + ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc)); + ABI_PopRegistersAndAdjustStack(0, 0); JMP(dispatcherNoCheck); // no point in special casing this @@ -119,14 +157,27 @@ void Jit64AsmRoutineManager::Generate() FixupBranch noExtException = J_CC(CC_Z); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); + ABI_PopRegistersAndAdjustStack(0, 0); SetJumpTarget(noExtException); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); J_CC(CC_Z, outerLoop); //Landing pad for drec space - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) + SetJumpTarget(dbg_exit); + if (m_stack_top) + { + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x8)); + POP(RSP); + } + else + { + MOV(64, R(RSP), M(&s_saved_rsp)); + } + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); RET(); GenerateCommon(); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index e3cc4371f7..9272f5c8aa 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -25,10 +25,12 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines private: void Generate(); void GenerateCommon(); + u8* m_stack_top; public: - void Init() + void Init(u8* stack_top) { + m_stack_top = stack_top; AllocCodeSpace(8192); Generate(); WriteProtect(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 8456d56b7c..2508fe1417 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -92,7 +92,7 @@ void Jit64::bx(UGeckoInstruction inst) // make idle loops go faster js.downcountAmount += 8; } - WriteExit(destination); + WriteExit(destination, inst.LK, js.compilerPC + 4); } // TODO - optimize to hell and beyond @@ -133,7 +133,7 @@ void Jit64::bcx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExit(destination); + WriteExit(destination, inst.LK, js.compilerPC + 4); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); @@ -168,7 +168,7 @@ void Jit64::bcctrx(UGeckoInstruction inst) if (inst.LK_3) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); } else { @@ -187,7 +187,7 @@ void Jit64::bcctrx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); // Would really like to continue the block here, but it ends. TODO. SetJumpTarget(b); @@ -235,7 +235,7 @@ void Jit64::bclrx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInRSCRATCH(); + WriteBLRExit(); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index dbb6a5fbf1..79a7c5d76d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -312,7 +312,7 @@ void Jit64::DoMergedBranch() destination = SignExt16(js.next_inst.BD << 2); else destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); + WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { @@ -320,7 +320,7 @@ void Jit64::DoMergedBranch() MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, R(RSCRATCH), M(&CTR)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { @@ -328,7 +328,7 @@ void Jit64::DoMergedBranch() AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); if (js.next_inst.LK) MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4); } else { diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 81260249c7..9f9f9cf98c 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -272,7 +272,7 @@ void JitIL::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); blocks.Init(); - asm_routines.Init(); + asm_routines.Init(nullptr); farcode.Init(js.memcheck ? 
FARCODE_SIZE_MMU : FARCODE_SIZE); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h index d0185719f3..5592500c2a 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h @@ -56,6 +56,10 @@ public: void Trace(); + JitBlockCache *GetBlockCache() override { return &blocks; } + + bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; } + void ClearCache() override; const u8 *GetDispatcher() { @@ -105,4 +109,5 @@ public: void DynaRunTable31(UGeckoInstruction _inst) override; void DynaRunTable59(UGeckoInstruction _inst) override; void DynaRunTable63(UGeckoInstruction _inst) override; + }; diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 3cd4cf4478..3fa62d80ab 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -58,6 +58,8 @@ private: void SetFPException(ArmGen::ARMReg Reg, u32 Exception); ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + + bool BackPatch(SContext* ctx); public: JitArm() : code_buffer(32000) {} ~JitArm() {} @@ -72,9 +74,7 @@ public: JitBaseBlockCache *GetBlockCache() { return &blocks; } - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx); - - bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } + bool HandleFault(uintptr_t access_address, SContext* ctx) override; void Trace(); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp index 6ba24195f5..ee0cf1ee76 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -66,12 +66,23 @@ bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store) } return true; } -const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) + +bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx) +{ + if (access_address < (uintptr_t)Memory::base) + { + PanicAlertT("Exception handler - access below memory space. 
%08llx%08llx", + access_address >> 32, access_address); + } + return BackPatch(ctx); +} + +bool JitArm::BackPatch(SContext* ctx) { // TODO: This ctx needs to be filled with our information - SContext *ctx = (SContext *)ctx_void; // We need to get the destination register before we start + u8* codePtr = (u8*)ctx->CTX_PC; u32 Value = *(u32*)codePtr; ARMReg rD; u8 accessSize; @@ -109,7 +120,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4); ctx->CTX_PC = newPC; emitter.FlushIcache(); - return (u8*)ctx->CTX_PC; + return true; } else { @@ -135,7 +146,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) emitter.MOV(rD, R14); // 8 ctx->CTX_PC -= ARMREGOFFSET + (4 * 4); emitter.FlushIcache(); - return (u8*)ctx->CTX_PC; + return true; } return 0; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 2702db95e1..c3f6a69b5c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -17,6 +17,7 @@ public: const u8 *enterCode; + const u8 *dispatcherMispredictedBLR; const u8 *dispatcher; const u8 *dispatcherNoCheck; const u8 *dispatcherPcInRSCRATCH; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 6e89ae32eb..ea921817b8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -3,24 +3,14 @@ // Refer to the license.txt file included. #include -#include #include "disasm.h" -#include "Common/CommonTypes.h" -#include "Common/StringUtil.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#ifdef _WIN32 - #include -#endif - - using namespace Gen; -extern u8 *trampolineCodePtr; - static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { u64 code_addr = (u64)codePtr; @@ -35,176 +25,51 @@ static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) return; } -void TrampolineCache::Init() +// This generates some fairly heavy trampolines, but it doesn't really hurt. +// Only instructions that access I/O will get these, and there won't be that +// many of them in a typical program/game. +bool Jitx86Base::HandleFault(uintptr_t access_address, SContext* ctx) { - AllocCodeSpace(4 * 1024 * 1024); + // TODO: do we properly handle off-the-end? + if (access_address >= (uintptr_t)Memory::base && access_address < (uintptr_t)Memory::base + 0x100010000) + return BackPatch((u32)(access_address - (uintptr_t)Memory::base), ctx); + + return false; } -void TrampolineCache::Shutdown() +bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) { - FreeCodeSpace(); -} + u8* codePtr = (u8*) ctx->CTX_PC; -// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. -const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) -{ - if (GetSpaceLeft() < 1024) - PanicAlert("Trampoline cache full"); - - const u8 *trampoline = GetCodePtr(); - X64Reg addrReg = (X64Reg)info.scaledReg; - X64Reg dataReg = (X64Reg)info.regOperandReg; - - // It's a read. Easy. - // RSP alignment here is 8 due to the call. 
- ABI_PushRegistersAndAdjustStack(registersInUse, 8); - - if (addrReg != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); - - if (info.displacement) - ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - - switch (info.operandSize) - { - case 4: - CALL((void *)&Memory::Read_U32); - break; - case 2: - CALL((void *)&Memory::Read_U16); - SHL(32, R(ABI_RETURN), Imm8(16)); - break; - case 1: - CALL((void *)&Memory::Read_U8); - break; - } - - if (info.signExtend && info.operandSize == 1) - { - // Need to sign extend value from Read_U8. - MOVSX(32, 8, dataReg, R(ABI_RETURN)); - } - else if (dataReg != EAX) - { - MOV(32, R(dataReg), R(ABI_RETURN)); - } - - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); - return trampoline; -} - -// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. -const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) -{ - if (GetSpaceLeft() < 1024) - PanicAlert("Trampoline cache full"); - - const u8 *trampoline = GetCodePtr(); - - X64Reg dataReg = (X64Reg)info.regOperandReg; - X64Reg addrReg = (X64Reg)info.scaledReg; - - // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a - // hardware access - we can take shortcuts. - // Don't treat FIFO writes specially for now because they require a burst - // check anyway. - - // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, PPCSTATE(pc), Imm32(pc)); - - ABI_PushRegistersAndAdjustStack(registersInUse, 8); - - if (info.hasImmediate) - { - if (addrReg != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(addrReg)); - // we have to swap back the immediate to pass it to the write functions - switch (info.operandSize) - { - case 8: - PanicAlert("Invalid 64-bit immediate!"); - break; - case 4: - MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate))); - break; - case 2: - MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate))); - break; - case 1: - MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate)); - break; - } - } - else - { - MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); - } - if (info.displacement) - { - ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); - } - - switch (info.operandSize) - { - case 8: - CALL((void *)&Memory::Write_U64); - break; - case 4: - CALL((void *)&Memory::Write_U32); - break; - case 2: - CALL((void *)&Memory::Write_U16); - break; - case 1: - CALL((void *)&Memory::Write_U8); - break; - } - - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); - - return trampoline; -} - - -// This generates some fairly heavy trampolines, but: -// 1) It's really necessary. We don't know anything about the context. -// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be -// that many of them in a typical program/game. -const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) -{ - SContext *ctx = (SContext *)ctx_void; - - if (!jit->IsInCodeSpace(codePtr)) - return nullptr; // this will become a regular crash real soon after this + if (!IsInSpace(codePtr)) + return false; // this will become a regular crash real soon after this InstructionInfo info = {}; if (!DisassembleMov(codePtr, &info)) { BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); - return nullptr; + return false; } if (info.otherReg != RMEM) { PanicAlert("BackPatch : Base reg not RMEM." 
"\n\nAttempted to access %08x.", emAddress); - return nullptr; + return false; } if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE) { PanicAlert("BackPatch: MOVBE is too small"); - return nullptr; + return false; } auto it = registersInUseAtLoc.find(codePtr); if (it == registersInUseAtLoc.end()) { PanicAlert("BackPatch: no register use entry for address %p", codePtr); - return nullptr; + return false; } u32 registersInUse = it->second; @@ -228,7 +93,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) { emitter.NOP(padding); } - return codePtr; + ctx->CTX_PC = (u64)codePtr; } else { @@ -281,6 +146,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) { emitter.NOP(padding); } - return start; + ctx->CTX_PC = (u64)start; } + + return true; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h index 3ca7656b21..39e3389501 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h @@ -5,11 +5,6 @@ #pragma once #include "Common/CommonTypes.h" -#include "Common/x64Analyzer.h" -#include "Common/x64Emitter.h" - -// We need at least this many bytes for backpatching. -const int BACKPATCH_SIZE = 5; // meh. #if defined(_WIN32) @@ -147,8 +142,8 @@ const int BACKPATCH_SIZE = 5; #endif #if _M_X86_64 -#define CTX_PC CTX_RIP #include +#define CTX_PC CTX_RIP static inline u64 *ContextRN(SContext* ctx, int n) { static const u8 offsets[] = @@ -173,13 +168,3 @@ static inline u64 *ContextRN(SContext* ctx, int n) return (u64 *) ((char *) ctx + offsets[n]); } #endif - -class TrampolineCache : public Gen::X64CodeBlock -{ -public: - void Init(); - void Shutdown(); - - const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); - const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); -}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index c6ff6e4967..52463ec619 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -26,6 +26,7 @@ #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitCache.h" +#include "Core/PowerPC/JitCommon/TrampolineCache.h" // TODO: find a better place for x86-specific stuff // The following register assignments are common to Jit64 and Jit64IL: @@ -110,24 +111,20 @@ public: virtual void Jit(u32 em_address) = 0; - virtual const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) = 0; - virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; - virtual bool IsInCodeSpace(u8 *ptr) = 0; + virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0; }; class Jitx86Base : public JitBase, public EmuCodeBlock { protected: + bool BackPatch(u32 emAddress, SContext* ctx); JitBlockCache blocks; TrampolineCache trampolines; public: JitBlockCache *GetBlockCache() override { return &blocks; } - - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) override; - - bool IsInCodeSpace(u8 *ptr) override { return IsInSpace(ptr); } + bool HandleFault(uintptr_t access_address, SContext* ctx) override; }; extern JitBase *jit; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index bf1ce35596..d8fc87f449 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ 
b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -364,7 +364,10 @@ using namespace Gen; void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) { XEmitter emit(location); - emit.JMP(address, true); + if (*location == 0xE8) + emit.CALL(address); + else + emit.JMP(address, true); } void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp new file mode 100644 index 0000000000..5e961bc6e5 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ -0,0 +1,156 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/StringUtil.h" +#include "Common/x64ABI.h" +#include "Core/HW/Memmap.h" +#include "Core/PowerPC/JitCommon/JitBase.h" +#include "Core/PowerPC/JitCommon/TrampolineCache.h" + +#ifdef _WIN32 + #include +#endif + + +using namespace Gen; + +extern u8 *trampolineCodePtr; + +void TrampolineCache::Init() +{ + AllocCodeSpace(4 * 1024 * 1024); +} + +void TrampolineCache::Shutdown() +{ + FreeCodeSpace(); +} + +// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. +const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) +{ + if (GetSpaceLeft() < 1024) + PanicAlert("Trampoline cache full"); + + const u8 *trampoline = GetCodePtr(); + X64Reg addrReg = (X64Reg)info.scaledReg; + X64Reg dataReg = (X64Reg)info.regOperandReg; + + // It's a read. Easy. + // RSP alignment here is 8 due to the call. + ABI_PushRegistersAndAdjustStack(registersInUse, 8); + + if (addrReg != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); + + if (info.displacement) + ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); + + switch (info.operandSize) + { + case 4: + CALL((void *)&Memory::Read_U32); + break; + case 2: + CALL((void *)&Memory::Read_U16); + SHL(32, R(ABI_RETURN), Imm8(16)); + break; + case 1: + CALL((void *)&Memory::Read_U8); + break; + } + + if (info.signExtend && info.operandSize == 1) + { + // Need to sign extend value from Read_U8. + MOVSX(32, 8, dataReg, R(ABI_RETURN)); + } + else if (dataReg != EAX) + { + MOV(32, R(dataReg), R(ABI_RETURN)); + } + + ABI_PopRegistersAndAdjustStack(registersInUse, 8); + RET(); + return trampoline; +} + +// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. +const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) +{ + if (GetSpaceLeft() < 1024) + PanicAlert("Trampoline cache full"); + + const u8 *trampoline = GetCodePtr(); + + X64Reg dataReg = (X64Reg)info.regOperandReg; + X64Reg addrReg = (X64Reg)info.scaledReg; + + // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a + // hardware access - we can take shortcuts. + // Don't treat FIFO writes specially for now because they require a burst + // check anyway. 
+ + // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs + MOV(32, PPCSTATE(pc), Imm32(pc)); + + ABI_PushRegistersAndAdjustStack(registersInUse, 8); + + if (info.hasImmediate) + { + if (addrReg != ABI_PARAM2) + MOV(64, R(ABI_PARAM2), R(addrReg)); + // we have to swap back the immediate to pass it to the write functions + switch (info.operandSize) + { + case 8: + PanicAlert("Invalid 64-bit immediate!"); + break; + case 4: + MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate))); + break; + case 2: + MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate))); + break; + case 1: + MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate)); + break; + } + } + else + { + MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); + } + if (info.displacement) + { + ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); + } + + switch (info.operandSize) + { + case 8: + CALL((void *)&Memory::Write_U64); + break; + case 4: + CALL((void *)&Memory::Write_U32); + break; + case 2: + CALL((void *)&Memory::Write_U16); + break; + case 1: + CALL((void *)&Memory::Write_U8); + break; + } + + ABI_PopRegistersAndAdjustStack(registersInUse, 8); + RET(); + + return trampoline; +} + + diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h new file mode 100644 index 0000000000..516a071ac2 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h @@ -0,0 +1,22 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Common/CommonTypes.h" +#include "Common/x64Analyzer.h" +#include "Common/x64Emitter.h" + +// We need at least this many bytes for backpatching. +const int BACKPATCH_SIZE = 5; + +class TrampolineCache : public Gen::X64CodeBlock +{ +public: + void Init(); + void Shutdown(); + + const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); + const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); +}; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index ea9b12be70..6bfd1c4009 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -190,13 +190,9 @@ namespace JitInterface } #endif } - bool IsInCodeSpace(u8 *ptr) + bool HandleFault(uintptr_t access_address, SContext* ctx) { - return jit->IsInCodeSpace(ptr); - } - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) - { - return jit->BackPatch(codePtr, em_address, ctx); + return jit->HandleFault(access_address, ctx); } void ClearCache() diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index 3cb57422bb..a8ed783726 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -7,6 +7,7 @@ #include #include "Common/ChunkFile.h" #include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/JitCommon/JitBackpatch.h" namespace JitInterface { @@ -20,8 +21,7 @@ namespace JitInterface void WriteProfileResults(const std::string& filename); // Memory Utilities - bool IsInCodeSpace(u8 *ptr); - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx); + bool HandleFault(uintptr_t access_address, SContext* ctx); // used by JIT to read instructions u32 Read_Opcode_JIT(const u32 _Address); diff --git a/Source/Core/Core/x64MemTools.cpp b/Source/Core/Core/x64MemTools.cpp index de298363df..518c3bb160 100644 --- 
a/Source/Core/Core/x64MemTools.cpp +++ b/Source/Core/Core/x64MemTools.cpp @@ -23,42 +23,6 @@ namespace EMM { -static bool DoFault(u64 bad_address, SContext *ctx) -{ - if (!JitInterface::IsInCodeSpace((u8*) ctx->CTX_PC)) - { - // Let's not prevent debugging. - return false; - } - - u64 memspace_bottom = (u64)Memory::base; - u64 memspace_top = memspace_bottom + -#if _ARCH_64 - 0x100000000ULL; -#else - 0x40000000; -#endif - - if (bad_address < memspace_bottom || bad_address >= memspace_top) - { - return false; - } - - u32 em_address = (u32)(bad_address - memspace_bottom); - const u8 *new_pc = jit->BackPatch((u8*) ctx->CTX_PC, em_address, ctx); - if (new_pc) - { - ctx->CTX_PC = (u64) new_pc; - } - else - { - // there was an error, give the debugger a chance - return false; - } - - return true; -} - #ifdef _WIN32 LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) @@ -74,10 +38,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) } // virtual address of the inaccessible data - u64 badAddress = (u64)pPtrs->ExceptionRecord->ExceptionInformation[1]; + uintptr_t badAddress = (uintptr_t)pPtrs->ExceptionRecord->ExceptionInformation[1]; CONTEXT *ctx = pPtrs->ContextRecord; - if (DoFault(badAddress, ctx)) + if (JitInterface::HandleFault(badAddress, ctx)) { return (DWORD)EXCEPTION_CONTINUE_EXECUTION; } @@ -125,6 +89,8 @@ void InstallExceptionHandler() handlerInstalled = true; } +void UninstallExceptionHandler() {} + #elif defined(__APPLE__) void CheckKR(const char* name, kern_return_t kr) @@ -196,7 +162,7 @@ void ExceptionThread(mach_port_t port) x86_thread_state64_t *state = (x86_thread_state64_t *) msg_in.old_state; - bool ok = DoFault(msg_in.code[1], state); + bool ok = JitInterface::HandleFault((uintptr_t) msg_in.code[1], state); // Set up the reply. msg_out.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg_in.Head.msgh_bits), 0); @@ -243,6 +209,8 @@ void InstallExceptionHandler() CheckKR("mach_port_request_notification", mach_port_request_notification(mach_task_self(), port, MACH_NOTIFY_NO_SENDERS, 0, port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous)); } +void UninstallExceptionHandler() {} + #elif defined(_POSIX_VERSION) static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) @@ -259,12 +227,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) // Huh? Return. return; } - u64 bad_address = (u64)info->si_addr; + uintptr_t bad_address = (uintptr_t)info->si_addr; // Get all the information we can out of the context. mcontext_t *ctx = &context->uc_mcontext; // assume it's not a write - if (!DoFault(bad_address, ctx)) + if (!JitInterface::HandleFault(bad_address, ctx)) { // retry and crash signal(SIGSEGV, SIG_DFL); @@ -273,6 +241,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) void InstallExceptionHandler() { + stack_t signal_stack; + signal_stack.ss_sp = malloc(SIGSTKSZ); + signal_stack.ss_size = SIGSTKSZ; + signal_stack.ss_flags = 0; + if (sigaltstack(&signal_stack, nullptr)) + PanicAlert("sigaltstack failed"); struct sigaction sa; sa.sa_handler = nullptr; sa.sa_sigaction = &sigsegv_handler; @@ -281,6 +255,16 @@ void InstallExceptionHandler() sigaction(SIGSEGV, &sa, nullptr); } +void UninstallExceptionHandler() +{ + stack_t signal_stack, old_stack; + signal_stack.ss_flags = SS_DISABLE; + if (!sigaltstack(&signal_stack, &old_stack) && + !(old_stack.ss_flags & SS_DISABLE)) + { + free(old_stack.ss_sp); + } +} #else #error Unsupported x86_64 platform! Report this if you support sigaction
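The shape of the Windows vectored handler above is easy to lose in the diff noise. Here is a minimal self-contained sketch of the same fastmem-style recovery path — illustrative names only, not the actual Dolphin handler (which forwards the faulting address to JitInterface::HandleFault). Note this mirrors the fastmem fault path; as the patch comments explain, the BLR stack guard itself is not trapped this way on Windows.

#include <windows.h>
#include <cstdint>
#include <cstdio>

static char* g_guard;

static LONG NTAPI Handler(PEXCEPTION_POINTERS p)
{
    if (p->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
        return EXCEPTION_CONTINUE_SEARCH;  // not ours; keep searching

    // ExceptionInformation[1] holds the faulting virtual address -- the same
    // value the patch passes along as badAddress.
    uintptr_t bad = (uintptr_t)p->ExceptionRecord->ExceptionInformation[1];
    if (bad >= (uintptr_t)g_guard && bad < (uintptr_t)g_guard + 0x1000)
    {
        DWORD old;
        // Equivalent of UnWriteProtectMemory: make the page usable again.
        VirtualProtect(g_guard, 0x1000, PAGE_READWRITE, &old);
        return EXCEPTION_CONTINUE_EXECUTION;  // restart the faulting instruction
    }
    return EXCEPTION_CONTINUE_SEARCH;  // let a debugger or crash handler see it
}

int main()
{
    g_guard = (char*)VirtualAlloc(nullptr, 0x1000, MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS);
    AddVectoredExceptionHandler(1 /* call first */, Handler);
    g_guard[0] = 1;  // access violation -> handler unprotects -> retry succeeds
    puts("recovered from guard-page fault");
    return 0;
}
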