Merge pull request #1025 from comex/blr-optimization

Opportunistically predict BLR destinations using RET.
This commit is contained in:
comex
2014-09-17 20:26:57 -04:00
30 changed files with 599 additions and 293 deletions

View File

@ -158,6 +158,25 @@ void FreeAlignedMemory(void* ptr)
} }
} }
// Revoke all access (read, write, and execute) to the given region so that
// any touch of it faults; used to turn pages into guard regions.
void ReadProtectMemory(void* ptr, size_t size)
{
#ifdef _WIN32
	DWORD previousProtection;
	// PAGE_NOACCESS: any access to the region raises an access violation.
	if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &previousProtection))
		PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg());
#else
	// PROT_NONE: the pages may not be accessed at all.
	if (mprotect(ptr, size, PROT_NONE) != 0)
		PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg());
#endif
}
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{ {
bool error_occurred = false; bool error_occurred = false;

View File

@ -12,8 +12,12 @@ void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size); void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size,size_t alignment); void* AllocateAlignedMemory(size_t size,size_t alignment);
void FreeAlignedMemory(void* ptr); void FreeAlignedMemory(void* ptr);
void ReadProtectMemory(void* ptr, size_t size);
void WriteProtectMemory(void* ptr, size_t size, bool executable = false); void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false); void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
std::string MemUsage(); std::string MemUsage();
void GuardMemoryMake(void* ptr, size_t size);
void GuardMemoryUnmake(void* ptr, size_t size);
inline int GetPageSize() { return 4096; } inline int GetPageSize() { return 4096; }

View File

@ -1766,6 +1766,8 @@ void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI
void XEmitter::LOCK() { Write8(0xF0); } void XEmitter::LOCK() { Write8(0xF0); }
void XEmitter::REP() { Write8(0xF3); } void XEmitter::REP() { Write8(0xF3); }
void XEmitter::REPNE() { Write8(0xF2); } void XEmitter::REPNE() { Write8(0xF2); }
void XEmitter::FSOverride() { Write8(0x64); }
void XEmitter::GSOverride() { Write8(0x65); }
void XEmitter::FWAIT() void XEmitter::FWAIT()
{ {

View File

@ -467,6 +467,8 @@ public:
void LOCK(); void LOCK();
void REP(); void REP();
void REPNE(); void REPNE();
void FSOverride();
void GSOverride();
// x87 // x87
enum x87StatusWordBits { enum x87StatusWordBits {

View File

@ -32,9 +32,9 @@ typedef struct ucontext {
} ucontext_t; } ucontext_t;
#endif #endif
void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
{ {
if (signal != SIGSEGV) if (sig != SIGSEGV)
{ {
// We are not interested in other signals - handle it as usual. // We are not interested in other signals - handle it as usual.
return; return;
@ -47,33 +47,18 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context)
return; return;
} }
// Get all the information we can out of the context. // Get all the information we can out of the context.
mcontext_t *ctx = &context->uc_mcontext; mcontext_t *ctx = &context->uc_mcontext;
void *fault_memory_ptr = (void*)ctx->arm_r10; // comex says hello, and is most curious whether this is arm_r10 for a
u8 *fault_instruction_ptr = (u8 *)ctx->arm_pc; // reason as opposed to si_addr like the x64MemTools.cpp version. Is there
// even a need for this file to be architecture specific?
uintptr_t fault_memory_ptr = (uintptr_t)ctx->arm_r10;
if (!JitInterface::IsInCodeSpace(fault_instruction_ptr)) if (!JitInterface::HandleFault(fault_memory_ptr, ctx))
{ {
// Let's not prevent debugging. // retry and crash
return; signal(SIGSEGV, SIG_DFL);
}
u64 bad_address = (u64)fault_memory_ptr;
u64 memspace_bottom = (u64)Memory::base;
if (bad_address < memspace_bottom)
{
PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
bad_address >> 32, bad_address);
}
u32 em_address = (u32)(bad_address - memspace_bottom);
const u8 *new_rip = jit->BackPatch(fault_instruction_ptr, em_address, ctx);
if (new_rip)
{
ctx->arm_pc = (u32) new_rip;
} }
} }
@ -86,4 +71,7 @@ void InstallExceptionHandler()
sigemptyset(&sa.sa_mask); sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, nullptr); sigaction(SIGSEGV, &sa, nullptr);
} }
void UninstallExceptionHandler() {}
} // namespace } // namespace

View File

@ -195,9 +195,10 @@ if(_M_X86)
PowerPC/Jit64/Jit_Paired.cpp PowerPC/Jit64/Jit_Paired.cpp
PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/JitRegCache.cpp
PowerPC/Jit64/Jit_SystemRegisters.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp
PowerPC/JitCommon/JitBackpatch.cpp
PowerPC/JitCommon/JitAsmCommon.cpp PowerPC/JitCommon/JitAsmCommon.cpp
PowerPC/JitCommon/Jit_Util.cpp) PowerPC/JitCommon/JitBackpatch.cpp
PowerPC/JitCommon/Jit_Util.cpp
PowerPC/JitCommon/TrampolineCache.cpp)
elseif(_M_ARM_32) elseif(_M_ARM_32)
set(SRCS ${SRCS} set(SRCS ${SRCS}
ArmMemTools.cpp ArmMemTools.cpp

View File

@ -277,6 +277,10 @@ static void CpuThread()
if (!_CoreParameter.bCPUThread) if (!_CoreParameter.bCPUThread)
g_video_backend->Video_Cleanup(); g_video_backend->Video_Cleanup();
#if _M_X86_64 || _M_ARM_32
EMM::UninstallExceptionHandler();
#endif
return; return;
} }

View File

@ -229,6 +229,7 @@
<ClCompile Include="PowerPC\JitCommon\JitBase.cpp" /> <ClCompile Include="PowerPC\JitCommon\JitBase.cpp" />
<ClCompile Include="PowerPC\JitCommon\JitCache.cpp" /> <ClCompile Include="PowerPC\JitCommon\JitCache.cpp" />
<ClCompile Include="PowerPC\JitCommon\Jit_Util.cpp" /> <ClCompile Include="PowerPC\JitCommon\Jit_Util.cpp" />
<ClCompile Include="PowerPC\JitCommon\TrampolineCache.cpp" />
<ClCompile Include="PowerPC\JitInterface.cpp" /> <ClCompile Include="PowerPC\JitInterface.cpp" />
<ClCompile Include="PowerPC\PowerPC.cpp" /> <ClCompile Include="PowerPC\PowerPC.cpp" />
<ClCompile Include="PowerPC\PPCAnalyst.cpp" /> <ClCompile Include="PowerPC\PPCAnalyst.cpp" />
@ -406,6 +407,7 @@
<ClInclude Include="PowerPC\JitCommon\JitBase.h" /> <ClInclude Include="PowerPC\JitCommon\JitBase.h" />
<ClInclude Include="PowerPC\JitCommon\JitCache.h" /> <ClInclude Include="PowerPC\JitCommon\JitCache.h" />
<ClInclude Include="PowerPC\JitCommon\Jit_Util.h" /> <ClInclude Include="PowerPC\JitCommon\Jit_Util.h" />
<ClInclude Include="PowerPC\JitCommon\TrampolineCache.h" />
<ClInclude Include="PowerPC\JitInterface.h" /> <ClInclude Include="PowerPC\JitInterface.h" />
<ClInclude Include="PowerPC\PowerPC.h" /> <ClInclude Include="PowerPC\PowerPC.h" />
<ClInclude Include="PowerPC\PPCAnalyst.h" /> <ClInclude Include="PowerPC\PPCAnalyst.h" />
@ -464,4 +466,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -640,6 +640,9 @@
<ClCompile Include="PowerPC\JitCommon\JitCache.cpp"> <ClCompile Include="PowerPC\JitCommon\JitCache.cpp">
<Filter>PowerPC\JitCommon</Filter> <Filter>PowerPC\JitCommon</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="PowerPC\JitCommon\TrampolineCache.cpp">
<Filter>PowerPC\JitCommon</Filter>
</ClCompile>
<ClCompile Include="PowerPC\Jit64IL\IR_X86.cpp"> <ClCompile Include="PowerPC\Jit64IL\IR_X86.cpp">
<Filter>PowerPC\JitIL</Filter> <Filter>PowerPC\JitIL</Filter>
</ClCompile> </ClCompile>
@ -1182,6 +1185,9 @@
<ClInclude Include="PowerPC\JitCommon\JitCache.h"> <ClInclude Include="PowerPC\JitCommon\JitCache.h">
<Filter>PowerPC\JitCommon</Filter> <Filter>PowerPC\JitCommon</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="PowerPC\JitCommon\TrampolineCache.h">
<Filter>PowerPC\JitCommon</Filter>
</ClInclude>
<ClInclude Include="PowerPC\Jit64IL\JitIL.h"> <ClInclude Include="PowerPC\Jit64IL\JitIL.h">
<Filter>PowerPC\JitIL</Filter> <Filter>PowerPC\JitIL</Filter>
</ClInclude> </ClInclude>
@ -1204,4 +1210,4 @@
<ItemGroup> <ItemGroup>
<Text Include="CMakeLists.txt" /> <Text Include="CMakeLists.txt" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -11,4 +11,5 @@ namespace EMM
{ {
typedef u32 EAddr; typedef u32 EAddr;
void InstallExceptionHandler(); void InstallExceptionHandler();
void UninstallExceptionHandler();
} }

View File

@ -95,6 +95,83 @@ using namespace PowerPC;
and such, but it's currently limited to integer ops only. This can definitely be made better. and such, but it's currently limited to integer ops only. This can definitely be made better.
*/ */
// The BLR optimization is nice, but it means that JITted code can overflow the
// native stack by repeatedly running BL. (The chance of this happening in any
// retail game is close to 0, but correctness is correctness...) Also, the
// overflow might not happen directly in the JITted code but in a C++ function
// called from it, so we can't just adjust RSP in the case of a fault.
// Instead, we have to have extra stack space preallocated under the fault
// point which allows the code to continue, after wiping the JIT cache so we
// can reset things at a safe point. Once this condition trips, the
// optimization is permanently disabled, under the assumption this will never
// happen in practice.
// On Unix, we just mark an appropriate region of the stack as PROT_NONE and
// handle it the same way as fastmem faults. It's safe to take a fault with a
// bad RSP, because on Linux we can use sigaltstack and on OS X we're already
// on a separate thread.
// On Windows, the OS gets upset if RSP doesn't work, and I don't know any
// equivalent of sigaltstack. Windows supports guard pages which, when
// accessed, immediately turn into regular pages but cause a trap... but
// putting them in the path of RSP just leads to something (in the kernel?)
// thinking a regular stack extension is required. So this protection is not
// supported on Windows yet... We still use a separate stack for the sake of
// simplicity.
// Layout of the dedicated JIT stack (RSP starts at m_stack + STACK_SIZE and
// the stack grows downward):
//   [GUARD_OFFSET + GUARD_SIZE, STACK_SIZE)  normal working region
//                                            (SAFE_STACK_SIZE bytes)
//   [GUARD_OFFSET, GUARD_OFFSET + GUARD_SIZE) middle guard - trips on
//                                             excessive BL, then removed
//   [GUARD_SIZE, GUARD_OFFSET)               emergency headroom used to keep
//                                            running until the cache is cleared
//   [0, GUARD_SIZE)                          bottom guard, permanent
enum
{
STACK_SIZE = 2 * 1024 * 1024,
SAFE_STACK_SIZE = 512 * 1024,
GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above)
GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE,
};
// Reserve the dedicated JIT stack and install the bottom and middle guard
// pages described in the layout above.
// NOTE(review): the guards are installed only under _WIN32 here, yet the
// design comment above states guard-based protection is "not supported on
// Windows yet" and is handled via signal faults on Unix -- confirm this
// preprocessor condition is not inverted (#ifndef _WIN32).
void Jit64::AllocStack()
{
#if defined(_WIN32)
m_stack = (u8*)AllocateMemoryPages(STACK_SIZE);
ReadProtectMemory(m_stack, GUARD_SIZE);
ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
#endif
}
// Release the dedicated JIT stack allocated by AllocStack(), if any.
// Safe to call when no stack was allocated (m_stack stays null).
void Jit64::FreeStack()
{
#if defined(_WIN32)
	if (m_stack)
	{
		FreeMemoryPages(m_stack, STACK_SIZE);
		// Use nullptr, consistent with the rest of the file (not NULL).
		m_stack = nullptr;
	}
#endif
}
// Handle a memory fault raised while executing JITted code. If the faulting
// address lies in the middle guard page of our custom stack, this is the BLR
// optimization overflowing the stack: disable the optimization, unprotect the
// guard so execution can continue into the emergency headroom, and schedule a
// full cache clear. Otherwise defer to the base class (fastmem backpatching).
// Returns true if the fault was handled.
bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
{
// diff is computed unsigned: addresses below m_stack wrap around to huge
// values, so the single range check below also rejects them.
uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack;
// In the trap region?
if (stack && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
{
WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
// Permanently turn the optimization off; newly compiled code will no
// longer push return predictions.
m_enable_blr_optimization = false;
// Re-enable access to the middle guard so the emergulated code can keep
// running in the headroom below it until we reach a safe point.
UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
// We're going to need to clear the whole cache to get rid of the bad
// CALLs, but we can't yet. Fake the downcount so we're forced to the
// dispatcher (no block linking), and clear the cache so we're sent to
// Jit. Yeah, it's kind of gross.
GetBlockCache()->InvalidateICache(0, 0xffffffff);
CoreTiming::ForceExceptionCheck(0);
m_clear_cache_asap = true;
return true;
}
// Not our guard page - let the common x86 handler try (fastmem backpatch).
return Jitx86Base::HandleFault(access_address, ctx);
}
void Jit64::Init() void Jit64::Init()
{ {
jo.optimizeStack = true; jo.optimizeStack = true;
@ -130,8 +207,18 @@ void Jit64::Init()
trampolines.Init(); trampolines.Init();
AllocCodeSpace(CODE_SIZE); AllocCodeSpace(CODE_SIZE);
// BLR optimization has the same consequences as block linking, as well as
// depending on the fault handler to be safe in the event of excessive BL.
m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem;
m_clear_cache_asap = false;
m_stack = nullptr;
if (m_enable_blr_optimization)
AllocStack();
blocks.Init(); blocks.Init();
asm_routines.Init(); asm_routines.Init(m_stack ? (m_stack + STACK_SIZE) : nullptr);
// important: do this *after* generating the global asm routines, because we can't use farcode in them. // important: do this *after* generating the global asm routines, because we can't use farcode in them.
// it'll crash because the farcode functions get cleared on JIT clears. // it'll crash because the farcode functions get cleared on JIT clears.
@ -155,6 +242,7 @@ void Jit64::ClearCache()
void Jit64::Shutdown() void Jit64::Shutdown()
{ {
FreeStack();
FreeCodeSpace(); FreeCodeSpace();
blocks.Shutdown(); blocks.Shutdown();
@ -174,7 +262,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
} }
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunctionC((void*)instr, inst.hex); ABI_CallFunctionC((void*)instr, inst.hex);
ABI_PopRegistersAndAdjustStack(0, 0);
} }
void Jit64::unknown_instruction(UGeckoInstruction inst) void Jit64::unknown_instruction(UGeckoInstruction inst)
@ -191,7 +281,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst)
{ {
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
ABI_PopRegistersAndAdjustStack(0, 0);
} }
void Jit64::DoNothing(UGeckoInstruction _inst) void Jit64::DoNothing(UGeckoInstruction _inst)
@ -223,29 +315,52 @@ static void ImHere()
been_here[PC] = 1; been_here[PC] = 1;
} }
void Jit64::Cleanup() bool Jit64::Cleanup()
{ {
bool did_something = false;
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{ {
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_PopRegistersAndAdjustStack(0, 0);
did_something = true;
} }
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0.Hex || MMCR1.Hex) if (MMCR0.Hex || MMCR1.Hex)
{
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst); ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
did_something = true;
}
return did_something;
} }
void Jit64::WriteExit(u32 destination) void Jit64::WriteExit(u32 destination, bool bl, u32 after)
{ {
if (!m_enable_blr_optimization)
bl = false;
Cleanup(); Cleanup();
if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
PUSH(RSCRATCH2);
}
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JustWriteExit(destination, bl, after);
}
void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
{
//If nobody has taken care of this yet (this can be removed when all branches are done) //If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock; JitBlock *b = js.curBlock;
JitBlock::LinkData linkData; JitBlock::LinkData linkData;
linkData.exitAddress = destination; linkData.exitAddress = destination;
linkData.exitPtrs = GetWritableCodePtr();
linkData.linkStatus = false; linkData.linkStatus = false;
// Link opportunity! // Link opportunity!
@ -253,24 +368,76 @@ void Jit64::WriteExit(u32 destination)
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{ {
// It exists! Joy of joy! // It exists! Joy of joy!
JMP(blocks.GetBlock(block)->checkedEntry, true); JitBlock* jb = blocks.GetBlock(block);
const u8* addr = jb->checkedEntry;
linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(addr);
else
JMP(addr, true);
linkData.linkStatus = true; linkData.linkStatus = true;
} }
else else
{ {
MOV(32, PPCSTATE(pc), Imm32(destination)); MOV(32, PPCSTATE(pc), Imm32(destination));
JMP(asm_routines.dispatcher, true); linkData.exitPtrs = GetWritableCodePtr();
if (bl)
CALL(asm_routines.dispatcher);
else
JMP(asm_routines.dispatcher, true);
} }
b->linkData.push_back(linkData); b->linkData.push_back(linkData);
if (bl)
{
POP(RSCRATCH);
JustWriteExit(after, false, 0);
}
} }
void Jit64::WriteExitDestInRSCRATCH() void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
{ {
if (!m_enable_blr_optimization)
bl = false;
MOV(32, PPCSTATE(pc), R(RSCRATCH)); MOV(32, PPCSTATE(pc), R(RSCRATCH));
Cleanup(); Cleanup();
if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
PUSH(RSCRATCH2);
}
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); if (bl)
{
CALL(asm_routines.dispatcher);
POP(RSCRATCH);
JustWriteExit(after, false, 0);
}
else
{
JMP(asm_routines.dispatcher, true);
}
}
// Emit a block exit for BLR using the native return-address predictor:
// BL-type exits push the expected PPC return address (see WriteExit), so here
// we compare the actual target in RSCRATCH against the prediction stored on
// the native stack. On a match we RET (predicted perfectly by the CPU's
// return stack); on a mismatch we jump to the slow dispatcher path.
void Jit64::WriteBLRExit()
{
if (!m_enable_blr_optimization)
{
// Optimization disabled (config or guard-page trip): plain dispatch.
WriteExitDestInRSCRATCH();
return;
}
MOV(32, PPCSTATE(pc), R(RSCRATCH));
bool disturbed = Cleanup();
// Cleanup may have emitted calls that clobber RSCRATCH; reload the target
// PC from ppcState if so.
if (disturbed)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
// Compare target against the prediction pushed at [RSP+8] by the BL exit.
CMP(64, R(RSCRATCH), MDisp(RSP, 8));
// Load downcountAmount now - MOV does not touch flags, so the CMP result
// survives for the conditional jump below.
MOV(32, R(RSCRATCH), Imm32(js.downcountAmount));
J_CC(CC_NE, asm_routines.dispatcherMispredictedBLR);
// Prediction hit: charge the downcount and return straight to the caller
// block via the (correctly predicted) native RET.
SUB(32, PPCSTATE(downcount), R(RSCRATCH));
RET();
}
void Jit64::WriteRfiExitDestInRSCRATCH() void Jit64::WriteRfiExitDestInRSCRATCH()
@ -278,7 +445,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
MOV(32, PPCSTATE(pc), R(RSCRATCH)); MOV(32, PPCSTATE(pc), R(RSCRATCH));
MOV(32, PPCSTATE(npc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
Cleanup(); Cleanup();
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -288,7 +457,9 @@ void Jit64::WriteExceptionExit()
Cleanup(); Cleanup();
MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -298,7 +469,9 @@ void Jit64::WriteExternalExceptionExit()
Cleanup(); Cleanup();
MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -340,8 +513,11 @@ void Jit64::Trace()
void STACKALIGN Jit64::Jit(u32 em_address) void STACKALIGN Jit64::Jit(u32 em_address)
{ {
if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() || if (GetSpaceLeft() < 0x10000 ||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache) farcode.GetSpaceLeft() < 0x10000 ||
blocks.IsFull() ||
SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache ||
m_clear_cache_asap)
{ {
ClearCache(); ClearCache();
} }
@ -395,7 +571,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
b->normalEntry = normalEntry; b->normalEntry = normalEntry;
if (ImHereDebug) if (ImHereDebug)
{
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
ABI_PopRegistersAndAdjustStack(0, 0);
}
// Conditionally add profiling code. // Conditionally add profiling code.
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
@ -548,7 +728,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
fpr.Flush(); fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(ops[i].address)); MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
ABI_PopRegistersAndAdjustStack(0, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z); FixupBranch noBreakpoint = J_CC(CC_Z);

View File

@ -18,6 +18,10 @@
// ---------- // ----------
#pragma once #pragma once
#ifdef _WIN32
#include <winnt.h>
#endif
#include "Common/x64ABI.h" #include "Common/x64ABI.h"
#include "Common/x64Analyzer.h" #include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
@ -40,6 +44,9 @@
class Jit64 : public Jitx86Base class Jit64 : public Jitx86Base
{ {
private: private:
void AllocStack();
void FreeStack();
GPRRegCache gpr; GPRRegCache gpr;
FPURegCache fpr; FPURegCache fpr;
@ -48,6 +55,10 @@ private:
PPCAnalyst::CodeBuffer code_buffer; PPCAnalyst::CodeBuffer code_buffer;
Jit64AsmRoutineManager asm_routines; Jit64AsmRoutineManager asm_routines;
bool m_enable_blr_optimization;
bool m_clear_cache_asap;
u8* m_stack;
public: public:
Jit64() : code_buffer(32000) {} Jit64() : code_buffer(32000) {}
~Jit64() {} ~Jit64() {}
@ -55,6 +66,8 @@ public:
void Init() override; void Init() override;
void Shutdown() override; void Shutdown() override;
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
// Jit! // Jit!
void Jit(u32 em_address) override; void Jit(u32 em_address) override;
@ -89,13 +102,15 @@ public:
// Utilities for use by opcodes // Utilities for use by opcodes
void WriteExit(u32 destination); void WriteExit(u32 destination, bool bl = false, u32 after = 0);
void WriteExitDestInRSCRATCH(); void JustWriteExit(u32 destination, bool bl, u32 after);
void WriteExitDestInRSCRATCH(bool bl = false, u32 after = 0);
void WriteBLRExit();
void WriteExceptionExit(); void WriteExceptionExit();
void WriteExternalExceptionExit(); void WriteExternalExceptionExit();
void WriteRfiExitDestInRSCRATCH(); void WriteRfiExitDestInRSCRATCH();
void WriteCallInterpreter(UGeckoInstruction _inst); void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup(); bool Cleanup();
void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);

View File

@ -9,6 +9,9 @@
using namespace Gen; using namespace Gen;
// Not PowerPC state. Can't put in 'this' because it's out of range...
static void* s_saved_rsp;
// PLAN: no more block numbers - crazy opcodes just contain offset within // PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer // dynarec buffer
// At this offset - 4, there is an int specifying the block number. // At this offset - 4, there is an int specifying the block number.
@ -16,7 +19,23 @@ using namespace Gen;
void Jit64AsmRoutineManager::Generate() void Jit64AsmRoutineManager::Generate()
{ {
enterCode = AlignCode16(); enterCode = AlignCode16();
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); // We need to own the beginning of RSP, so we do an extra stack adjustment
// for the shadow region before calls in this function. This call will
// waste a bit of space for a second shadow, but whatever.
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16);
if (m_stack_top)
{
// Pivot the stack to our custom one.
MOV(64, R(RSCRATCH), R(RSP));
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
MOV(64, MDisp(RSP, 0x18), R(RSCRATCH));
}
else
{
MOV(64, M(&s_saved_rsp), R(RSP));
}
// something that can't pass the BLR test
MOV(64, MDisp(RSP, 8), Imm32((u32)-1));
// Two statically allocated registers. // Two statically allocated registers.
MOV(64, R(RMEM), Imm64((u64)Memory::base)); MOV(64, R(RMEM), Imm64((u64)Memory::base));
@ -24,24 +43,42 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
const u8* outerLoop = GetCodePtr(); const u8* outerLoop = GetCodePtr();
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
ABI_PopRegistersAndAdjustStack(0, 0);
FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time
dispatcherMispredictedBLR = GetCodePtr();
#if 0 // debug mispredicts
MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
ABI_PushRegistersAndAdjustStack(1 << RSCRATCH, 0);
CALL(reinterpret_cast<void *>(&ReportMispredict));
ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0);
#endif
if (m_stack_top)
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
else
MOV(64, R(RSP), M(&s_saved_rsp));
SUB(32, PPCSTATE(downcount), R(RSCRATCH));
dispatcher = GetCodePtr(); dispatcher = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here // The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!! // IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = J_CC(CC_BE, true); FixupBranch bail = J_CC(CC_BE, true);
FixupBranch dbg_exit;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{ {
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
FixupBranch notStepping = J_CC(CC_Z); FixupBranch notStepping = J_CC(CC_Z);
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
ABI_PopRegistersAndAdjustStack(0, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z); dbg_exit = J_CC(CC_NZ);
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
SetJumpTarget(noBreakpoint);
SetJumpTarget(notStepping); SetJumpTarget(notStepping);
} }
@ -106,8 +143,9 @@ void Jit64AsmRoutineManager::Generate()
SetJumpTarget(notfound); SetJumpTarget(notfound);
//Ok, no block, let's jit //Ok, no block, let's jit
MOV(32, R(ABI_PARAM1), PPCSTATE(pc)); ABI_PushRegistersAndAdjustStack(0, 0);
CALL((void *)&Jit); ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc));
ABI_PopRegistersAndAdjustStack(0, 0);
JMP(dispatcherNoCheck); // no point in special casing this JMP(dispatcherNoCheck); // no point in special casing this
@ -119,14 +157,27 @@ void Jit64AsmRoutineManager::Generate()
FixupBranch noExtException = J_CC(CC_Z); FixupBranch noExtException = J_CC(CC_Z);
MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
ABI_PopRegistersAndAdjustStack(0, 0);
SetJumpTarget(noExtException); SetJumpTarget(noExtException);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, outerLoop); J_CC(CC_Z, outerLoop);
//Landing pad for drec space //Landing pad for drec space
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
SetJumpTarget(dbg_exit);
if (m_stack_top)
{
MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x8));
POP(RSP);
}
else
{
MOV(64, R(RSP), M(&s_saved_rsp));
}
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET(); RET();
GenerateCommon(); GenerateCommon();

View File

@ -25,10 +25,12 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
private: private:
void Generate(); void Generate();
void GenerateCommon(); void GenerateCommon();
u8* m_stack_top;
public: public:
void Init() void Init(u8* stack_top)
{ {
m_stack_top = stack_top;
AllocCodeSpace(8192); AllocCodeSpace(8192);
Generate(); Generate();
WriteProtect(); WriteProtect();

View File

@ -92,7 +92,7 @@ void Jit64::bx(UGeckoInstruction inst)
// make idle loops go faster // make idle loops go faster
js.downcountAmount += 8; js.downcountAmount += 8;
} }
WriteExit(destination); WriteExit(destination, inst.LK, js.compilerPC + 4);
} }
// TODO - optimize to hell and beyond // TODO - optimize to hell and beyond
@ -133,7 +133,7 @@ void Jit64::bcx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExit(destination); WriteExit(destination, inst.LK, js.compilerPC + 4);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch ); SetJumpTarget( pConditionDontBranch );
@ -168,7 +168,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3) if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(); WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
} }
else else
{ {
@ -187,7 +187,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInRSCRATCH(); WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO. // Would really like to continue the block here, but it ends. TODO.
SetJumpTarget(b); SetJumpTarget(b);
@ -235,7 +235,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInRSCRATCH(); WriteBLRExit();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch ); SetJumpTarget( pConditionDontBranch );

View File

@ -312,7 +312,7 @@ void Jit64::DoMergedBranch()
destination = SignExt16(js.next_inst.BD << 2); destination = SignExt16(js.next_inst.BD << 2);
else else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
WriteExit(destination); WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
{ {
@ -320,7 +320,7 @@ void Jit64::DoMergedBranch()
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
MOV(32, R(RSCRATCH), M(&CTR)); MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(); WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
{ {
@ -328,7 +328,7 @@ void Jit64::DoMergedBranch()
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
WriteExitDestInRSCRATCH(); WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
} }
else else
{ {

View File

@ -272,7 +272,7 @@ void JitIL::Init()
trampolines.Init(); trampolines.Init();
AllocCodeSpace(CODE_SIZE); AllocCodeSpace(CODE_SIZE);
blocks.Init(); blocks.Init();
asm_routines.Init(); asm_routines.Init(nullptr);
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE); farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);

View File

@ -56,6 +56,10 @@ public:
void Trace(); void Trace();
JitBlockCache *GetBlockCache() override { return &blocks; }
bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; }
void ClearCache() override; void ClearCache() override;
const u8 *GetDispatcher() const u8 *GetDispatcher()
{ {
@ -105,4 +109,5 @@ public:
void DynaRunTable31(UGeckoInstruction _inst) override; void DynaRunTable31(UGeckoInstruction _inst) override;
void DynaRunTable59(UGeckoInstruction _inst) override; void DynaRunTable59(UGeckoInstruction _inst) override;
void DynaRunTable63(UGeckoInstruction _inst) override; void DynaRunTable63(UGeckoInstruction _inst) override;
}; };

View File

@ -58,6 +58,8 @@ private:
void SetFPException(ArmGen::ARMReg Reg, u32 Exception); void SetFPException(ArmGen::ARMReg Reg, u32 Exception);
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
bool BackPatch(SContext* ctx);
public: public:
JitArm() : code_buffer(32000) {} JitArm() : code_buffer(32000) {}
~JitArm() {} ~JitArm() {}
@ -72,9 +74,7 @@ public:
JitBaseBlockCache *GetBlockCache() { return &blocks; } JitBaseBlockCache *GetBlockCache() { return &blocks; }
const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx); bool HandleFault(uintptr_t access_address, SContext* ctx) override;
bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); }
void Trace(); void Trace();

View File

@ -66,12 +66,23 @@ bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store)
} }
return true; return true;
} }
const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
{
if (access_address < (uintptr_t)Memory::base)
{
PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
access_address >> 32, access_address);
}
return BackPatch(ctx);
}
bool JitArm::BackPatch(SContext* ctx)
{ {
// TODO: This ctx needs to be filled with our information // TODO: This ctx needs to be filled with our information
SContext *ctx = (SContext *)ctx_void;
// We need to get the destination register before we start // We need to get the destination register before we start
u8* codePtr = (u8*)ctx->CTX_PC;
u32 Value = *(u32*)codePtr; u32 Value = *(u32*)codePtr;
ARMReg rD; ARMReg rD;
u8 accessSize; u8 accessSize;
@ -109,7 +120,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4); u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
ctx->CTX_PC = newPC; ctx->CTX_PC = newPC;
emitter.FlushIcache(); emitter.FlushIcache();
return (u8*)ctx->CTX_PC; return true;
} }
else else
{ {
@ -135,7 +146,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
emitter.MOV(rD, R14); // 8 emitter.MOV(rD, R14); // 8
ctx->CTX_PC -= ARMREGOFFSET + (4 * 4); ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
emitter.FlushIcache(); emitter.FlushIcache();
return (u8*)ctx->CTX_PC; return true;
} }
return 0; return 0;
} }

View File

@ -17,6 +17,7 @@ public:
const u8 *enterCode; const u8 *enterCode;
const u8 *dispatcherMispredictedBLR;
const u8 *dispatcher; const u8 *dispatcher;
const u8 *dispatcherNoCheck; const u8 *dispatcherNoCheck;
const u8 *dispatcherPcInRSCRATCH; const u8 *dispatcherPcInRSCRATCH;

View File

@ -3,24 +3,14 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <cinttypes> #include <cinttypes>
#include <string>
#include "disasm.h" #include "disasm.h"
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#ifdef _WIN32
#include <windows.h>
#endif
using namespace Gen; using namespace Gen;
extern u8 *trampolineCodePtr;
static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
{ {
u64 code_addr = (u64)codePtr; u64 code_addr = (u64)codePtr;
@ -35,176 +25,51 @@ static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
return; return;
} }
void TrampolineCache::Init() // This generates some fairly heavy trampolines, but it doesn't really hurt.
// Only instructions that access I/O will get these, and there won't be that
// many of them in a typical program/game.
bool Jitx86Base::HandleFault(uintptr_t access_address, SContext* ctx)
{ {
AllocCodeSpace(4 * 1024 * 1024); // TODO: do we properly handle off-the-end?
if (access_address >= (uintptr_t)Memory::base && access_address < (uintptr_t)Memory::base + 0x100010000)
return BackPatch((u32)(access_address - (uintptr_t)Memory::base), ctx);
return false;
} }
void TrampolineCache::Shutdown() bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
{ {
FreeCodeSpace(); u8* codePtr = (u8*) ctx->CTX_PC;
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. if (!IsInSpace(codePtr))
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) return false; // this will become a regular crash real soon after this
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8 *trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg;
// It's a read. Easy.
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
switch (info.operandSize)
{
case 4:
CALL((void *)&Memory::Read_U32);
break;
case 2:
CALL((void *)&Memory::Read_U16);
SHL(32, R(ABI_RETURN), Imm8(16));
break;
case 1:
CALL((void *)&Memory::Read_U8);
break;
}
if (info.signExtend && info.operandSize == 1)
{
// Need to sign extend value from Read_U8.
MOVSX(32, 8, dataReg, R(ABI_RETURN));
}
else if (dataReg != EAX)
{
MOV(32, R(dataReg), R(ABI_RETURN));
}
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8 *trampoline = GetCodePtr();
X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg;
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
// hardware access - we can take shortcuts.
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
if (info.hasImmediate)
{
if (addrReg != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(addrReg));
// we have to swap back the immediate to pass it to the write functions
switch (info.operandSize)
{
case 8:
PanicAlert("Invalid 64-bit immediate!");
break;
case 4:
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
break;
case 2:
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
break;
case 1:
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
break;
}
}
else
{
MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg);
}
if (info.displacement)
{
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}
switch (info.operandSize)
{
case 8:
CALL((void *)&Memory::Write_U64);
break;
case 4:
CALL((void *)&Memory::Write_U32);
break;
case 2:
CALL((void *)&Memory::Write_U16);
break;
case 1:
CALL((void *)&Memory::Write_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}
// This generates some fairly heavy trampolines, but:
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{
SContext *ctx = (SContext *)ctx_void;
if (!jit->IsInCodeSpace(codePtr))
return nullptr; // this will become a regular crash real soon after this
InstructionInfo info = {}; InstructionInfo info = {};
if (!DisassembleMov(codePtr, &info)) if (!DisassembleMov(codePtr, &info))
{ {
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
return nullptr; return false;
} }
if (info.otherReg != RMEM) if (info.otherReg != RMEM)
{ {
PanicAlert("BackPatch : Base reg not RMEM." PanicAlert("BackPatch : Base reg not RMEM."
"\n\nAttempted to access %08x.", emAddress); "\n\nAttempted to access %08x.", emAddress);
return nullptr; return false;
} }
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE) if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
{ {
PanicAlert("BackPatch: MOVBE is too small"); PanicAlert("BackPatch: MOVBE is too small");
return nullptr; return false;
} }
auto it = registersInUseAtLoc.find(codePtr); auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end()) if (it == registersInUseAtLoc.end())
{ {
PanicAlert("BackPatch: no register use entry for address %p", codePtr); PanicAlert("BackPatch: no register use entry for address %p", codePtr);
return nullptr; return false;
} }
u32 registersInUse = it->second; u32 registersInUse = it->second;
@ -228,7 +93,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{ {
emitter.NOP(padding); emitter.NOP(padding);
} }
return codePtr; ctx->CTX_PC = (u64)codePtr;
} }
else else
{ {
@ -281,6 +146,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{ {
emitter.NOP(padding); emitter.NOP(padding);
} }
return start; ctx->CTX_PC = (u64)start;
} }
return true;
} }

View File

@ -5,11 +5,6 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
// We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5;
// meh. // meh.
#if defined(_WIN32) #if defined(_WIN32)
@ -147,8 +142,8 @@ const int BACKPATCH_SIZE = 5;
#endif #endif
#if _M_X86_64 #if _M_X86_64
#define CTX_PC CTX_RIP
#include <stddef.h> #include <stddef.h>
#define CTX_PC CTX_RIP
static inline u64 *ContextRN(SContext* ctx, int n) static inline u64 *ContextRN(SContext* ctx, int n)
{ {
static const u8 offsets[] = static const u8 offsets[] =
@ -173,13 +168,3 @@ static inline u64 *ContextRN(SContext* ctx, int n)
return (u64 *) ((char *) ctx + offsets[n]); return (u64 *) ((char *) ctx + offsets[n]);
} }
#endif #endif
class TrampolineCache : public Gen::X64CodeBlock
{
public:
void Init();
void Shutdown();
const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
};

View File

@ -26,6 +26,7 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h"
#include "Core/PowerPC/JitCommon/JitCache.h" #include "Core/PowerPC/JitCommon/JitCache.h"
#include "Core/PowerPC/JitCommon/TrampolineCache.h"
// TODO: find a better place for x86-specific stuff // TODO: find a better place for x86-specific stuff
// The following register assignments are common to Jit64 and Jit64IL: // The following register assignments are common to Jit64 and Jit64IL:
@ -110,24 +111,20 @@ public:
virtual void Jit(u32 em_address) = 0; virtual void Jit(u32 em_address) = 0;
virtual const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) = 0;
virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0;
virtual bool IsInCodeSpace(u8 *ptr) = 0; virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
}; };
class Jitx86Base : public JitBase, public EmuCodeBlock class Jitx86Base : public JitBase, public EmuCodeBlock
{ {
protected: protected:
bool BackPatch(u32 emAddress, SContext* ctx);
JitBlockCache blocks; JitBlockCache blocks;
TrampolineCache trampolines; TrampolineCache trampolines;
public: public:
JitBlockCache *GetBlockCache() override { return &blocks; } JitBlockCache *GetBlockCache() override { return &blocks; }
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) override;
bool IsInCodeSpace(u8 *ptr) override { return IsInSpace(ptr); }
}; };
extern JitBase *jit; extern JitBase *jit;

View File

@ -364,7 +364,10 @@ using namespace Gen;
void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) void JitBlockCache::WriteLinkBlock(u8* location, const u8* address)
{ {
XEmitter emit(location); XEmitter emit(location);
emit.JMP(address, true); if (*location == 0xE8)
emit.CALL(address);
else
emit.JMP(address, true);
} }
void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)

View File

@ -0,0 +1,156 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <cinttypes>
#include <string>
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Common/x64ABI.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitCommon/TrampolineCache.h"
#ifdef _WIN32
#include <windows.h>
#endif
using namespace Gen;
extern u8 *trampolineCodePtr;
void TrampolineCache::Init()
{
AllocCodeSpace(4 * 1024 * 1024);
}
void TrampolineCache::Shutdown()
{
FreeCodeSpace();
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8 *trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg;
// It's a read. Easy.
// RSP alignment here is 8 due to the call.
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
switch (info.operandSize)
{
case 4:
CALL((void *)&Memory::Read_U32);
break;
case 2:
CALL((void *)&Memory::Read_U16);
SHL(32, R(ABI_RETURN), Imm8(16));
break;
case 1:
CALL((void *)&Memory::Read_U8);
break;
}
if (info.signExtend && info.operandSize == 1)
{
// Need to sign extend value from Read_U8.
MOVSX(32, 8, dataReg, R(ABI_RETURN));
}
else if (dataReg != EAX)
{
MOV(32, R(dataReg), R(ABI_RETURN));
}
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
{
if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full");
const u8 *trampoline = GetCodePtr();
X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg;
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
// hardware access - we can take shortcuts.
// Don't treat FIFO writes specially for now because they require a burst
// check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc));
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
if (info.hasImmediate)
{
if (addrReg != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(addrReg));
// we have to swap back the immediate to pass it to the write functions
switch (info.operandSize)
{
case 8:
PanicAlert("Invalid 64-bit immediate!");
break;
case 4:
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
break;
case 2:
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
break;
case 1:
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
break;
}
}
else
{
MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg);
}
if (info.displacement)
{
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}
switch (info.operandSize)
{
case 8:
CALL((void *)&Memory::Write_U64);
break;
case 4:
CALL((void *)&Memory::Write_U32);
break;
case 2:
CALL((void *)&Memory::Write_U16);
break;
case 1:
CALL((void *)&Memory::Write_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
RET();
return trampoline;
}

View File

@ -0,0 +1,22 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include "Common/CommonTypes.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
// We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5;
class TrampolineCache : public Gen::X64CodeBlock
{
public:
void Init();
void Shutdown();
const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
};

View File

@ -190,13 +190,9 @@ namespace JitInterface
} }
#endif #endif
} }
bool IsInCodeSpace(u8 *ptr) bool HandleFault(uintptr_t access_address, SContext* ctx)
{ {
return jit->IsInCodeSpace(ptr); return jit->HandleFault(access_address, ctx);
}
const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx)
{
return jit->BackPatch(codePtr, em_address, ctx);
} }
void ClearCache() void ClearCache()

View File

@ -7,6 +7,7 @@
#include <string> #include <string>
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Core/PowerPC/CPUCoreBase.h" #include "Core/PowerPC/CPUCoreBase.h"
#include "Core/PowerPC/JitCommon/JitBackpatch.h"
namespace JitInterface namespace JitInterface
{ {
@ -20,8 +21,7 @@ namespace JitInterface
void WriteProfileResults(const std::string& filename); void WriteProfileResults(const std::string& filename);
// Memory Utilities // Memory Utilities
bool IsInCodeSpace(u8 *ptr); bool HandleFault(uintptr_t access_address, SContext* ctx);
const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx);
// used by JIT to read instructions // used by JIT to read instructions
u32 Read_Opcode_JIT(const u32 _Address); u32 Read_Opcode_JIT(const u32 _Address);

View File

@ -23,42 +23,6 @@
namespace EMM namespace EMM
{ {
static bool DoFault(u64 bad_address, SContext *ctx)
{
if (!JitInterface::IsInCodeSpace((u8*) ctx->CTX_PC))
{
// Let's not prevent debugging.
return false;
}
u64 memspace_bottom = (u64)Memory::base;
u64 memspace_top = memspace_bottom +
#if _ARCH_64
0x100000000ULL;
#else
0x40000000;
#endif
if (bad_address < memspace_bottom || bad_address >= memspace_top)
{
return false;
}
u32 em_address = (u32)(bad_address - memspace_bottom);
const u8 *new_pc = jit->BackPatch((u8*) ctx->CTX_PC, em_address, ctx);
if (new_pc)
{
ctx->CTX_PC = (u64) new_pc;
}
else
{
// there was an error, give the debugger a chance
return false;
}
return true;
}
#ifdef _WIN32 #ifdef _WIN32
LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
@ -74,10 +38,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
} }
// virtual address of the inaccessible data // virtual address of the inaccessible data
u64 badAddress = (u64)pPtrs->ExceptionRecord->ExceptionInformation[1]; uintptr_t badAddress = (uintptr_t)pPtrs->ExceptionRecord->ExceptionInformation[1];
CONTEXT *ctx = pPtrs->ContextRecord; CONTEXT *ctx = pPtrs->ContextRecord;
if (DoFault(badAddress, ctx)) if (JitInterface::HandleFault(badAddress, ctx))
{ {
return (DWORD)EXCEPTION_CONTINUE_EXECUTION; return (DWORD)EXCEPTION_CONTINUE_EXECUTION;
} }
@ -125,6 +89,8 @@ void InstallExceptionHandler()
handlerInstalled = true; handlerInstalled = true;
} }
void UninstallExceptionHandler() {}
#elif defined(__APPLE__) #elif defined(__APPLE__)
void CheckKR(const char* name, kern_return_t kr) void CheckKR(const char* name, kern_return_t kr)
@ -196,7 +162,7 @@ void ExceptionThread(mach_port_t port)
x86_thread_state64_t *state = (x86_thread_state64_t *) msg_in.old_state; x86_thread_state64_t *state = (x86_thread_state64_t *) msg_in.old_state;
bool ok = DoFault(msg_in.code[1], state); bool ok = JitInterface::HandleFault((uintptr_t) msg_in.code[1], state);
// Set up the reply. // Set up the reply.
msg_out.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg_in.Head.msgh_bits), 0); msg_out.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg_in.Head.msgh_bits), 0);
@ -243,6 +209,8 @@ void InstallExceptionHandler()
CheckKR("mach_port_request_notification", mach_port_request_notification(mach_task_self(), port, MACH_NOTIFY_NO_SENDERS, 0, port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous)); CheckKR("mach_port_request_notification", mach_port_request_notification(mach_task_self(), port, MACH_NOTIFY_NO_SENDERS, 0, port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous));
} }
void UninstallExceptionHandler() {}
#elif defined(_POSIX_VERSION) #elif defined(_POSIX_VERSION)
static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
@ -259,12 +227,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
// Huh? Return. // Huh? Return.
return; return;
} }
u64 bad_address = (u64)info->si_addr; uintptr_t bad_address = (uintptr_t)info->si_addr;
// Get all the information we can out of the context. // Get all the information we can out of the context.
mcontext_t *ctx = &context->uc_mcontext; mcontext_t *ctx = &context->uc_mcontext;
// assume it's not a write // assume it's not a write
if (!DoFault(bad_address, ctx)) if (!JitInterface::HandleFault(bad_address, ctx))
{ {
// retry and crash // retry and crash
signal(SIGSEGV, SIG_DFL); signal(SIGSEGV, SIG_DFL);
@ -273,6 +241,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
void InstallExceptionHandler() void InstallExceptionHandler()
{ {
stack_t signal_stack;
signal_stack.ss_sp = malloc(SIGSTKSZ);
signal_stack.ss_size = SIGSTKSZ;
signal_stack.ss_flags = 0;
if (sigaltstack(&signal_stack, nullptr))
PanicAlert("sigaltstack failed");
struct sigaction sa; struct sigaction sa;
sa.sa_handler = nullptr; sa.sa_handler = nullptr;
sa.sa_sigaction = &sigsegv_handler; sa.sa_sigaction = &sigsegv_handler;
@ -281,6 +255,16 @@ void InstallExceptionHandler()
sigaction(SIGSEGV, &sa, nullptr); sigaction(SIGSEGV, &sa, nullptr);
} }
void UninstallExceptionHandler()
{
stack_t signal_stack, old_stack;
signal_stack.ss_flags = SS_DISABLE;
if (!sigaltstack(&signal_stack, &old_stack) &&
!(old_stack.ss_flags & SS_DISABLE))
{
free(old_stack.ss_sp);
}
}
#else #else
#error Unsupported x86_64 platform! Report this if you support sigaction #error Unsupported x86_64 platform! Report this if you support sigaction