diff --git a/Source/Core/Common/MemoryUtil.cpp b/Source/Core/Common/MemoryUtil.cpp
index f7e1d7d902..a741deef4f 100644
--- a/Source/Core/Common/MemoryUtil.cpp
+++ b/Source/Core/Common/MemoryUtil.cpp
@@ -158,6 +158,25 @@ void FreeAlignedMemory(void* ptr)
}
}
+void ReadProtectMemory(void* ptr, size_t size)
+{
+ bool error_occurred = false;
+
+#ifdef _WIN32
+ DWORD oldValue;
+ if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue))
+ error_occurred = true;
+#else
+ int retval = mprotect(ptr, size, PROT_NONE);
+
+ if (retval != 0)
+ error_occurred = true;
+#endif
+
+ if (error_occurred)
+ PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg());
+}
+
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
bool error_occurred = false;
diff --git a/Source/Core/Common/MemoryUtil.h b/Source/Core/Common/MemoryUtil.h
index 6f437fcda7..5f584f868d 100644
--- a/Source/Core/Common/MemoryUtil.h
+++ b/Source/Core/Common/MemoryUtil.h
@@ -12,8 +12,12 @@ void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size,size_t alignment);
void FreeAlignedMemory(void* ptr);
+void ReadProtectMemory(void* ptr, size_t size);
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
std::string MemUsage();
+void GuardMemoryMake(void* ptr, size_t size);
+void GuardMemoryUnmake(void* ptr, size_t size);
+
inline int GetPageSize() { return 4096; }
diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index fa16cf2b36..75cd418379 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -1766,6 +1766,8 @@ void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI
void XEmitter::LOCK() { Write8(0xF0); }
void XEmitter::REP() { Write8(0xF3); }
void XEmitter::REPNE() { Write8(0xF2); }
+void XEmitter::FSOverride() { Write8(0x64); }
+void XEmitter::GSOverride() { Write8(0x65); }
void XEmitter::FWAIT()
{
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 8f41065668..8b655c2c42 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -467,6 +467,8 @@ public:
void LOCK();
void REP();
void REPNE();
+ void FSOverride();
+ void GSOverride();
// x87
enum x87StatusWordBits {
diff --git a/Source/Core/Core/ArmMemTools.cpp b/Source/Core/Core/ArmMemTools.cpp
index ff7d77e4f4..8b166580a8 100644
--- a/Source/Core/Core/ArmMemTools.cpp
+++ b/Source/Core/Core/ArmMemTools.cpp
@@ -32,9 +32,9 @@ typedef struct ucontext {
} ucontext_t;
#endif
-void sigsegv_handler(int signal, siginfo_t *info, void *raw_context)
+static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
{
- if (signal != SIGSEGV)
+ if (sig != SIGSEGV)
{
// We are not interested in other signals - handle it as usual.
return;
@@ -47,33 +47,18 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context)
return;
}
-
// Get all the information we can out of the context.
mcontext_t *ctx = &context->uc_mcontext;
- void *fault_memory_ptr = (void*)ctx->arm_r10;
- u8 *fault_instruction_ptr = (u8 *)ctx->arm_pc;
+ // comex says hello, and is most curious whether this is arm_r10 for a
+ // reason as opposed to si_addr like the x64MemTools.cpp version. Is there
+ // even a need for this file to be architecture specific?
+ uintptr_t fault_memory_ptr = (uintptr_t)ctx->arm_r10;
- if (!JitInterface::IsInCodeSpace(fault_instruction_ptr))
+ if (!JitInterface::HandleFault(fault_memory_ptr, ctx))
{
- // Let's not prevent debugging.
- return;
- }
-
- u64 bad_address = (u64)fault_memory_ptr;
- u64 memspace_bottom = (u64)Memory::base;
- if (bad_address < memspace_bottom)
- {
- PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
- bad_address >> 32, bad_address);
- }
-
- u32 em_address = (u32)(bad_address - memspace_bottom);
-
- const u8 *new_rip = jit->BackPatch(fault_instruction_ptr, em_address, ctx);
- if (new_rip)
- {
- ctx->arm_pc = (u32) new_rip;
+ // retry and crash
+ signal(SIGSEGV, SIG_DFL);
}
}
@@ -86,4 +71,7 @@ void InstallExceptionHandler()
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, nullptr);
}
+
+void UninstallExceptionHandler() {}
+
} // namespace
diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 78172d5cb0..39800a448e 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -195,9 +195,10 @@ if(_M_X86)
PowerPC/Jit64/Jit_Paired.cpp
PowerPC/Jit64/JitRegCache.cpp
PowerPC/Jit64/Jit_SystemRegisters.cpp
- PowerPC/JitCommon/JitBackpatch.cpp
PowerPC/JitCommon/JitAsmCommon.cpp
- PowerPC/JitCommon/Jit_Util.cpp)
+ PowerPC/JitCommon/JitBackpatch.cpp
+ PowerPC/JitCommon/Jit_Util.cpp
+ PowerPC/JitCommon/TrampolineCache.cpp)
elseif(_M_ARM_32)
set(SRCS ${SRCS}
ArmMemTools.cpp
diff --git a/Source/Core/Core/Core.cpp b/Source/Core/Core/Core.cpp
index 3a764a4e63..5b6294d2c7 100644
--- a/Source/Core/Core/Core.cpp
+++ b/Source/Core/Core/Core.cpp
@@ -277,6 +277,10 @@ static void CpuThread()
if (!_CoreParameter.bCPUThread)
g_video_backend->Video_Cleanup();
+ #if _M_X86_64 || _M_ARM_32
+ EMM::UninstallExceptionHandler();
+ #endif
+
return;
}
diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj
index 7ead1e4172..b46357fd3d 100644
--- a/Source/Core/Core/Core.vcxproj
+++ b/Source/Core/Core/Core.vcxproj
@@ -229,6 +229,7 @@
+
@@ -406,6 +407,7 @@
+
@@ -464,4 +466,4 @@
-
\ No newline at end of file
+
diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters
index 39e6aec8f4..faeb9bcd24 100644
--- a/Source/Core/Core/Core.vcxproj.filters
+++ b/Source/Core/Core/Core.vcxproj.filters
@@ -640,6 +640,9 @@
PowerPC\JitCommon
+
+ PowerPC\JitCommon
+
PowerPC\JitIL
@@ -1182,6 +1185,9 @@
PowerPC\JitCommon
+
+ PowerPC\JitCommon
+
PowerPC\JitIL
@@ -1204,4 +1210,4 @@
-
\ No newline at end of file
+
diff --git a/Source/Core/Core/MemTools.h b/Source/Core/Core/MemTools.h
index 276af3d887..fcc671b799 100644
--- a/Source/Core/Core/MemTools.h
+++ b/Source/Core/Core/MemTools.h
@@ -11,4 +11,5 @@ namespace EMM
{
typedef u32 EAddr;
void InstallExceptionHandler();
+ void UninstallExceptionHandler();
}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 720375613c..92595f6acd 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -95,6 +95,83 @@ using namespace PowerPC;
and such, but it's currently limited to integer ops only. This can definitely be made better.
*/
+// The BLR optimization is nice, but it means that JITted code can overflow the
+// native stack by repeatedly running BL. (The chance of this happening in any
+// retail game is close to 0, but correctness is correctness...) Also, the
+// overflow might not happen directly in the JITted code but in a C++ function
+// called from it, so we can't just adjust RSP in the case of a fault.
+// Instead, we have to have extra stack space preallocated under the fault
+// point which allows the code to continue, after wiping the JIT cache so we
+// can reset things at a safe point. Once this condition trips, the
+// optimization is permanently disabled, under the assumption this will never
+// happen in practice.
+
+// On Unix, we just mark an appropriate region of the stack as PROT_NONE and
+// handle it the same way as fastmem faults. It's safe to take a fault with a
+// bad RSP, because on Linux we can use sigaltstack and on OS X we're already
+// on a separate thread.
+
+// On Windows, the OS gets upset if RSP doesn't work, and I don't know any
+// equivalent of sigaltstack. Windows supports guard pages which, when
+// accessed, immediately turn into regular pages but cause a trap... but
+// putting them in the path of RSP just leads to something (in the kernel?)
+// thinking a regular stack extension is required. So this protection is not
+// supported on Windows yet... We still use a separate stack for the sake of
+// simplicity.
+
+enum
+{
+ STACK_SIZE = 2 * 1024 * 1024,
+ SAFE_STACK_SIZE = 512 * 1024,
+ GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above)
+ GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE,
+};
+
+void Jit64::AllocStack()
+{
+#if defined(_WIN32)
+ m_stack = (u8*)AllocateMemoryPages(STACK_SIZE);
+ ReadProtectMemory(m_stack, GUARD_SIZE);
+ ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
+#endif
+}
+
+void Jit64::FreeStack()
+{
+#if defined(_WIN32)
+ if (m_stack)
+ {
+ FreeMemoryPages(m_stack, STACK_SIZE);
+ m_stack = NULL;
+ }
+#endif
+}
+
+bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
+{
+ uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack;
+ // In the trap region?
+ if (stack && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
+ {
+ WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
+ m_enable_blr_optimization = false;
+ UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE);
+ // We're going to need to clear the whole cache to get rid of the bad
+ // CALLs, but we can't yet. Fake the downcount so we're forced to the
+ // dispatcher (no block linking), and clear the cache so we're sent to
+ // Jit. Yeah, it's kind of gross.
+ GetBlockCache()->InvalidateICache(0, 0xffffffff);
+ CoreTiming::ForceExceptionCheck(0);
+ m_clear_cache_asap = true;
+
+ return true;
+ }
+
+ return Jitx86Base::HandleFault(access_address, ctx);
+}
+
+
+
void Jit64::Init()
{
jo.optimizeStack = true;
@@ -130,8 +207,18 @@ void Jit64::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
+
+ // BLR optimization has the same consequences as block linking, as well as
+ // depending on the fault handler to be safe in the event of excessive BL.
+ m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem;
+ m_clear_cache_asap = false;
+
+ m_stack = nullptr;
+ if (m_enable_blr_optimization)
+ AllocStack();
+
blocks.Init();
- asm_routines.Init();
+ asm_routines.Init(m_stack ? (m_stack + STACK_SIZE) : nullptr);
// important: do this *after* generating the global asm routines, because we can't use farcode in them.
// it'll crash because the farcode functions get cleared on JIT clears.
@@ -155,6 +242,7 @@ void Jit64::ClearCache()
void Jit64::Shutdown()
{
+ FreeStack();
FreeCodeSpace();
blocks.Shutdown();
@@ -174,7 +262,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
}
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunctionC((void*)instr, inst.hex);
+ ABI_PopRegistersAndAdjustStack(0, 0);
}
void Jit64::unknown_instruction(UGeckoInstruction inst)
@@ -191,7 +281,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst)
{
gpr.Flush();
fpr.Flush();
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
+ ABI_PopRegistersAndAdjustStack(0, 0);
}
void Jit64::DoNothing(UGeckoInstruction _inst)
@@ -223,29 +315,52 @@ static void ImHere()
been_here[PC] = 1;
}
-void Jit64::Cleanup()
+bool Jit64::Cleanup()
{
+ bool did_something = false;
+
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
+ ABI_PopRegistersAndAdjustStack(0, 0);
+ did_something = true;
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0.Hex || MMCR1.Hex)
+ {
ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
+ did_something = true;
+ }
+
+ return did_something;
}
-void Jit64::WriteExit(u32 destination)
+void Jit64::WriteExit(u32 destination, bool bl, u32 after)
{
+ if (!m_enable_blr_optimization)
+ bl = false;
+
Cleanup();
+ if (bl)
+ {
+ MOV(32, R(RSCRATCH2), Imm32(after));
+ PUSH(RSCRATCH2);
+ }
+
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
+ JustWriteExit(destination, bl, after);
+}
+
+void Jit64::JustWriteExit(u32 destination, bool bl, u32 after)
+{
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
JitBlock::LinkData linkData;
linkData.exitAddress = destination;
- linkData.exitPtrs = GetWritableCodePtr();
linkData.linkStatus = false;
// Link opportunity!
@@ -253,24 +368,76 @@ void Jit64::WriteExit(u32 destination)
if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0)
{
// It exists! Joy of joy!
- JMP(blocks.GetBlock(block)->checkedEntry, true);
+ JitBlock* jb = blocks.GetBlock(block);
+ const u8* addr = jb->checkedEntry;
+ linkData.exitPtrs = GetWritableCodePtr();
+ if (bl)
+ CALL(addr);
+ else
+ JMP(addr, true);
linkData.linkStatus = true;
}
else
{
MOV(32, PPCSTATE(pc), Imm32(destination));
- JMP(asm_routines.dispatcher, true);
+ linkData.exitPtrs = GetWritableCodePtr();
+ if (bl)
+ CALL(asm_routines.dispatcher);
+ else
+ JMP(asm_routines.dispatcher, true);
}
b->linkData.push_back(linkData);
+
+ if (bl)
+ {
+ POP(RSCRATCH);
+ JustWriteExit(after, false, 0);
+ }
}
-void Jit64::WriteExitDestInRSCRATCH()
+void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
{
+ if (!m_enable_blr_optimization)
+ bl = false;
MOV(32, PPCSTATE(pc), R(RSCRATCH));
Cleanup();
+
+ if (bl)
+ {
+ MOV(32, R(RSCRATCH2), Imm32(after));
+ PUSH(RSCRATCH2);
+ }
+
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
- JMP(asm_routines.dispatcher, true);
+ if (bl)
+ {
+ CALL(asm_routines.dispatcher);
+ POP(RSCRATCH);
+ JustWriteExit(after, false, 0);
+ }
+ else
+ {
+ JMP(asm_routines.dispatcher, true);
+ }
+}
+
+void Jit64::WriteBLRExit()
+{
+ if (!m_enable_blr_optimization)
+ {
+ WriteExitDestInRSCRATCH();
+ return;
+ }
+ MOV(32, PPCSTATE(pc), R(RSCRATCH));
+ bool disturbed = Cleanup();
+ if (disturbed)
+ MOV(32, R(RSCRATCH), PPCSTATE(pc));
+ CMP(64, R(RSCRATCH), MDisp(RSP, 8));
+ MOV(32, R(RSCRATCH), Imm32(js.downcountAmount));
+ J_CC(CC_NE, asm_routines.dispatcherMispredictedBLR);
+ SUB(32, PPCSTATE(downcount), R(RSCRATCH));
+ RET();
}
void Jit64::WriteRfiExitDestInRSCRATCH()
@@ -278,7 +445,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
MOV(32, PPCSTATE(pc), R(RSCRATCH));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
Cleanup();
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions));
+ ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@@ -288,7 +457,9 @@ void Jit64::WriteExceptionExit()
Cleanup();
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions));
+ ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@@ -298,7 +469,9 @@ void Jit64::WriteExternalExceptionExit()
Cleanup();
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions));
+ ABI_PopRegistersAndAdjustStack(0, 0);
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@@ -340,8 +513,11 @@ void Jit64::Trace()
void STACKALIGN Jit64::Jit(u32 em_address)
{
- if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() ||
- SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache)
+ if (GetSpaceLeft() < 0x10000 ||
+ farcode.GetSpaceLeft() < 0x10000 ||
+ blocks.IsFull() ||
+ SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache ||
+ m_clear_cache_asap)
{
ClearCache();
}
@@ -395,7 +571,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
b->normalEntry = normalEntry;
if (ImHereDebug)
+ {
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
+ ABI_PopRegistersAndAdjustStack(0, 0);
+ }
// Conditionally add profiling code.
if (Profiler::g_ProfileBlocks)
@@ -548,7 +728,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints));
+ ABI_PopRegistersAndAdjustStack(0, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index d444f0f834..0391d258cc 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -18,6 +18,10 @@
// ----------
#pragma once
+#ifdef _WIN32
+#include
+#endif
+
#include "Common/x64ABI.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
@@ -40,6 +44,9 @@
class Jit64 : public Jitx86Base
{
private:
+ void AllocStack();
+ void FreeStack();
+
GPRRegCache gpr;
FPURegCache fpr;
@@ -48,6 +55,10 @@ private:
PPCAnalyst::CodeBuffer code_buffer;
Jit64AsmRoutineManager asm_routines;
+ bool m_enable_blr_optimization;
+ bool m_clear_cache_asap;
+ u8* m_stack;
+
public:
Jit64() : code_buffer(32000) {}
~Jit64() {}
@@ -55,6 +66,8 @@ public:
void Init() override;
void Shutdown() override;
+ bool HandleFault(uintptr_t access_address, SContext* ctx) override;
+
// Jit!
void Jit(u32 em_address) override;
@@ -89,13 +102,15 @@ public:
// Utilities for use by opcodes
- void WriteExit(u32 destination);
- void WriteExitDestInRSCRATCH();
+ void WriteExit(u32 destination, bool bl = false, u32 after = 0);
+ void JustWriteExit(u32 destination, bool bl, u32 after);
+ void WriteExitDestInRSCRATCH(bool bl = false, u32 after = 0);
+ void WriteBLRExit();
void WriteExceptionExit();
void WriteExternalExceptionExit();
void WriteRfiExitDestInRSCRATCH();
void WriteCallInterpreter(UGeckoInstruction _inst);
- void Cleanup();
+ bool Cleanup();
void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val);
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index a362768708..dcfffaa3e9 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -9,6 +9,9 @@
using namespace Gen;
+// Not PowerPC state. Can't put in 'this' because it's out of range...
+static void* s_saved_rsp;
+
// PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer
// At this offset - 4, there is an int specifying the block number.
@@ -16,7 +19,23 @@ using namespace Gen;
void Jit64AsmRoutineManager::Generate()
{
enterCode = AlignCode16();
- ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
+ // We need to own the beginning of RSP, so we do an extra stack adjustment
+ // for the shadow region before calls in this function. This call will
+ // waste a bit of space for a second shadow, but whatever.
+ ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16);
+ if (m_stack_top)
+ {
+ // Pivot the stack to our custom one.
+ MOV(64, R(RSCRATCH), R(RSP));
+ MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
+ MOV(64, MDisp(RSP, 0x18), R(RSCRATCH));
+ }
+ else
+ {
+ MOV(64, M(&s_saved_rsp), R(RSP));
+ }
+ // something that can't pass the BLR test
+ MOV(64, MDisp(RSP, 8), Imm32((u32)-1));
// Two statically allocated registers.
MOV(64, R(RMEM), Imm64((u64)Memory::base));
@@ -24,24 +43,42 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
const u8* outerLoop = GetCodePtr();
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance));
+ ABI_PopRegistersAndAdjustStack(0, 0);
FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time
+ dispatcherMispredictedBLR = GetCodePtr();
+
+ #if 0 // debug mispredicts
+ MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc
+ ABI_PushRegistersAndAdjustStack(1 << RSCRATCH, 0);
+ CALL(reinterpret_cast(&ReportMispredict));
+ ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0);
+ #endif
+
+ if (m_stack_top)
+ MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20));
+ else
+ MOV(64, R(RSP), M(&s_saved_rsp));
+
+ SUB(32, PPCSTATE(downcount), R(RSCRATCH));
dispatcher = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
FixupBranch bail = J_CC(CC_BE, true);
+ FixupBranch dbg_exit;
+
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
FixupBranch notStepping = J_CC(CC_Z);
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints));
+ ABI_PopRegistersAndAdjustStack(0, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
- FixupBranch noBreakpoint = J_CC(CC_Z);
- ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
- RET();
- SetJumpTarget(noBreakpoint);
+ dbg_exit = J_CC(CC_NZ);
SetJumpTarget(notStepping);
}
@@ -106,8 +143,9 @@ void Jit64AsmRoutineManager::Generate()
SetJumpTarget(notfound);
//Ok, no block, let's jit
- MOV(32, R(ABI_PARAM1), PPCSTATE(pc));
- CALL((void *)&Jit);
+ ABI_PushRegistersAndAdjustStack(0, 0);
+ ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc));
+ ABI_PopRegistersAndAdjustStack(0, 0);
JMP(dispatcherNoCheck); // no point in special casing this
@@ -119,14 +157,27 @@ void Jit64AsmRoutineManager::Generate()
FixupBranch noExtException = J_CC(CC_Z);
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
+ ABI_PushRegistersAndAdjustStack(0, 0);
ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions));
+ ABI_PopRegistersAndAdjustStack(0, 0);
SetJumpTarget(noExtException);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, outerLoop);
//Landing pad for drec space
- ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
+ if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
+ SetJumpTarget(dbg_exit);
+ if (m_stack_top)
+ {
+ MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x8));
+ POP(RSP);
+ }
+ else
+ {
+ MOV(64, R(RSP), M(&s_saved_rsp));
+ }
+ ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET();
GenerateCommon();
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
index e3cc4371f7..9272f5c8aa 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
@@ -25,10 +25,12 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines
private:
void Generate();
void GenerateCommon();
+ u8* m_stack_top;
public:
- void Init()
+ void Init(u8* stack_top)
{
+ m_stack_top = stack_top;
AllocCodeSpace(8192);
Generate();
WriteProtect();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 8456d56b7c..2508fe1417 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -92,7 +92,7 @@ void Jit64::bx(UGeckoInstruction inst)
// make idle loops go faster
js.downcountAmount += 8;
}
- WriteExit(destination);
+ WriteExit(destination, inst.LK, js.compilerPC + 4);
}
// TODO - optimize to hell and beyond
@@ -133,7 +133,7 @@ void Jit64::bcx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
- WriteExit(destination);
+ WriteExit(destination, inst.LK, js.compilerPC + 4);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch );
@@ -168,7 +168,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
- WriteExitDestInRSCRATCH();
+ WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
}
else
{
@@ -187,7 +187,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
- WriteExitDestInRSCRATCH();
+ WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
SetJumpTarget(b);
@@ -235,7 +235,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
- WriteExitDestInRSCRATCH();
+ WriteBLRExit();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch );
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index dbb6a5fbf1..79a7c5d76d 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -312,7 +312,7 @@ void Jit64::DoMergedBranch()
destination = SignExt16(js.next_inst.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
- WriteExit(destination);
+ WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
{
@@ -320,7 +320,7 @@ void Jit64::DoMergedBranch()
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
- WriteExitDestInRSCRATCH();
+ WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
{
@@ -328,7 +328,7 @@ void Jit64::DoMergedBranch()
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
- WriteExitDestInRSCRATCH();
+ WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
}
else
{
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
index 81260249c7..9f9f9cf98c 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
@@ -272,7 +272,7 @@ void JitIL::Init()
trampolines.Init();
AllocCodeSpace(CODE_SIZE);
blocks.Init();
- asm_routines.Init();
+ asm_routines.Init(nullptr);
farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
index d0185719f3..5592500c2a 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h
@@ -56,6 +56,10 @@ public:
void Trace();
+ JitBlockCache *GetBlockCache() override { return &blocks; }
+
+ bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; }
+
void ClearCache() override;
const u8 *GetDispatcher()
{
@@ -105,4 +109,5 @@ public:
void DynaRunTable31(UGeckoInstruction _inst) override;
void DynaRunTable59(UGeckoInstruction _inst) override;
void DynaRunTable63(UGeckoInstruction _inst) override;
+
};
diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h
index 3cd4cf4478..3fa62d80ab 100644
--- a/Source/Core/Core/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h
@@ -58,6 +58,8 @@ private:
void SetFPException(ArmGen::ARMReg Reg, u32 Exception);
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
+
+ bool BackPatch(SContext* ctx);
public:
JitArm() : code_buffer(32000) {}
~JitArm() {}
@@ -72,9 +74,7 @@ public:
JitBaseBlockCache *GetBlockCache() { return &blocks; }
- const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx);
-
- bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); }
+ bool HandleFault(uintptr_t access_address, SContext* ctx) override;
void Trace();
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
index 6ba24195f5..ee0cf1ee76 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
@@ -66,12 +66,23 @@ bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store)
}
return true;
}
-const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
+
+bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx)
+{
+ if (access_address < (uintptr_t)Memory::base)
+ {
+ PanicAlertT("Exception handler - access below memory space. %08llx%08llx",
+ access_address >> 32, access_address);
+ }
+ return BackPatch(ctx);
+}
+
+bool JitArm::BackPatch(SContext* ctx)
{
// TODO: This ctx needs to be filled with our information
- SContext *ctx = (SContext *)ctx_void;
// We need to get the destination register before we start
+ u8* codePtr = (u8*)ctx->CTX_PC;
u32 Value = *(u32*)codePtr;
ARMReg rD;
u8 accessSize;
@@ -109,7 +120,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
ctx->CTX_PC = newPC;
emitter.FlushIcache();
- return (u8*)ctx->CTX_PC;
+ return true;
}
else
{
@@ -135,7 +146,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void)
emitter.MOV(rD, R14); // 8
ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
emitter.FlushIcache();
- return (u8*)ctx->CTX_PC;
+ return true;
}
return 0;
}
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
index 2702db95e1..c3f6a69b5c 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
@@ -17,6 +17,7 @@ public:
const u8 *enterCode;
+ const u8 *dispatcherMispredictedBLR;
const u8 *dispatcher;
const u8 *dispatcherNoCheck;
const u8 *dispatcherPcInRSCRATCH;
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
index 6e89ae32eb..ea921817b8 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
@@ -3,24 +3,14 @@
// Refer to the license.txt file included.
#include
-#include
#include "disasm.h"
-#include "Common/CommonTypes.h"
-#include "Common/StringUtil.h"
#include "Core/PowerPC/JitCommon/JitBackpatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
-#ifdef _WIN32
- #include
-#endif
-
-
using namespace Gen;
-extern u8 *trampolineCodePtr;
-
static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
{
u64 code_addr = (u64)codePtr;
@@ -35,176 +25,51 @@ static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
return;
}
-void TrampolineCache::Init()
+// This generates some fairly heavy trampolines, but it doesn't really hurt.
+// Only instructions that access I/O will get these, and there won't be that
+// many of them in a typical program/game.
+bool Jitx86Base::HandleFault(uintptr_t access_address, SContext* ctx)
{
- AllocCodeSpace(4 * 1024 * 1024);
+ // TODO: do we properly handle off-the-end?
+ if (access_address >= (uintptr_t)Memory::base && access_address < (uintptr_t)Memory::base + 0x100010000)
+ return BackPatch((u32)(access_address - (uintptr_t)Memory::base), ctx);
+
+ return false;
}
-void TrampolineCache::Shutdown()
+bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
{
- FreeCodeSpace();
-}
+ u8* codePtr = (u8*) ctx->CTX_PC;
-// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
-{
- if (GetSpaceLeft() < 1024)
- PanicAlert("Trampoline cache full");
-
- const u8 *trampoline = GetCodePtr();
- X64Reg addrReg = (X64Reg)info.scaledReg;
- X64Reg dataReg = (X64Reg)info.regOperandReg;
-
- // It's a read. Easy.
- // RSP alignment here is 8 due to the call.
- ABI_PushRegistersAndAdjustStack(registersInUse, 8);
-
- if (addrReg != ABI_PARAM1)
- MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
-
- if (info.displacement)
- ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
-
- switch (info.operandSize)
- {
- case 4:
- CALL((void *)&Memory::Read_U32);
- break;
- case 2:
- CALL((void *)&Memory::Read_U16);
- SHL(32, R(ABI_RETURN), Imm8(16));
- break;
- case 1:
- CALL((void *)&Memory::Read_U8);
- break;
- }
-
- if (info.signExtend && info.operandSize == 1)
- {
- // Need to sign extend value from Read_U8.
- MOVSX(32, 8, dataReg, R(ABI_RETURN));
- }
- else if (dataReg != EAX)
- {
- MOV(32, R(dataReg), R(ABI_RETURN));
- }
-
- ABI_PopRegistersAndAdjustStack(registersInUse, 8);
- RET();
- return trampoline;
-}
-
-// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
-const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
-{
- if (GetSpaceLeft() < 1024)
- PanicAlert("Trampoline cache full");
-
- const u8 *trampoline = GetCodePtr();
-
- X64Reg dataReg = (X64Reg)info.regOperandReg;
- X64Reg addrReg = (X64Reg)info.scaledReg;
-
- // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
- // hardware access - we can take shortcuts.
- // Don't treat FIFO writes specially for now because they require a burst
- // check anyway.
-
- // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
- MOV(32, PPCSTATE(pc), Imm32(pc));
-
- ABI_PushRegistersAndAdjustStack(registersInUse, 8);
-
- if (info.hasImmediate)
- {
- if (addrReg != ABI_PARAM2)
- MOV(64, R(ABI_PARAM2), R(addrReg));
- // we have to swap back the immediate to pass it to the write functions
- switch (info.operandSize)
- {
- case 8:
- PanicAlert("Invalid 64-bit immediate!");
- break;
- case 4:
- MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
- break;
- case 2:
- MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
- break;
- case 1:
- MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
- break;
- }
- }
- else
- {
- MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg);
- }
- if (info.displacement)
- {
- ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
- }
-
- switch (info.operandSize)
- {
- case 8:
- CALL((void *)&Memory::Write_U64);
- break;
- case 4:
- CALL((void *)&Memory::Write_U32);
- break;
- case 2:
- CALL((void *)&Memory::Write_U16);
- break;
- case 1:
- CALL((void *)&Memory::Write_U8);
- break;
- }
-
- ABI_PopRegistersAndAdjustStack(registersInUse, 8);
- RET();
-
- return trampoline;
-}
-
-
-// This generates some fairly heavy trampolines, but:
-// 1) It's really necessary. We don't know anything about the context.
-// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
-// that many of them in a typical program/game.
-const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
-{
- SContext *ctx = (SContext *)ctx_void;
-
- if (!jit->IsInCodeSpace(codePtr))
- return nullptr; // this will become a regular crash real soon after this
+ if (!IsInSpace(codePtr))
+ return false; // this will become a regular crash real soon after this
InstructionInfo info = {};
if (!DisassembleMov(codePtr, &info))
{
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
- return nullptr;
+ return false;
}
if (info.otherReg != RMEM)
{
PanicAlert("BackPatch : Base reg not RMEM."
"\n\nAttempted to access %08x.", emAddress);
- return nullptr;
+ return false;
}
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
{
PanicAlert("BackPatch: MOVBE is too small");
- return nullptr;
+ return false;
}
auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end())
{
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
- return nullptr;
+ return false;
}
u32 registersInUse = it->second;
@@ -228,7 +93,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{
emitter.NOP(padding);
}
- return codePtr;
+ ctx->CTX_PC = (u64)codePtr;
}
else
{
@@ -281,6 +146,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{
emitter.NOP(padding);
}
- return start;
+ ctx->CTX_PC = (u64)start;
}
+
+ return true;
}
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
index 3ca7656b21..39e3389501 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
@@ -5,11 +5,6 @@
#pragma once
#include "Common/CommonTypes.h"
-#include "Common/x64Analyzer.h"
-#include "Common/x64Emitter.h"
-
-// We need at least this many bytes for backpatching.
-const int BACKPATCH_SIZE = 5;
// meh.
#if defined(_WIN32)
@@ -147,8 +142,8 @@ const int BACKPATCH_SIZE = 5;
#endif
#if _M_X86_64
-#define CTX_PC CTX_RIP
#include
+#define CTX_PC CTX_RIP
static inline u64 *ContextRN(SContext* ctx, int n)
{
static const u8 offsets[] =
@@ -173,13 +168,3 @@ static inline u64 *ContextRN(SContext* ctx, int n)
return (u64 *) ((char *) ctx + offsets[n]);
}
#endif
-
-class TrampolineCache : public Gen::X64CodeBlock
-{
-public:
- void Init();
- void Shutdown();
-
- const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
- const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
-};
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index c6ff6e4967..52463ec619 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -26,6 +26,7 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBackpatch.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
+#include "Core/PowerPC/JitCommon/TrampolineCache.h"
// TODO: find a better place for x86-specific stuff
// The following register assignments are common to Jit64 and Jit64IL:
@@ -110,24 +111,20 @@ public:
virtual void Jit(u32 em_address) = 0;
- virtual const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) = 0;
-
virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0;
- virtual bool IsInCodeSpace(u8 *ptr) = 0;
+ virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
};
class Jitx86Base : public JitBase, public EmuCodeBlock
{
protected:
+ bool BackPatch(u32 emAddress, SContext* ctx);
JitBlockCache blocks;
TrampolineCache trampolines;
public:
JitBlockCache *GetBlockCache() override { return &blocks; }
-
- const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) override;
-
- bool IsInCodeSpace(u8 *ptr) override { return IsInSpace(ptr); }
+ bool HandleFault(uintptr_t access_address, SContext* ctx) override;
};
extern JitBase *jit;
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
index bf1ce35596..d8fc87f449 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp
@@ -364,7 +364,10 @@ using namespace Gen;
void JitBlockCache::WriteLinkBlock(u8* location, const u8* address)
{
XEmitter emit(location);
- emit.JMP(address, true);
+ if (*location == 0xE8)
+ emit.CALL(address);
+ else
+ emit.JMP(address, true);
}
void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
new file mode 100644
index 0000000000..5e961bc6e5
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
@@ -0,0 +1,156 @@
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include
+#include
+
+#include "Common/CommonTypes.h"
+#include "Common/StringUtil.h"
+#include "Common/x64ABI.h"
+#include "Core/HW/Memmap.h"
+#include "Core/PowerPC/JitCommon/JitBase.h"
+#include "Core/PowerPC/JitCommon/TrampolineCache.h"
+
+#ifdef _WIN32
+ #include <windows.h>
+#endif
+
+
+using namespace Gen;
+
+extern u8 *trampolineCodePtr;
+
+void TrampolineCache::Init()
+{
+ AllocCodeSpace(4 * 1024 * 1024);
+}
+
+void TrampolineCache::Shutdown()
+{
+ FreeCodeSpace();
+}
+
+// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
+const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse)
+{
+ if (GetSpaceLeft() < 1024)
+ PanicAlert("Trampoline cache full");
+
+ const u8 *trampoline = GetCodePtr();
+ X64Reg addrReg = (X64Reg)info.scaledReg;
+ X64Reg dataReg = (X64Reg)info.regOperandReg;
+
+ // It's a read. Easy.
+ // RSP alignment here is 8 due to the call.
+ ABI_PushRegistersAndAdjustStack(registersInUse, 8);
+
+ if (addrReg != ABI_PARAM1)
+ MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
+
+ if (info.displacement)
+ ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
+
+ switch (info.operandSize)
+ {
+ case 4:
+ CALL((void *)&Memory::Read_U32);
+ break;
+ case 2:
+ CALL((void *)&Memory::Read_U16);
+ SHL(32, R(ABI_RETURN), Imm8(16));
+ break;
+ case 1:
+ CALL((void *)&Memory::Read_U8);
+ break;
+ }
+
+ if (info.signExtend && info.operandSize == 1)
+ {
+ // Need to sign extend value from Read_U8.
+ MOVSX(32, 8, dataReg, R(ABI_RETURN));
+ }
+ else if (dataReg != EAX)
+ {
+ MOV(32, R(dataReg), R(ABI_RETURN));
+ }
+
+ ABI_PopRegistersAndAdjustStack(registersInUse, 8);
+ RET();
+ return trampoline;
+}
+
+// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
+const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc)
+{
+ if (GetSpaceLeft() < 1024)
+ PanicAlert("Trampoline cache full");
+
+ const u8 *trampoline = GetCodePtr();
+
+ X64Reg dataReg = (X64Reg)info.regOperandReg;
+ X64Reg addrReg = (X64Reg)info.scaledReg;
+
+ // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
+ // hardware access - we can take shortcuts.
+ // Don't treat FIFO writes specially for now because they require a burst
+ // check anyway.
+
+ // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
+ MOV(32, PPCSTATE(pc), Imm32(pc));
+
+ ABI_PushRegistersAndAdjustStack(registersInUse, 8);
+
+ if (info.hasImmediate)
+ {
+ if (addrReg != ABI_PARAM2)
+ MOV(64, R(ABI_PARAM2), R(addrReg));
+ // we have to swap back the immediate to pass it to the write functions
+ switch (info.operandSize)
+ {
+ case 8:
+ PanicAlert("Invalid 64-bit immediate!");
+ break;
+ case 4:
+ MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
+ break;
+ case 2:
+ MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
+ break;
+ case 1:
+ MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
+ break;
+ }
+ }
+ else
+ {
+ MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg);
+ }
+ if (info.displacement)
+ {
+ ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
+ }
+
+ switch (info.operandSize)
+ {
+ case 8:
+ CALL((void *)&Memory::Write_U64);
+ break;
+ case 4:
+ CALL((void *)&Memory::Write_U32);
+ break;
+ case 2:
+ CALL((void *)&Memory::Write_U16);
+ break;
+ case 1:
+ CALL((void *)&Memory::Write_U8);
+ break;
+ }
+
+ ABI_PopRegistersAndAdjustStack(registersInUse, 8);
+ RET();
+
+ return trampoline;
+}
+
+
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h
new file mode 100644
index 0000000000..516a071ac2
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h
@@ -0,0 +1,22 @@
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Common/CommonTypes.h"
+#include "Common/x64Analyzer.h"
+#include "Common/x64Emitter.h"
+
+// We need at least this many bytes for backpatching.
+const int BACKPATCH_SIZE = 5;
+
+class TrampolineCache : public Gen::X64CodeBlock
+{
+public:
+ void Init();
+ void Shutdown();
+
+ const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
+ const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc);
+};
diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp
index ea9b12be70..6bfd1c4009 100644
--- a/Source/Core/Core/PowerPC/JitInterface.cpp
+++ b/Source/Core/Core/PowerPC/JitInterface.cpp
@@ -190,13 +190,9 @@ namespace JitInterface
}
#endif
}
- bool IsInCodeSpace(u8 *ptr)
+ bool HandleFault(uintptr_t access_address, SContext* ctx)
{
- return jit->IsInCodeSpace(ptr);
- }
- const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx)
- {
- return jit->BackPatch(codePtr, em_address, ctx);
+ return jit->HandleFault(access_address, ctx);
}
void ClearCache()
diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h
index 3cb57422bb..a8ed783726 100644
--- a/Source/Core/Core/PowerPC/JitInterface.h
+++ b/Source/Core/Core/PowerPC/JitInterface.h
@@ -7,6 +7,7 @@
#include <string>
#include "Common/ChunkFile.h"
#include "Core/PowerPC/CPUCoreBase.h"
+#include "Core/PowerPC/JitCommon/JitBackpatch.h"
namespace JitInterface
{
@@ -20,8 +21,7 @@ namespace JitInterface
void WriteProfileResults(const std::string& filename);
// Memory Utilities
- bool IsInCodeSpace(u8 *ptr);
- const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx);
+ bool HandleFault(uintptr_t access_address, SContext* ctx);
// used by JIT to read instructions
u32 Read_Opcode_JIT(const u32 _Address);
diff --git a/Source/Core/Core/x64MemTools.cpp b/Source/Core/Core/x64MemTools.cpp
index de298363df..518c3bb160 100644
--- a/Source/Core/Core/x64MemTools.cpp
+++ b/Source/Core/Core/x64MemTools.cpp
@@ -23,42 +23,6 @@
namespace EMM
{
-static bool DoFault(u64 bad_address, SContext *ctx)
-{
- if (!JitInterface::IsInCodeSpace((u8*) ctx->CTX_PC))
- {
- // Let's not prevent debugging.
- return false;
- }
-
- u64 memspace_bottom = (u64)Memory::base;
- u64 memspace_top = memspace_bottom +
-#if _ARCH_64
- 0x100000000ULL;
-#else
- 0x40000000;
-#endif
-
- if (bad_address < memspace_bottom || bad_address >= memspace_top)
- {
- return false;
- }
-
- u32 em_address = (u32)(bad_address - memspace_bottom);
- const u8 *new_pc = jit->BackPatch((u8*) ctx->CTX_PC, em_address, ctx);
- if (new_pc)
- {
- ctx->CTX_PC = (u64) new_pc;
- }
- else
- {
- // there was an error, give the debugger a chance
- return false;
- }
-
- return true;
-}
-
#ifdef _WIN32
LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
@@ -74,10 +38,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
}
// virtual address of the inaccessible data
- u64 badAddress = (u64)pPtrs->ExceptionRecord->ExceptionInformation[1];
+ uintptr_t badAddress = (uintptr_t)pPtrs->ExceptionRecord->ExceptionInformation[1];
CONTEXT *ctx = pPtrs->ContextRecord;
- if (DoFault(badAddress, ctx))
+ if (JitInterface::HandleFault(badAddress, ctx))
{
return (DWORD)EXCEPTION_CONTINUE_EXECUTION;
}
@@ -125,6 +89,8 @@ void InstallExceptionHandler()
handlerInstalled = true;
}
+void UninstallExceptionHandler() {}
+
#elif defined(__APPLE__)
void CheckKR(const char* name, kern_return_t kr)
@@ -196,7 +162,7 @@ void ExceptionThread(mach_port_t port)
x86_thread_state64_t *state = (x86_thread_state64_t *) msg_in.old_state;
- bool ok = DoFault(msg_in.code[1], state);
+ bool ok = JitInterface::HandleFault((uintptr_t) msg_in.code[1], state);
// Set up the reply.
msg_out.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg_in.Head.msgh_bits), 0);
@@ -243,6 +209,8 @@ void InstallExceptionHandler()
CheckKR("mach_port_request_notification", mach_port_request_notification(mach_task_self(), port, MACH_NOTIFY_NO_SENDERS, 0, port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous));
}
+void UninstallExceptionHandler() {}
+
#elif defined(_POSIX_VERSION)
static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
@@ -259,12 +227,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
// Huh? Return.
return;
}
- u64 bad_address = (u64)info->si_addr;
+ uintptr_t bad_address = (uintptr_t)info->si_addr;
// Get all the information we can out of the context.
mcontext_t *ctx = &context->uc_mcontext;
// assume it's not a write
- if (!DoFault(bad_address, ctx))
+ if (!JitInterface::HandleFault(bad_address, ctx))
{
// retry and crash
signal(SIGSEGV, SIG_DFL);
@@ -273,6 +241,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
void InstallExceptionHandler()
{
+ stack_t signal_stack;
+ signal_stack.ss_sp = malloc(SIGSTKSZ);
+ signal_stack.ss_size = SIGSTKSZ;
+ signal_stack.ss_flags = 0;
+ if (sigaltstack(&signal_stack, nullptr))
+ PanicAlert("sigaltstack failed");
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &sigsegv_handler;
@@ -281,6 +255,16 @@ void InstallExceptionHandler()
sigaction(SIGSEGV, &sa, nullptr);
}
+void UninstallExceptionHandler()
+{
+ stack_t signal_stack, old_stack;
+ signal_stack.ss_flags = SS_DISABLE;
+ if (!sigaltstack(&signal_stack, &old_stack) &&
+ !(old_stack.ss_flags & SS_DISABLE))
+ {
+ free(old_stack.ss_sp);
+ }
+}
#else
#error Unsupported x86_64 platform! Report this if you support sigaction