Mirror of https://github.com/dolphin-emu/dolphin.git (synced 2024-11-15 05:47:56 -07:00)

Merge pull request #4870 from degasus/blr
JitArm64: Use a custom stack with proper guard pages.

Commit: 3cbbd48da9
@@ -13,6 +13,7 @@
 #include "Core/ConfigManager.h"
 #include "Core/Core.h"
+#include "Core/CoreTiming.h"
 #include "Core/HLE/HLE.h"
 #include "Core/HW/GPFifo.h"
 #include "Core/HW/Memmap.h"
@@ -26,7 +27,15 @@
 
 using namespace Arm64Gen;
 
-static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
+constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
+constexpr size_t FARCODE_SIZE = 1024 * 1024 * 16;
+constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48;
+
+constexpr size_t STACK_SIZE = 2 * 1024 * 1024;
+constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
+constexpr size_t GUARD_SIZE = 0x10000; // two guards - bottom (permanent) and middle (see above)
+constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
 
 static bool HasCycleCounters()
 {
   // Bit needs to be set to support cycle counters
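The new constants describe the custom stack layout: a 2 MiB block with a permanent guard page at the very bottom and a second, middle guard page placed GUARD_OFFSET bytes above the base, so that the middle guard plus SAFE_STACK_SIZE account for the space up to the top of the block. A minimal sketch of the arithmetic, with the values copied from the diff (not part of the commit):

#include <cstddef>

constexpr size_t STACK_SIZE = 2 * 1024 * 1024;
constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
constexpr size_t GUARD_SIZE = 0x10000;
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;

// The middle guard starts 0x170000 bytes above the base; guard plus safe area
// fill the remainder of the block exactly.
static_assert(GUARD_OFFSET == 0x170000, "unexpected middle guard position");
static_assert(GUARD_OFFSET + GUARD_SIZE + SAFE_STACK_SIZE == STACK_SIZE,
              "middle guard and safe area must fill the stack");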
@@ -38,7 +47,7 @@ static bool HasCycleCounters()
 
 void JitArm64::Init()
 {
-  size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
+  size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
   AllocCodeSpace(CODE_SIZE + child_code_size);
   AddChildCodeSpace(&farcode, child_code_size);
   jo.enableBlocklink = true;
@@ -54,13 +63,64 @@ void JitArm64::Init()
   analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
   analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
   analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
-  m_enable_blr_optimization = true;
 
+  m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().bFastmem &&
+                              !SConfig::GetInstance().bEnableDebugging;
+  m_cleanup_after_stackfault = false;
+
+  AllocStack();
   GenerateAsm();
 
   m_supports_cycle_counter = HasCycleCounters();
 }
 
+bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
+{
+  // We can't handle any fault from other threads.
+  if (!Core::IsCPUThread())
+  {
+    ERROR_LOG(DYNA_REC, "Exception handler - Not on CPU thread");
+    DoBacktrace(access_address, ctx);
+    return false;
+  }
+
+  bool success = false;
+
+  // Handle BLR stack faults, may happen in C++ code.
+  uintptr_t stack = (uintptr_t)m_stack_base;
+  uintptr_t diff = access_address - stack;
+  if (diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE)
+    success = HandleStackFault();
+
+  // If the fault is in JIT code space, look for fastmem areas.
+  if (!success && IsInSpace((u8*)ctx->CTX_PC))
+    success = HandleFastmemFault(access_address, ctx);
+
+  if (!success)
+  {
+    ERROR_LOG(DYNA_REC, "Exception handler - Unhandled fault");
+    DoBacktrace(access_address, ctx);
+  }
+  return success;
+}
+
+bool JitArm64::HandleStackFault()
+{
+  if (!m_enable_blr_optimization)
+    return false;
+
+  ERROR_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
+  m_enable_blr_optimization = false;
+#ifndef _WIN32
+  Common::UnWriteProtectMemory(m_stack_base + GUARD_OFFSET, GUARD_SIZE);
+#endif
+  GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
+  CoreTiming::ForceExceptionCheck(0);
+  m_cleanup_after_stackfault = true;
+
+  return true;
+}
+
 void JitArm64::ClearCache()
 {
   m_fault_to_handler.clear();
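HandleFault first classifies the fault: an address inside the middle guard page of the custom stack is treated as a BLR-stack overflow and HandleStackFault disables the optimization; otherwise, if the faulting PC lies in JIT code space, the fault is handed to the fastmem backpatcher. A self-contained sketch of the guard-page range check, reusing the constant values from the diff (illustrative only):

#include <cstddef>
#include <cstdint>

constexpr size_t GUARD_SIZE = 0x10000;
constexpr size_t GUARD_OFFSET = 2 * 1024 * 1024 - 512 * 1024 - GUARD_SIZE;

// Returns true when access_address hits the middle guard page of the custom stack.
// Unsigned wrap-around pushes addresses below stack_base far outside the window,
// so the two comparisons are sufficient.
bool IsMiddleGuardFault(uintptr_t access_address, uintptr_t stack_base)
{
  const uintptr_t diff = access_address - stack_base;
  return diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE;
}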
@@ -78,6 +138,7 @@ void JitArm64::Shutdown()
 {
   FreeCodeSpace();
   blocks.Shutdown();
+  FreeStack();
 }
 
 void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
@@ -199,7 +260,41 @@ void JitArm64::ResetStack()
     return;
 
   LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
-  SUB(SP, X0, 16);
+  ADD(SP, X0, 0);
+}
+
+void JitArm64::AllocStack()
+{
+  if (!m_enable_blr_optimization)
+    return;
+
+#ifndef _WIN32
+  m_stack_base = static_cast<u8*>(Common::AllocateMemoryPages(STACK_SIZE));
+  if (!m_stack_base)
+  {
+    m_enable_blr_optimization = false;
+    return;
+  }
+
+  m_stack_pointer = m_stack_base + GUARD_OFFSET;
+  Common::ReadProtectMemory(m_stack_base, GUARD_SIZE);
+  Common::ReadProtectMemory(m_stack_pointer, GUARD_SIZE);
+#else
+  // For windows we just keep using the system stack and reserve a large amount of memory at the end
+  // of the stack.
+  ULONG reserveSize = SAFE_STACK_SIZE;
+  SetThreadStackGuarantee(&reserveSize);
+#endif
+}
+
+void JitArm64::FreeStack()
+{
+#ifndef _WIN32
+  if (m_stack_base)
+    Common::FreeMemoryPages(m_stack_base, STACK_SIZE);
+  m_stack_base = nullptr;
+  m_stack_pointer = nullptr;
+#endif
 }
 
 void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
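AllocStack goes through Dolphin's Common::AllocateMemoryPages and Common::ReadProtectMemory helpers. Assuming those wrap mmap/mprotect-style page allocation and protection, the non-Windows path is roughly equivalent to the following POSIX sketch (an illustration under that assumption, not the project's actual implementation):

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>

constexpr size_t STACK_SIZE = 2 * 1024 * 1024;
constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
constexpr size_t GUARD_SIZE = 0x10000;
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;

uint8_t* AllocGuardedStack()
{
  void* base = mmap(nullptr, STACK_SIZE, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED)
    return nullptr;

  uint8_t* stack = static_cast<uint8_t*>(base);
  // Bottom guard (permanent) and middle guard: touching either page faults,
  // so the process-wide fault handler can route the access into HandleFault above.
  mprotect(stack, GUARD_SIZE, PROT_NONE);
  mprotect(stack + GUARD_OFFSET, GUARD_SIZE, PROT_NONE);
  return stack;
}

On Windows the commit keeps the system stack and relies on SetThreadStackGuarantee instead, with _resetstkoflw() restoring the overflow guard page in the Jit() hunk further down.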
@@ -506,6 +601,16 @@ void JitArm64::SingleStep()
 
 void JitArm64::Jit(u32)
 {
+  if (m_cleanup_after_stackfault)
+  {
+    ClearCache();
+    m_cleanup_after_stackfault = false;
+#ifdef _WIN32
+    // The stack is in an invalid state with no guard page, reset it.
+    _resetstkoflw();
+#endif
+  }
+
   if (IsAlmostFull() || farcode.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
   {
     ClearCache();
@@ -18,9 +18,6 @@
 #include "Core/PowerPC/JitCommon/JitBase.h"
 #include "Core/PowerPC/PPCAnalyst.h"
 
-constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
-constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 48;
-
 class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonAsmRoutinesBase
 {
 public:
@@ -32,6 +29,9 @@ public:
   JitBaseBlockCache* GetBlockCache() override { return &blocks; }
   bool IsInCodeSpace(const u8* ptr) const { return IsInSpace(ptr); }
   bool HandleFault(uintptr_t access_address, SContext* ctx) override;
+  void DoBacktrace(uintptr_t access_address, SContext* ctx);
+  bool HandleStackFault() override;
+  bool HandleFastmemFault(uintptr_t access_address, SContext* ctx);
 
   void ClearCache() override;
 
@@ -191,6 +191,10 @@ private:
   bool m_supports_cycle_counter;
 
   bool m_enable_blr_optimization;
+  bool m_cleanup_after_stackfault = false;
+  u8* m_stack_base = nullptr;
+  u8* m_stack_pointer = nullptr;
+  u8* m_saved_stack_pointer = nullptr;
 
   void EmitResetCycleCounters();
   void EmitGetCycles(Arm64Gen::ARM64Reg reg);
@@ -226,6 +230,8 @@ private:
   void DoDownCount();
   void Cleanup();
   void ResetStack();
+  void AllocStack();
+  void FreeStack();
 
   // AsmRoutines
   void GenerateAsm();
@@ -17,7 +17,7 @@
 
 using namespace Arm64Gen;
 
-static void DoBacktrace(uintptr_t access_address, SContext* ctx)
+void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
 {
   for (int i = 0; i < 30; i += 2)
     ERROR_LOG(DYNA_REC, "R%d: 0x%016llx\tR%d: 0x%016llx", i, ctx->CTX_REG(i), i + 1,
@@ -283,17 +283,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
   }
 }
 
-bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
+bool JitArm64::HandleFastmemFault(uintptr_t access_address, SContext* ctx)
 {
-  if (!IsInSpace((u8*)ctx->CTX_PC))
-  {
-    ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC,
-              Common::swap32(*(u32*)ctx->CTX_PC));
-
-    DoBacktrace(access_address, ctx);
-    return false;
-  }
-
   if (!(access_address >= (uintptr_t)Memory::physical_base &&
         access_address < (uintptr_t)Memory::physical_base + 0x100010000) &&
       !(access_address >= (uintptr_t)Memory::logical_base &&
@@ -302,8 +293,6 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
     ERROR_LOG(DYNA_REC,
               "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx",
               ctx->CTX_PC, access_address, (uintptr_t)Memory::physical_base);
-
-    DoBacktrace(access_address, ctx);
     return false;
   }
 
@@ -28,14 +28,24 @@ void JitArm64::GenerateAsm()
 
   MOVP2R(PPC_REG, &PowerPC::ppcState);
 
-  // Store the stack pointer, so we can reset it if the BLR optimization fails.
+  // Swap the stack pointer, so we have proper guard pages.
   ADD(X0, SP, 0);
-  STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
+  MOVP2R(X1, &m_saved_stack_pointer);
+  STR(INDEX_UNSIGNED, X0, X1, 0);
+  MOVP2R(X1, &m_stack_pointer);
+  LDR(INDEX_UNSIGNED, X0, X1, 0);
+  FixupBranch no_fake_stack = CBZ(X0);
+  ADD(SP, X0, 0);
+  SetJumpTarget(no_fake_stack);
 
   // Push {nullptr; -1} as invalid destination on the stack.
   MOVI2R(X0, 0xFFFFFFFF);
   STP(INDEX_PRE, ZR, X0, SP, -16);
+
+  // Store the stack pointer, so we can reset it if the BLR optimization fails.
+  ADD(X0, SP, 0);
+  STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
 
   // The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().
   // Advance() does an exception check so we don't know what PC to use until afterwards.
   FixupBranch to_start_of_timing_slice = B();
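The reworked prologue saves the incoming system stack pointer into m_saved_stack_pointer and then switches SP to the custom stack, skipping the switch when m_stack_pointer is null (Windows, or a failed allocation). A C-level sketch of what the emitted sequence does at run time, using hypothetical current_sp()/set_sp() helpers that stand in for direct SP manipulation (the real code emits the MOVP2R/STR/LDR/CBZ instructions shown above):

#include <cstdint>

// Hypothetical helpers; the generated AArch64 code reads and writes SP directly.
extern "C" uintptr_t current_sp();
extern "C" void set_sp(uintptr_t sp);

void DispatcherPrologue(uintptr_t* saved_stack_pointer, uintptr_t stack_pointer)
{
  // ADD X0, SP, 0 ; MOVP2R X1, &m_saved_stack_pointer ; STR X0, [X1]
  *saved_stack_pointer = current_sp();

  // MOVP2R X1, &m_stack_pointer ; LDR X0, [X1] ; CBZ X0, skip ; ADD SP, X0, 0
  if (stack_pointer != 0)
    set_sp(stack_pointer);
}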
@@ -161,7 +171,8 @@ void JitArm64::GenerateAsm()
   SetJumpTarget(Exit);
 
   // Reset the stack pointer, as the BLR optimization have touched it.
-  LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
+  MOVP2R(X1, &m_saved_stack_pointer);
+  LDR(INDEX_UNSIGNED, X0, X1, 0);
   ADD(SP, X0, 0);
 
   ABI_PopRegisters(regs_to_save);