diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 6eaa4fdfca..f5b00de3cc 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -271,8 +271,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned
 void ARM64XEmitter::SetCodePtr(u8* ptr)
 {
 	m_code = ptr;
-	m_startcode = m_code;
-	m_lastCacheFlushEnd = ptr;
+	if (!m_lastCacheFlushEnd)
+		m_lastCacheFlushEnd = ptr;
 }
 
 const u8* ARM64XEmitter::GetCodePtr() const
@@ -315,6 +315,9 @@ void ARM64XEmitter::FlushIcache()
 
 void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
 {
+	if (start == end)
+		return;
+
 #if defined(IOS)
 	// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
 	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 6939984308..c4ce6ab253 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -324,7 +324,6 @@ class ARM64XEmitter
 
 private:
 	u8* m_code;
-	u8* m_startcode;
 	u8* m_lastCacheFlushEnd;
 
 	void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
@@ -365,14 +364,13 @@ protected:
 public:
 	ARM64XEmitter()
-		: m_code(nullptr), m_startcode(nullptr), m_lastCacheFlushEnd(nullptr)
+		: m_code(nullptr), m_lastCacheFlushEnd(nullptr)
 	{
 	}
 
 	ARM64XEmitter(u8* code_ptr)
 	{
 		m_code = code_ptr;
 		m_lastCacheFlushEnd = code_ptr;
-		m_startcode = code_ptr;
 	}
 
 	virtual ~ARM64XEmitter()
diff --git a/Source/Core/Common/CodeBlock.h b/Source/Core/Common/CodeBlock.h
index 972fc707d6..5699fc5385 100644
--- a/Source/Core/Common/CodeBlock.h
+++ b/Source/Core/Common/CodeBlock.h
@@ -72,5 +72,11 @@ public:
 	{
 		return region_size - (T::GetCodePtr() - region);
 	}
+
+	bool IsAlmostFull() const
+	{
+		// This should be bigger than the biggest block ever.
+		return GetSpaceLeft() < 0x10000;
+	}
 };
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 65227a07f4..723d237315 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -490,9 +490,7 @@ void Jit64::Trace()
 
 void Jit64::Jit(u32 em_address)
 {
-	if (GetSpaceLeft() < 0x10000 ||
-	    farcode.GetSpaceLeft() < 0x10000 ||
-	    trampolines.GetSpaceLeft() < 0x10000 ||
+	if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() ||
 		blocks.IsFull() ||
 		SConfig::GetInstance().bJITNoBlockCache ||
 		m_clear_cache_asap)
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
index 0c82b234a7..d457f6e766 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
@@ -473,7 +473,7 @@ void JitIL::Trace()
 
 void JitIL::Jit(u32 em_address)
 {
-	if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() ||
+	if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() ||
 		SConfig::GetInstance().bJITNoBlockCache)
 	{
 		ClearCache();
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index c1bf82b05d..9836e0b70a 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -17,6 +17,7 @@ using namespace Arm64Gen;
 void JitArm64::Init()
 {
 	AllocCodeSpace(CODE_SIZE);
+	farcode.Init(SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE);
 	jo.enableBlocklink = true;
 	jo.optimizeGatherPipe = true;
 	UpdateMemoryOptions();
@@ -36,6 +37,7 @@
 void JitArm64::ClearCache()
 {
 	ClearCodeSpace();
+	farcode.ClearCodeSpace();
 	blocks.Clear();
 	UpdateMemoryOptions();
 }
@@ -43,6 +45,7 @@
 void JitArm64::Shutdown()
 {
 	FreeCodeSpace();
+	farcode.Shutdown();
 	blocks.Shutdown();
 	asm_routines.Shutdown();
 }
@@ -276,7 +279,7 @@ void JitArm64::SingleStep()
 
 void JitArm64::Jit(u32)
 {
-	if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache)
+	if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache)
 	{
 		ClearCache();
 	}
@@ -397,6 +400,10 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 		LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
 		FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit
 
+		FixupBranch far = B();
+		SwitchToFarCode();
+		SetJumpTarget(far);
+
 		gpr.Flush(FLUSH_MAINTAIN_STATE);
 		fpr.Flush(FLUSH_MAINTAIN_STATE);
 
@@ -407,6 +414,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 		MOVI2R(WA, js.compilerPC);
 		WriteExceptionExit(WA);
 
+		SwitchToNearCode();
+
 		SetJumpTarget(b1);
 
 		js.firstFPInstructionFound = true;
@@ -450,5 +459,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 	b->originalSize = code_block.m_num_instructions;
 
 	FlushIcache();
+	farcode.FlushIcache();
 	return start;
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 6b2d0c3fe8..476af4c3d8 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -18,6 +18,15 @@
 
 #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
 
+// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
+// exception branches.
+class FarCodeCacheArm64 : public Arm64Gen::ARM64CodeBlock
+{
+public:
+	void Init(int size) { AllocCodeSpace(size); }
+	void Shutdown() { FreeCodeSpace(); }
+};
+
 // Some asserts to make sure we will be able to load everything
 static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
 static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
@@ -185,6 +194,22 @@ private:
 
 	ARM64FloatEmitter m_float_emit;
 
+	FarCodeCacheArm64 farcode;
+	u8* nearcode; // Backed up when we switch to far code.
+
+	// Simple functions to switch between near and far code emitting
+	void SwitchToFarCode()
+	{
+		nearcode = GetWritableCodePtr();
+		SetCodePtr(farcode.GetWritableCodePtr());
+	}
+
+	void SwitchToNearCode()
+	{
+		farcode.SetCodePtr(GetWritableCodePtr());
+		SetCodePtr(nearcode);
+	}
+
 	// Dump a memory range of code
 	void DumpCode(const u8* start, const u8* end);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
index 4dfd88b92e..2f5cb1b049 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
@@ -142,6 +142,10 @@ void JitArm64::bcx(UGeckoInstruction inst)
 		                !(inst.BO_2 & BO_BRANCH_IF_TRUE));
 	}
 
+	FixupBranch far = B();
+	SwitchToFarCode();
+	SetJumpTarget(far);
+
 	if (inst.LK)
 	{
 		u32 Jumpto = js.compilerPC + 4;
@@ -161,6 +165,8 @@
 
 	WriteExit(destination);
 
+	SwitchToNearCode();
+
 	if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
 		SetJumpTarget( pConditionDontBranch );
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
@@ -235,6 +241,10 @@
 		                !(inst.BO_2 & BO_BRANCH_IF_TRUE));
 	}
 
+	FixupBranch far = B();
+	SwitchToFarCode();
+	SetJumpTarget(far);
+
 	LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
 	AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
 
@@ -252,6 +262,8 @@
 
 	WriteExitDestInR(WA);
 
+	SwitchToNearCode();
+
 	if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
 		SetJumpTarget( pConditionDontBranch );
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 4964e2e3e1..133ff3e936 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -411,6 +411,10 @@ void JitArm64::lXX(UGeckoInstruction inst)
 		// if it's still 0, we can wait until the next event
 		FixupBranch noIdle = CBNZ(gpr.R(d));
 
+		FixupBranch far = B();
+		SwitchToFarCode();
+		SetJumpTarget(far);
+
 		gpr.Flush(FLUSH_MAINTAIN_STATE);
 		fpr.Flush(FLUSH_MAINTAIN_STATE);
 
@@ -423,6 +427,8 @@
 		gpr.Unlock(WA);
 		WriteExceptionExit();
 
+		SwitchToNearCode();
+
 		SetJumpTarget(noIdle);
 
 		//js.compilerPC += 8;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 703241fa64..cce98aee07 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -156,7 +156,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
 		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
 		BLR(EncodeRegTo64(type_reg));
 		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PushRegisters(gprs_in_use);
+		ABI_PopRegisters(gprs_in_use);
 
 		SetJumpTarget(continue1);
 	}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 212c12fa8c..058a9f1d9d 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
@@ -184,6 +184,10 @@ void JitArm64::twx(UGeckoInstruction inst)
 		SetJumpTarget(fixup);
 	}
 
+	FixupBranch far = B();
+	SwitchToFarCode();
+	SetJumpTarget(far);
+
 	gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
 	fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
 
@@ -196,6 +200,8 @@ void JitArm64::twx(UGeckoInstruction inst)
 	// WA is unlocked in this function
 	WriteExceptionExit(WA);
 
+	SwitchToNearCode();
+
 	SetJumpTarget(dont_trap);
 
 	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))