From dbf5dca11c22967af747a0b29231ffea853aee6b Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 20 Aug 2021 14:56:24 +0200
Subject: [PATCH] JitArm64: FIFO optimization improvements

JitArm64 port of 789975e.
---
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp     | 50 +++++++++++++++++
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  2 +
 .../PowerPC/JitArm64/JitArm64_RegCache.cpp    | 56 ++++++++++---------
 .../Core/PowerPC/JitArm64/JitArm64_RegCache.h |  7 ++-
 4 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 6f8aeaceec..c1519450ed 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -320,6 +320,50 @@ void JitArm64::FreeStack()
 #endif
 }
 
+void JitArm64::IntializeSpeculativeConstants()
+{
+  // If the block depends on an input register which looks like a gather pipe or MMIO related
+  // constant, guess that it is actually a constant input, and specialize the block based on this
+  // assumption. This happens when there are branches in code writing to the gather pipe, but only
+  // the first block loads the constant.
+  // Insert a check at the start of the block to verify that the value is actually constant.
+  // This can save a lot of backpatching and optimize gather pipe writes in more places.
+  const u8* fail = nullptr;
+  for (auto i : code_block.m_gpr_inputs)
+  {
+    u32 compile_time_value = PowerPC::ppcState.gpr[i];
+    if (PowerPC::IsOptimizableGatherPipeWrite(compile_time_value) ||
+        PowerPC::IsOptimizableGatherPipeWrite(compile_time_value - 0x8000) ||
+        compile_time_value == 0xCC000000)
+    {
+      if (!fail)
+      {
+        SwitchToFarCode();
+        fail = GetCodePtr();
+        MOVI2R(DISPATCHER_PC, js.blockStart);
+        STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
+        MOVP2R(ARM64Reg::X8, &JitInterface::CompileExceptionCheck);
+        MOVI2R(ARM64Reg::W0, static_cast<u32>(JitInterface::ExceptionType::SpeculativeConstants));
+        BLR(ARM64Reg::X8);
+        B(dispatcher_no_check);
+        SwitchToNearCode();
+      }
+
+      ARM64Reg tmp = gpr.GetReg();
+      ARM64Reg value = gpr.R(i);
+      MOVI2R(tmp, compile_time_value);
+      CMP(value, tmp);
+      gpr.Unlock(tmp);
+
+      FixupBranch no_fail = B(CCFlags::CC_EQ);
+      B(fail);
+      SetJumpTarget(no_fail);
+
+      gpr.SetImmediate(i, compile_time_value, true);
+    }
+  }
+}
+
 void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
 {
   Cleanup();
@@ -806,6 +850,12 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
   gpr.Start(js.gpa);
   fpr.Start(js.fpa);
 
+  if (js.noSpeculativeConstantsAddresses.find(js.blockStart) ==
+      js.noSpeculativeConstantsAddresses.end())
+  {
+    IntializeSpeculativeConstants();
+  }
+
   // Translate instructions
   for (u32 i = 0; i < code_block.m_num_instructions; i++)
   {
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 75565b771d..ac1b232578 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -285,6 +285,8 @@ protected:
 
   void ResetFreeMemoryRanges();
 
+  void IntializeSpeculativeConstants();
+
   // AsmRoutines
   void GenerateAsm();
   void GenerateCommonAsm();
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index 3913cdff79..83bbfe1b3a 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -202,37 +202,40 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state, ARM64Reg tm
   }
   else if (reg.GetType() == RegType::Immediate)
   {
-    if (!reg.GetImm())
+    if (reg.IsDirty())
     {
-      m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG,
-                  u32(guest_reg.ppc_offset));
-    }
-    else
-    {
-      bool allocated_tmp_reg = false;
-      if (tmp_reg != ARM64Reg::INVALID_REG)
+      if (!reg.GetImm())
       {
-        ASSERT(IsGPR(tmp_reg));
+        m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG,
+                    u32(guest_reg.ppc_offset));
       }
       else
       {
-        ASSERT_MSG(DYNA_REC, !maintain_state,
-                   "Flushing immediate while maintaining state requires temporary register");
-        tmp_reg = GetReg();
-        allocated_tmp_reg = true;
+        bool allocated_tmp_reg = false;
+        if (tmp_reg != ARM64Reg::INVALID_REG)
+        {
+          ASSERT(IsGPR(tmp_reg));
+        }
+        else
+        {
+          ASSERT_MSG(DYNA_REC, !maintain_state,
+                     "Flushing immediate while maintaining state requires temporary register");
+          tmp_reg = GetReg();
+          allocated_tmp_reg = true;
+        }
+
+        const ARM64Reg encoded_tmp_reg = bitsize != 64 ? tmp_reg : EncodeRegTo64(tmp_reg);
+
+        m_emit->MOVI2R(encoded_tmp_reg, reg.GetImm());
+        m_emit->STR(IndexType::Unsigned, encoded_tmp_reg, PPC_REG, u32(guest_reg.ppc_offset));
+
+        if (allocated_tmp_reg)
+          UnlockRegister(tmp_reg);
       }
 
-      const ARM64Reg encoded_tmp_reg = bitsize != 64 ? tmp_reg : EncodeRegTo64(tmp_reg);
-
-      m_emit->MOVI2R(encoded_tmp_reg, reg.GetImm());
-      m_emit->STR(IndexType::Unsigned, encoded_tmp_reg, PPC_REG, u32(guest_reg.ppc_offset));
-
-      if (allocated_tmp_reg)
-        UnlockRegister(tmp_reg);
+      if (!maintain_state)
+        reg.Flush();
     }
-
-    if (!maintain_state)
-      reg.Flush();
   }
 }
 
@@ -335,12 +338,13 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
   return ARM64Reg::INVALID_REG;
 }
 
-void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm)
+void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty)
 {
   OpArg& reg = guest_reg.reg;
   if (reg.GetType() == RegType::Register)
     UnlockRegister(EncodeRegTo32(reg.GetReg()));
   reg.LoadToImm(imm);
+  reg.SetDirty(dirty);
 }
 
 void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write)
@@ -373,8 +377,8 @@ void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool will_read
       m_emit->MOVI2R(host_reg, reg.GetImm());
     }
     reg.Load(host_reg);
-    // If the register had an immediate value, the register was effectively already dirty
-    reg.SetDirty(true);
+    if (will_write)
+      reg.SetDirty(true);
   }
   else if (will_write)
   {
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index d814e846f2..a8e63eb006 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -262,7 +262,10 @@ public:
   Arm64Gen::ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); }
 
   // Set a register to an immediate. Only valid for guest GPRs.
-  void SetImmediate(size_t preg, u32 imm) { SetImmediate(GetGuestGPR(preg), imm); }
+  void SetImmediate(size_t preg, u32 imm, bool dirty = true)
+  {
+    SetImmediate(GetGuestGPR(preg), imm, dirty);
+  }
 
   // Returns if a register is set as an immediate. Only valid for guest GPRs.
   bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == RegType::Immediate; }
@@ -345,7 +348,7 @@ private:
   GuestRegInfo GetGuestByIndex(size_t index);
 
   Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg);
-  void SetImmediate(const GuestRegInfo& guest_reg, u32 imm);
+  void SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty);
   void BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write = true);
 
   void FlushRegisters(BitSet32 regs, bool maintain_state, Arm64Gen::ARM64Reg tmp_reg);
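
Note (not part of the patch): the guard emitted by IntializeSpeculativeConstants() boils down to "compare the guest GPR against the value it held at compile time; if it differs, write the current PC back and let JitInterface::CompileExceptionCheck recompile the block without the assumption, via the far-code path". The C++ sketch below models only that idea on the host side; every identifier in it (SpeculativeConstant, guard_passes, and the sample values) is hypothetical and is not Dolphin code.

#include <cstdint>
#include <cstdio>

// One speculative-constant assumption baked into a compiled block.
struct SpeculativeConstant
{
  int guest_reg;           // guest GPR the block was specialized on
  uint32_t assumed_value;  // value read from ppcState when the block was compiled
};

// Mirrors the emitted MOVI2R + CMP + B(CC_EQ): true means the specialized block
// may run; false means execution takes the far-code path, which stores the PC
// and requests recompilation without the speculative-constant assumption.
bool guard_passes(const SpeculativeConstant& guard, const uint32_t* gpr)
{
  return gpr[guard.guest_reg] == guard.assumed_value;
}

int main()
{
  uint32_t gpr[32] = {};
  gpr[3] = 0xCC008000;  // looks like a gather pipe pointer, so r3 gets specialized

  const SpeculativeConstant guard{3, gpr[3]};
  std::printf("first entry:  %s\n", guard_passes(guard, gpr) ? "fast path" : "recompile");

  gpr[3] = 0x80001234;  // a later entry reaches the block with a different r3
  std::printf("second entry: %s\n", guard_passes(guard, gpr) ? "fast path" : "recompile");
}

The dirty parameter added to SetImmediate() ties into this: an immediate that merely mirrors what ppcState already contains does not need to be written back, so FlushRegister() now stores an immediate only when it is marked dirty, and BindToRegister() only marks the register dirty when the caller intends to write to it.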