JitArm64/Jit64: Load the memory register without jumps and only when necessary.

This commit is contained in:
Franz-Josef Haider 2023-07-14 17:10:51 +03:00 committed by Frajo Haider
parent 17fa9dfc4e
commit 8bfcd2deb7
14 changed files with 104 additions and 41 deletions

View File

@ -490,6 +490,21 @@ void Jit64::FakeBLCall(u32 after)
SetJumpTarget(skip_exit);
}
// Emits a 64-bit load of PPCSTATE(mem_ptr) into RMEM, so that the memory base
// register used by generated code matches the pointer currently stored in the
// PowerPC state.
void Jit64::EmitUpdateMembase()
{
MOV(64, R(RMEM), PPCSTATE(mem_ptr));
}
// Emits code that selects the memory base matching the DR bit of `msr`, leaves it
// in RMEM and writes it back to PPCSTATE(mem_ptr).
//
// msr:         operand holding the MSR value to test; only bit 1 << (31 - 27)
//              (MSR.DR) is examined.
// scratch_reg: register overwritten with the physical base pointer.
//
// The selection uses a conditional move rather than a branch.
// NOTE(review): unlike JitInterface::UpdateMembase, this does not consult
// jo.fastmem_arena - confirm that is intended for Jit64.
void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg)
{
  constexpr auto msr_dr_mask = 1 << (31 - 27);

  auto& mem = m_system.GetMemory();
  const auto logical_base = mem.GetLogicalBase();
  const auto physical_base = mem.GetPhysicalBase();

  // Start from the logical base, keeping the physical base ready in the scratch register.
  MOV(64, R(RMEM), ImmPtr(logical_base));
  MOV(64, R(scratch_reg), ImmPtr(physical_base));

  // DR clear (zero flag set by the TEST) -> switch RMEM over to the physical base.
  TEST(32, msr, Imm32(msr_dr_mask));
  CMOVcc(64, RMEM, R(scratch_reg), CC_Z);

  // Publish the chosen base so later loads of mem_ptr see it.
  MOV(64, PPCSTATE(mem_ptr), R(RMEM));
}
void Jit64::WriteExit(u32 destination, bool bl, u32 after)
{
if (!m_enable_blr_optimization)
@ -599,6 +614,7 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, Jump::Near);
}
@ -620,6 +636,7 @@ void Jit64::WriteExceptionExit()
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, Jump::Near);
}
@ -632,6 +649,7 @@ void Jit64::WriteExternalExceptionExit()
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExternalExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, Jump::Near);
}
@ -639,6 +657,7 @@ void Jit64::WriteExternalExceptionExit()
void Jit64::Run()
{
ProtectStack();
m_system.GetJitInterface().UpdateMembase();
CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code;
pExecAddr();
@ -649,6 +668,7 @@ void Jit64::Run()
void Jit64::SingleStep()
{
ProtectStack();
m_system.GetJitInterface().UpdateMembase();
CompiledCode pExecAddr = (CompiledCode)asm_routines.enter_code;
pExecAddr();
@ -745,6 +765,7 @@ void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
m_ppc_state.npc = nextPC;
m_ppc_state.Exceptions |= EXCEPTION_ISI;
m_system.GetPowerPC().CheckExceptions();
m_system.GetJitInterface().UpdateMembase();
WARN_LOG_FMT(POWERPC, "ISI exception at {:#010x}", nextPC);
return;
}

View File

@ -87,6 +87,8 @@ public:
// Utilities for use by opcodes
// Emits a load of PPCSTATE(mem_ptr) into the memory base register (RMEM).
void EmitUpdateMembase();
// Emits code that picks the logical or physical memory base depending on the DR bit
// of `msr`, leaves it in RMEM and stores it to PPCSTATE(mem_ptr). `scratch_reg` is
// overwritten.
void EmitStoreMembase(const Gen::OpArg& msr, Gen::X64Reg scratch_reg);
void FakeBLCall(u32 after);
void WriteExit(u32 destination, bool bl = false, u32 after = 0);
void JustWriteExit(u32 destination, bool bl, u32 after);

View File

@ -65,6 +65,11 @@ void Jit64AsmRoutineManager::Generate()
ABI_CallFunction(CoreTiming::GlobalAdvance);
ABI_PopRegistersAndAdjustStack({}, 0);
// When we've just entered the JIT, we need to update the membase.
// GlobalAdvance also checks exceptions, after which the membase must be
// updated as well, so it makes sense to do this here.
m_jit.EmitUpdateMembase();
// skip the sync and compare first time
FixupBranch skipToRealDispatch = J(enable_debugging ? Jump::Near : Jump::Short);
@ -104,8 +109,6 @@ void Jit64AsmRoutineManager::Generate()
dispatcher_no_check = GetCodePtr();
auto& memory = system.GetMemory();
// The following is a translation of JitBaseBlockCache::Dispatch into assembly.
const bool assembly_dispatcher = true;
if (assembly_dispatcher)
@ -165,13 +168,6 @@ void Jit64AsmRoutineManager::Generate()
FixupBranch state_mismatch = J_CC(CC_NE);
// Success; branch to the block we found.
// Switch to the correct memory base, in case MSR.DR has changed.
TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
FixupBranch physmem = J_CC(CC_Z);
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry))));
SetJumpTarget(physmem);
MOV(64, R(RMEM), ImmPtr(memory.GetPhysicalBase()));
JMPptr(MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, normalEntry))));
SetJumpTarget(not_found);
@ -189,13 +185,7 @@ void Jit64AsmRoutineManager::Generate()
TEST(64, R(ABI_RETURN), R(ABI_RETURN));
FixupBranch no_block_available = J_CC(CC_Z);
// Switch to the correct memory base, in case MSR.DR has changed.
TEST(32, PPCSTATE(msr), Imm32(1 << (31 - 27)));
FixupBranch physmem = J_CC(CC_Z);
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
JMPptr(R(ABI_RETURN));
SetJumpTarget(physmem);
MOV(64, R(RMEM), ImmPtr(memory.GetPhysicalBase()));
// Jump to the block
JMPptr(R(ABI_RETURN));
SetJumpTarget(no_block_available);

View File

@ -12,6 +12,7 @@
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
// The branches are known good, or at least reasonably good.
// No need for a disable-mechanism.
@ -54,6 +55,9 @@ void Jit64::rfi(UGeckoInstruction inst)
MOV(32, R(RSCRATCH), PPCSTATE_SRR1);
AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
OR(32, PPCSTATE(msr), R(RSCRATCH));
EmitStoreMembase(R(RSCRATCH), RSCRATCH2);
// NPC = SRR0;
MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
WriteRfiExitDestInRSCRATCH();

View File

@ -438,7 +438,10 @@ void Jit64::mtmsr(UGeckoInstruction inst)
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
EmitStoreMembase(PPCSTATE(msr), RSCRATCH2);
}
gpr.Flush();
fpr.Flush();

View File

@ -127,8 +127,11 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
{
ERROR_LOG_FMT(DYNA_REC,
"JitArm64 address calculation overflowed. This should never happen! "
"PC {:#018x}, access address {:#018x}, memory base {:#018x}, MSR.DR {}",
ctx->CTX_PC, access_address, memory_base, m_ppc_state.msr.DR);
"PC {:#018x}, access address {:#018x}, memory base {:#018x}, MSR.DR {}, "
"mem_ptr {}, pbase {}, lbase {}",
ctx->CTX_PC, access_address, memory_base, m_ppc_state.msr.DR,
fmt::ptr(m_ppc_state.mem_ptr), fmt::ptr(memory.GetPhysicalBase()),
fmt::ptr(memory.GetLogicalBase()));
}
else
{
@ -353,6 +356,24 @@ void JitArm64::IntializeSpeculativeConstants()
}
}
// Emits a load of the mem_ptr field of the PowerPC state (addressed through
// PPC_REG) into MEM_REG, so that the memory base register used by generated code
// matches the pointer currently stored in the state.
void JitArm64::EmitUpdateMembase()
{
LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
}
void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
{
auto& memory = m_system.GetMemory();
ARM64Reg WD = gpr.GetReg();
ARM64Reg XD = EncodeRegTo64(WD);
MOVP2R(MEM_REG, jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XD, jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), 32));
CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
gpr.Unlock(WD);
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
{
Cleanup();
@ -523,6 +544,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always
else
MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExceptionsFromJIT);
BLR(EncodeRegTo64(DISPATCHER_PC));
EmitUpdateMembase();
LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
@ -636,6 +658,7 @@ void JitArm64::EndTimeProfile(JitBlock* b)
void JitArm64::Run()
{
ProtectStack();
m_system.GetJitInterface().UpdateMembase();
CompiledCode pExecAddr = (CompiledCode)enter_code;
pExecAddr();
@ -646,6 +669,7 @@ void JitArm64::Run()
void JitArm64::SingleStep()
{
ProtectStack();
m_system.GetJitInterface().UpdateMembase();
CompiledCode pExecAddr = (CompiledCode)enter_code;
pExecAddr();
@ -747,6 +771,7 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
m_ppc_state.npc = nextPC;
m_ppc_state.Exceptions |= EXCEPTION_ISI;
m_system.GetPowerPC().CheckExceptions();
m_system.GetJitInterface().UpdateMembase();
WARN_LOG_FMT(POWERPC, "ISI exception at {:#010x}", nextPC);
return;
}

View File

@ -311,6 +311,9 @@ protected:
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
// Emits a load of the PowerPC state's mem_ptr into MEM_REG.
void EmitUpdateMembase();
// Emits code that picks the logical or physical memory base depending on the DR bit
// of `msr`, leaves it in MEM_REG and stores it to the PowerPC state's mem_ptr.
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
// Exits
void WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0);
void WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0);

View File

@ -11,6 +11,7 @@
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
using namespace Arm64Gen;
@ -64,6 +65,8 @@ void JitArm64::rfi(UGeckoInstruction inst)
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA
EmitStoreMembase(WA);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0));
gpr.Unlock(WB, WC);

View File

@ -94,6 +94,8 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
gpr.BindToRegister(inst.RS, true);
STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));
EmitStoreMembase(gpr.R(inst.RS));
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);

View File

@ -95,26 +95,13 @@ void JitArm64::GenerateAsm()
bool assembly_dispatcher = true;
auto& memory = m_system.GetMemory();
if (assembly_dispatcher)
{
// set the mem_base based on MSR flags
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG,
jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVP2R(MEM_REG,
jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
SetJumpTarget(membaseend);
if (GetBlockCache()->GetFastBlockMap())
{
// Check if there is a block
ARM64Reg pc_masked = ARM64Reg::X25;
ARM64Reg cache_base = ARM64Reg::X27;
ARM64Reg cache_base = ARM64Reg::X24;
ARM64Reg block = ARM64Reg::X30;
LSL(pc_masked, DISPATCHER_PC, 1);
MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
@ -122,7 +109,7 @@ void JitArm64::GenerateAsm()
FixupBranch not_found = CBZ(block);
// b.msrBits != msr
ARM64Reg msr = ARM64Reg::W25;
ARM64Reg msr = ARM64Reg::W27;
ARM64Reg msr2 = ARM64Reg::W24;
LDR(IndexType::Unsigned, msr, PPC_REG, PPCSTATE_OFF(msr));
AND(msr, msr, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
@ -181,14 +168,6 @@ void JitArm64::GenerateAsm()
FixupBranch no_block_available = CBZ(ARM64Reg::X0);
// set the mem_base based on MSR flags and jump to next block.
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG,
jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
BR(ARM64Reg::X0);
SetJumpTarget(physmem);
MOVP2R(MEM_REG, jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
BR(ARM64Reg::X0);
// Call JIT
@ -217,6 +196,11 @@ void JitArm64::GenerateAsm()
MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance);
BLR(ARM64Reg::X8);
// When we've just entered the JIT, we need to update the membase.
// GlobalAdvance also checks exceptions, after which the membase must be
// updated as well, so it makes sense to do this here.
EmitUpdateMembase();
// Load the PC back into DISPATCHER_PC (the exception handler might have changed it)
LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));

View File

@ -98,6 +98,25 @@ void JitInterface::SetProfilingState(ProfilingState state)
m_jit->jo.profile_blocks = state == ProfilingState::Enabled;
}
// Recomputes ppc_state.mem_ptr from the current MSR.DR bit and the JIT's
// fastmem_arena option. Does nothing when no JIT instance exists.
//
//   DR set   -> logical base  (or logical page-mappings base without the arena)
//   DR clear -> physical base (or physical page-mappings base without the arena)
void JitInterface::UpdateMembase()
{
  if (!m_jit)
    return;

  auto& mem = m_system.GetMemory();
  auto& state = m_system.GetPPCState();
  const bool has_arena = m_jit->jo.fastmem_arena;

  if (state.msr.DR)
    state.mem_ptr = has_arena ? mem.GetLogicalBase() : mem.GetLogicalPageMappingsBase();
  else
    state.mem_ptr = has_arena ? mem.GetPhysicalBase() : mem.GetPhysicalPageMappingsBase();
}
void JitInterface::WriteProfileResults(const std::string& filename) const
{
Profiler::ProfileStats prof_stats;

View File

@ -61,6 +61,7 @@ public:
u32 entry_address;
};
// Refreshes the PowerPC state's mem_ptr according to MSR.DR and the JIT's
// fastmem_arena option; a no-op when no JIT is active.
void UpdateMembase();
void SetProfilingState(ProfilingState state);
void WriteProfileResults(const std::string& filename) const;
void GetProfileResults(Profiler::ProfileStats* prof_stats) const;

View File

@ -569,7 +569,10 @@ void PowerPCManager::CheckExceptions()
else
{
CheckExternalExceptions();
return;
}
m_system.GetJitInterface().UpdateMembase();
}
void PowerPCManager::CheckExternalExceptions()
@ -623,6 +626,8 @@ void PowerPCManager::CheckExternalExceptions()
exceptions);
}
}
m_system.GetJitInterface().UpdateMembase();
}
void PowerPCManager::CheckBreakPoints()

View File

@ -169,6 +169,7 @@ struct PowerPCState
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer = nullptr;
u8* mem_ptr = nullptr;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;