Merge pull request #12086 from krnlyng/jitcompilerPCload

JitArm64: Avoid loading compilerPC multiple times if it's already in a register.
This commit is contained in:
JosJuice 2023-08-27 10:05:11 +02:00 committed by GitHub
commit 7ac0db70c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 29 deletions

View File

@ -374,7 +374,8 @@ void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
gpr.Unlock(WD);
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return)
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
ARM64Reg exit_address_after_return_reg)
{
Cleanup();
EndTimeProfile(js.curBlock);
@ -386,11 +387,16 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
if (LK)
{
// Push {ARM_PC; PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
}
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
host_address_after_return = GetCodePtr() + adr_offset;
ADR(ARM64Reg::X0, adr_offset);
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);
}
constexpr size_t primary_farcode_size = 3 * sizeof(u32);
@ -457,7 +463,8 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
SwitchToNearCode();
}
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return,
ARM64Reg exit_address_after_return_reg)
{
if (dest != DISPATCHER_PC)
MOV(DISPATCHER_PC, dest);
@ -475,11 +482,17 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
else
{
// Push {ARM_PC, PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
}
MOVI2R(ARM64Reg::X1, exit_address_after_return);
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(ARM64Reg::X0, adr_offset);
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
STP(IndexType::Pre, ARM64Reg::X0, reg_to_push, ARM64Reg::SP, -16);
BL(dispatcher);
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
@ -515,26 +528,43 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
}
}
void JitArm64::FakeLKExit(u32 exit_address_after_return)
void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_after_return_reg)
{
if (!m_enable_blr_optimization)
return;
// We may need to fake the BLR stack on inlined CALL instructions.
// Else we can't return to this location any more.
if (exit_address_after_return_reg != ARM64Reg::W30)
{
// Do not lock W30 if it is the same as the exit address register, since
// it's already locked. It'll only get clobbered at the BL (below) where
// we do not need its value anymore.
// NOTE: This means W30 won't contain the return address anymore after this
// function has been called!
gpr.Lock(ARM64Reg::W30);
ARM64Reg after_reg = gpr.GetReg();
ARM64Reg code_reg = gpr.GetReg();
}
ARM64Reg after_reg = exit_address_after_return_reg;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
after_reg = gpr.GetReg();
MOVI2R(after_reg, exit_address_after_return);
}
ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(EncodeRegTo64(code_reg), adr_offset);
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
gpr.Unlock(after_reg, code_reg);
gpr.Unlock(code_reg);
if (after_reg != exit_address_after_return_reg)
gpr.Unlock(after_reg);
FixupBranch skip_exit = BL();
DEBUG_ASSERT(GetCodePtr() == host_address_after_return || HasWriteFailed());
if (exit_address_after_return_reg != ARM64Reg::W30)
{
gpr.Unlock(ARM64Reg::W30);
}
// Write the regular exit node after the return.
JitBlock* b = js.curBlock;

View File

@ -315,8 +315,12 @@ protected:
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
// Exits
void WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0);
void WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0);
void
WriteExit(u32 destination, bool LK = false, u32 exit_address_after_return = 0,
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void
WriteExit(Arm64Gen::ARM64Reg dest, bool LK = false, u32 exit_address_after_return = 0,
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void WriteExceptionExit(u32 destination, bool only_external = false,
bool always_exception = false);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false,
@ -325,7 +329,9 @@ protected:
void WriteConditionalExceptionExit(int exception, Arm64Gen::ARM64Reg temp_gpr,
Arm64Gen::ARM64Reg temp_fpr = Arm64Gen::ARM64Reg::INVALID_REG,
u64 increment_sp_on_exit = 0);
void FakeLKExit(u32 exit_address_after_return);
void
FakeLKExit(u32 exit_address_after_return,
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void WriteBLRExit(Arm64Gen::ARM64Reg dest);
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);

View File

@ -79,12 +79,12 @@ void JitArm64::bx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITBranchOff);
ARM64Reg WA = ARM64Reg::INVALID_REG;
if (inst.LK)
{
ARM64Reg WA = gpr.GetReg();
WA = gpr.GetReg();
MOVI2R(WA, js.compilerPC + 4);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
gpr.Unlock(WA);
}
if (!js.isLastInstruction)
@ -94,8 +94,12 @@ void JitArm64::bx(UGeckoInstruction inst)
// We have to fake the stack as the RET instruction was not
// found in the same block. This is a big overhead, but still
// better than calling the dispatcher.
FakeLKExit(js.compilerPC + 4);
FakeLKExit(js.compilerPC + 4, WA);
}
if (WA != ARM64Reg::INVALID_REG)
gpr.Unlock(WA);
return;
}
@ -104,19 +108,24 @@ void JitArm64::bx(UGeckoInstruction inst)
if (js.op->branchIsIdleLoop)
{
// make idle loops go faster
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVP2R(XA, &CoreTiming::GlobalIdle);
BLR(XA);
if (WA != ARM64Reg::INVALID_REG)
gpr.Unlock(WA);
// make idle loops go faster
ARM64Reg WB = gpr.GetReg();
ARM64Reg XB = EncodeRegTo64(WB);
MOVP2R(XB, &CoreTiming::GlobalIdle);
BLR(XB);
gpr.Unlock(WB);
WriteExceptionExit(js.op->branchTo);
return;
}
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
if (WA != ARM64Reg::INVALID_REG)
gpr.Unlock(WA);
}
void JitArm64::bcx(UGeckoInstruction inst)
@ -125,6 +134,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
JITDISABLE(bJITBranchOff);
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = inst.LK ? gpr.GetReg() : WA;
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
@ -156,7 +167,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
}
gpr.Flush(FlushMode::MaintainState, WA);
gpr.Flush(FlushMode::MaintainState, WB);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
if (js.op->branchIsIdleLoop)
@ -171,7 +182,7 @@ void JitArm64::bcx(UGeckoInstruction inst)
}
else
{
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4);
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, inst.LK ? WA : ARM64Reg::INVALID_REG);
}
SwitchToNearCode();
@ -189,6 +200,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
}
gpr.Unlock(WA);
if (WB != WA)
gpr.Unlock(WB);
}
void JitArm64::bcctrx(UGeckoInstruction inst)
@ -211,12 +224,12 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
ARM64Reg WB = ARM64Reg::INVALID_REG;
if (inst.LK_3)
{
ARM64Reg WB = gpr.GetReg();
WB = gpr.GetReg();
MOVI2R(WB, js.compilerPC + 4);
STR(IndexType::Unsigned, WB, PPC_REG, PPCSTATE_OFF_SPR(SPR_LR));
gpr.Unlock(WB);
}
ARM64Reg WA = gpr.GetReg();
@ -224,8 +237,10 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
AND(WA, WA, LogicalImm(~0x3, 32));
WriteExit(WA, inst.LK_3, js.compilerPC + 4);
WriteExit(WA, inst.LK_3, js.compilerPC + 4, inst.LK_3 ? WB : ARM64Reg::INVALID_REG);
if (WB != ARM64Reg::INVALID_REG)
gpr.Unlock(WB);
gpr.Unlock(WA);
}