mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-25 23:29:44 -06:00
[AArch64] Clean up bogus vector FCVT{N,L} instruction usage.
Replace these instructions with the scalar FCVT instruction. On the Cortex-A57, FCVT{N,L} have eight-cycle latency, while FCVT has five-cycle latency and slightly higher throughput. On the A72, all three of these instructions have three-cycle latency, but FCVT{N,L} have half the throughput of FCVT.
This commit is contained in:
@ -405,7 +405,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
|||||||
if (!code_block.m_gqr_modified[gqr] && !GQR(gqr))
|
if (!code_block.m_gqr_modified[gqr] && !GQR(gqr))
|
||||||
{
|
{
|
||||||
LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4);
|
LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4);
|
||||||
FixupBranch no_fail = B(CC_EQ);
|
FixupBranch no_fail = CBZ(W0);
|
||||||
FixupBranch fail = B();
|
FixupBranch fail = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(fail);
|
SetJumpTarget(fail);
|
||||||
|
@ -83,7 +83,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||||||
{
|
{
|
||||||
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
|
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
|
||||||
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -211,7 +211,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||||||
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
m_float_emit.INS(32, RS, 0, X0);
|
m_float_emit.INS(32, RS, 0, X0);
|
||||||
m_float_emit.FCVTL(64, RS, RS);
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -335,8 +335,8 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
|||||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||||
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
||||||
|
|
||||||
m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||||
m_float_emit.FCVTL(64, EncodeRegToDouble(VD), EncodeRegToDouble(VD));
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VD), EncodeRegToDouble(VD));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::fcmpx(UGeckoInstruction inst)
|
void JitArm64::fcmpx(UGeckoInstruction inst)
|
||||||
@ -441,7 +441,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
|||||||
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL);
|
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL);
|
||||||
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
|
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
|
||||||
|
|
||||||
m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||||
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z);
|
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z);
|
||||||
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
|
||||||
fpr.Unlock(V0);
|
fpr.Unlock(V0);
|
||||||
|
@ -66,14 +66,14 @@ void JitArm64::psq_l(UGeckoInstruction inst)
|
|||||||
ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28);
|
ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28);
|
||||||
m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg));
|
m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg));
|
||||||
m_float_emit.REV32(8, VS, VS);
|
m_float_emit.REV32(8, VS, VS);
|
||||||
|
m_float_emit.FCVTL(64, VS, VS);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28);
|
m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28);
|
||||||
m_float_emit.REV32(8, VS, VS);
|
m_float_emit.REV32(8, VS, VS);
|
||||||
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), EncodeRegToDouble(VS));
|
||||||
}
|
}
|
||||||
m_float_emit.FCVTL(64, VS, VS);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -86,7 +86,10 @@ void JitArm64::psq_l(UGeckoInstruction inst)
|
|||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
VS = fpr.RW(inst.RS, REG_REG);
|
VS = fpr.RW(inst.RS, REG_REG);
|
||||||
|
if (!inst.W)
|
||||||
m_float_emit.FCVTL(64, VS, D0);
|
m_float_emit.FCVTL(64, VS, D0);
|
||||||
|
else
|
||||||
|
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), D0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst.W)
|
if (inst.W)
|
||||||
|
Reference in New Issue
Block a user