mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-11-14 21:37:52 -07:00
Merge pull request #5919 from degasus/arm
JitArm64: Small performance optimizations.
This commit is contained in:
commit
06da1973a8
@ -1542,19 +1542,22 @@ void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
|
|||||||
}
|
}
|
||||||
void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
||||||
{
|
{
|
||||||
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, shift));
|
int bits = Is64Bit(Rd) ? 64 : 32;
|
||||||
|
UBFM(Rd, Rm, (bits - shift) & (bits - 1), bits - shift - 1);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
||||||
{
|
{
|
||||||
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSR, shift));
|
int bits = Is64Bit(Rd) ? 64 : 32;
|
||||||
|
UBFM(Rd, Rm, shift, bits - 1);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
||||||
{
|
{
|
||||||
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ASR, shift));
|
int bits = Is64Bit(Rd) ? 64 : 32;
|
||||||
|
SBFM(Rd, Rm, shift, bits - 1);
|
||||||
}
|
}
|
||||||
void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
|
||||||
{
|
{
|
||||||
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ROR, shift));
|
EXTR(Rd, Rm, Rm, shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Logical (immediate)
|
// Logical (immediate)
|
||||||
|
@ -721,7 +721,7 @@ public:
|
|||||||
void MOV(ARM64Reg Rd, ARM64Reg Rm);
|
void MOV(ARM64Reg Rd, ARM64Reg Rm);
|
||||||
void MVN(ARM64Reg Rd, ARM64Reg Rm);
|
void MVN(ARM64Reg Rd, ARM64Reg Rm);
|
||||||
|
|
||||||
// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
|
// Convenience wrappers around UBFM/SBFM/EXTR.
|
||||||
void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
|
||||||
|
@ -196,14 +196,14 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
|||||||
{
|
{
|
||||||
m_float_emit.FCVTN(32, D0, RS);
|
m_float_emit.FCVTN(32, D0, RS);
|
||||||
m_float_emit.UMOV(64, X0, D0, 0);
|
m_float_emit.UMOV(64, X0, D0, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ROR(X0, X0, 32);
|
||||||
MOVP2R(X30, &PowerPC::Write_U64);
|
MOVP2R(X30, &PowerPC::Write_U64);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
||||||
{
|
{
|
||||||
m_float_emit.UMOV(64, X0, RS, 0);
|
m_float_emit.UMOV(64, X0, RS, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ROR(X0, X0, 32);
|
||||||
MOVP2R(X30, &PowerPC::Write_U64);
|
MOVP2R(X30, &PowerPC::Write_U64);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
|
@ -532,11 +532,28 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)
|
|||||||
|
|
||||||
gpr.BindToRegister(a, a == s);
|
gpr.BindToRegister(a, a == s);
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
if (!inst.SH)
|
||||||
ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
|
{
|
||||||
MOVI2R(WA, mask);
|
// Immediate mask
|
||||||
AND(gpr.R(a), WA, gpr.R(s), Shift);
|
ANDI2R(gpr.R(a), gpr.R(s), mask);
|
||||||
gpr.Unlock(WA);
|
}
|
||||||
|
else if (inst.ME == 31 && 31 < inst.SH + inst.MB)
|
||||||
|
{
|
||||||
|
// Bit select of the upper part
|
||||||
|
UBFX(gpr.R(a), gpr.R(s), 32 - inst.SH, 32 - inst.MB);
|
||||||
|
}
|
||||||
|
else if (inst.ME == 31 - inst.SH && 32 > inst.SH + inst.MB)
|
||||||
|
{
|
||||||
|
// Bit select of the lower part
|
||||||
|
UBFIZ(gpr.R(a), gpr.R(s), inst.SH, 32 - inst.SH - inst.MB);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
MOVI2R(WA, mask);
|
||||||
|
AND(gpr.R(a), WA, gpr.R(s), ArithOption(gpr.R(s), ST_ROR, 32 - inst.SH));
|
||||||
|
gpr.Unlock(WA);
|
||||||
|
}
|
||||||
|
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC0(gpr.R(a));
|
ComputeRC0(gpr.R(a));
|
||||||
|
@ -499,8 +499,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
|
|||||||
LDP(INDEX_POST, EncodeRegTo64(RX1), EncodeRegTo64(RX3), XA, 16);
|
LDP(INDEX_POST, EncodeRegTo64(RX1), EncodeRegTo64(RX3), XA, 16);
|
||||||
REV32(EncodeRegTo64(RX1), EncodeRegTo64(RX1));
|
REV32(EncodeRegTo64(RX1), EncodeRegTo64(RX1));
|
||||||
REV32(EncodeRegTo64(RX3), EncodeRegTo64(RX3));
|
REV32(EncodeRegTo64(RX3), EncodeRegTo64(RX3));
|
||||||
ORR(EncodeRegTo64(RX2), ZR, EncodeRegTo64(RX1), ArithOption(EncodeRegTo64(RX1), ST_LSR, 32));
|
LSR(EncodeRegTo64(RX2), EncodeRegTo64(RX1), 32);
|
||||||
ORR(EncodeRegTo64(RX4), ZR, EncodeRegTo64(RX3), ArithOption(EncodeRegTo64(RX3), ST_LSR, 32));
|
LSR(EncodeRegTo64(RX4), EncodeRegTo64(RX3), 32);
|
||||||
i += 3;
|
i += 3;
|
||||||
}
|
}
|
||||||
else if (remaining >= 2)
|
else if (remaining >= 2)
|
||||||
|
@ -300,12 +300,12 @@ void JitArm64::mfspr(UGeckoInstruction inst)
|
|||||||
if (iIndex == SPR_TL)
|
if (iIndex == SPR_TL)
|
||||||
MOV(gpr.R(d), Wresult);
|
MOV(gpr.R(d), Wresult);
|
||||||
else
|
else
|
||||||
ORR(EncodeRegTo64(gpr.R(d)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
|
LSR(EncodeRegTo64(gpr.R(d)), Xresult, 32);
|
||||||
|
|
||||||
if (nextIndex == SPR_TL)
|
if (nextIndex == SPR_TL)
|
||||||
MOV(gpr.R(n), Wresult);
|
MOV(gpr.R(n), Wresult);
|
||||||
else
|
else
|
||||||
ORR(EncodeRegTo64(gpr.R(n)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
|
LSR(EncodeRegTo64(gpr.R(n)), Xresult, 32);
|
||||||
|
|
||||||
gpr.Unlock(Wg, Wresult, WA, WB);
|
gpr.Unlock(Wg, Wresult, WA, WB);
|
||||||
fpr.Unlock(VC, VD);
|
fpr.Unlock(VC, VD);
|
||||||
@ -314,7 +314,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
|
|||||||
}
|
}
|
||||||
gpr.BindToRegister(d, false);
|
gpr.BindToRegister(d, false);
|
||||||
if (iIndex == SPR_TU)
|
if (iIndex == SPR_TU)
|
||||||
ORR(EncodeRegTo64(gpr.R(d)), ZR, Xresult, ArithOption(Xresult, ST_LSR, 32));
|
LSR(EncodeRegTo64(gpr.R(d)), Xresult, 32);
|
||||||
else
|
else
|
||||||
MOV(gpr.R(d), Wresult);
|
MOV(gpr.R(d), Wresult);
|
||||||
|
|
||||||
|
@ -374,7 +374,7 @@ void JitArm64::GenerateCommonAsm()
|
|||||||
|
|
||||||
storePairedFloatSlow = GetCodePtr();
|
storePairedFloatSlow = GetCodePtr();
|
||||||
float_emit.UMOV(64, X0, Q0, 0);
|
float_emit.UMOV(64, X0, Q0, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ROR(X0, X0, 32);
|
||||||
MOVP2R(X2, &PowerPC::Write_U64);
|
MOVP2R(X2, &PowerPC::Write_U64);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user