Merge pull request #11243 from Sintendo/arm64mul

JitArm64: Optimize multiplication
This commit is contained in:
Admiral H. Curtiss 2022-11-15 04:05:49 +01:00 committed by GitHub
commit d7593dd721
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 3 deletions

View File

@ -332,6 +332,7 @@ protected:
void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64,
Arm64Gen::ARM64Reg),
bool Rc = false);
bool MultiplyImmediate(u32 imm, int a, int d, bool rc);
void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg);
void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input);

View File

@ -7,6 +7,7 @@
#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
@ -883,6 +884,75 @@ void JitArm64::addic(UGeckoInstruction inst)
}
}
bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc)
{
if (imm == 0)
{
// Multiplication by zero (0).
gpr.SetImmediate(d, 0);
if (rc)
ComputeRC0(gpr.GetImm(d));
}
else if (imm == 1)
{
// Multiplication by one (1).
if (d != a)
{
gpr.BindToRegister(d, false);
MOV(gpr.R(d), gpr.R(a));
}
if (rc)
ComputeRC0(gpr.R(d));
}
else if (MathUtil::IsPow2(imm))
{
// Multiplication by a power of two (2^n).
const int shift = IntLog2(imm);
gpr.BindToRegister(d, d == a);
LSL(gpr.R(d), gpr.R(a), shift);
if (rc)
ComputeRC0(gpr.R(d));
}
else if (MathUtil::IsPow2(imm - 1))
{
// Multiplication by a power of two plus one (2^n + 1).
const int shift = IntLog2(imm - 1);
gpr.BindToRegister(d, d == a);
ADD(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift));
if (rc)
ComputeRC0(gpr.R(d));
}
else if (MathUtil::IsPow2(~imm + 1))
{
// Multiplication by a negative power of two (-(2^n)).
const int shift = IntLog2(~imm + 1);
gpr.BindToRegister(d, d == a);
NEG(gpr.R(d), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift));
if (rc)
ComputeRC0(gpr.R(d));
}
else if (MathUtil::IsPow2(~imm + 2))
{
// Multiplication by a negative power of two plus one (-(2^n) + 1).
const int shift = IntLog2(~imm + 2);
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift));
if (rc)
ComputeRC0(gpr.R(d));
}
else
{
// Immediate did not match any known special cases.
return false;
}
return true;
}
void JitArm64::mulli(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -895,12 +965,21 @@ void JitArm64::mulli(UGeckoInstruction inst)
s32 i = (s32)gpr.GetImm(a);
gpr.SetImmediate(d, i * inst.SIMM_16);
}
else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
else
{
gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg();
const bool allocate_reg = d == a;
gpr.BindToRegister(d, allocate_reg);
// Reuse d to hold the immediate if possible, allocate a register otherwise.
ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(d);
MOVI2R(WA, (u32)(s32)inst.SIMM_16);
MUL(gpr.R(d), gpr.R(a), WA);
if (allocate_reg)
gpr.Unlock(WA);
}
}
@ -920,6 +999,11 @@ void JitArm64::mullwx(UGeckoInstruction inst)
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) ||
(gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc)))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
else
{
gpr.BindToRegister(d, d == a || d == b);